author     duke <none@none>  2007-12-01 00:00:00 +0000
committer  duke <none@none>  2007-12-01 00:00:00 +0000
commit     fa6b5a8027b86d2f8a200e72b4ef6a0d3f9189d3
tree       8376f6e5c41e70162b5867d9e1fea3f17f540473  /src/share/vm/opto
Initial load    jdk7-b24
Diffstat (limited to 'src/share/vm/opto')
-rw-r--r--  src/share/vm/opto/addnode.cpp  871
-rw-r--r--  src/share/vm/opto/addnode.hpp  239
-rw-r--r--  src/share/vm/opto/adlcVMDeps.hpp  44
-rw-r--r--  src/share/vm/opto/block.cpp  952
-rw-r--r--  src/share/vm/opto/block.hpp  510
-rw-r--r--  src/share/vm/opto/buildOopMap.cpp  623
-rw-r--r--  src/share/vm/opto/bytecodeInfo.cpp  490
-rw-r--r--  src/share/vm/opto/c2_globals.cpp  28
-rw-r--r--  src/share/vm/opto/c2_globals.hpp  382
-rw-r--r--  src/share/vm/opto/c2compiler.cpp  129
-rw-r--r--  src/share/vm/opto/c2compiler.hpp  56
-rw-r--r--  src/share/vm/opto/callGenerator.cpp  744
-rw-r--r--  src/share/vm/opto/callGenerator.hpp  266
-rw-r--r--  src/share/vm/opto/callnode.cpp  1311
-rw-r--r--  src/share/vm/opto/callnode.hpp  814
-rw-r--r--  src/share/vm/opto/cfgnode.cpp  1954
-rw-r--r--  src/share/vm/opto/cfgnode.hpp  481
-rw-r--r--  src/share/vm/opto/chaitin.cpp  2042
-rw-r--r--  src/share/vm/opto/chaitin.hpp  501
-rw-r--r--  src/share/vm/opto/classes.cpp  34
-rw-r--r--  src/share/vm/opto/classes.hpp  308
-rw-r--r--  src/share/vm/opto/coalesce.cpp  915
-rw-r--r--  src/share/vm/opto/coalesce.hpp  109
-rw-r--r--  src/share/vm/opto/compile.cpp  2384
-rw-r--r--  src/share/vm/opto/compile.hpp  720
-rw-r--r--  src/share/vm/opto/connode.cpp  1227
-rw-r--r--  src/share/vm/opto/connode.hpp  578
-rw-r--r--  src/share/vm/opto/divnode.cpp  1031
-rw-r--r--  src/share/vm/opto/divnode.hpp  177
-rw-r--r--  src/share/vm/opto/doCall.cpp  862
-rw-r--r--  src/share/vm/opto/domgraph.cpp  664
-rw-r--r--  src/share/vm/opto/escape.cpp  1346
-rw-r--r--  src/share/vm/opto/escape.hpp  319
-rw-r--r--  src/share/vm/opto/gcm.cpp  1767
-rw-r--r--  src/share/vm/opto/generateOptoStub.cpp  291
-rw-r--r--  src/share/vm/opto/graphKit.cpp  3146
-rw-r--r--  src/share/vm/opto/graphKit.hpp  720
-rw-r--r--  src/share/vm/opto/idealGraphPrinter.cpp  1919
-rw-r--r--  src/share/vm/opto/idealGraphPrinter.hpp  323
-rw-r--r--  src/share/vm/opto/idealKit.cpp  503
-rw-r--r--  src/share/vm/opto/idealKit.hpp  230
-rw-r--r--  src/share/vm/opto/ifg.cpp  813
-rw-r--r--  src/share/vm/opto/ifnode.cpp  922
-rw-r--r--  src/share/vm/opto/indexSet.cpp  573
-rw-r--r--  src/share/vm/opto/indexSet.hpp  461
-rw-r--r--  src/share/vm/opto/lcm.cpp  934
-rw-r--r--  src/share/vm/opto/library_call.cpp  4921
-rw-r--r--  src/share/vm/opto/live.cpp  314
-rw-r--r--  src/share/vm/opto/live.hpp  74
-rw-r--r--  src/share/vm/opto/locknode.cpp  122
-rw-r--r--  src/share/vm/opto/locknode.hpp  97
-rw-r--r--  src/share/vm/opto/loopTransform.cpp  1729
-rw-r--r--  src/share/vm/opto/loopUnswitch.cpp  237
-rw-r--r--  src/share/vm/opto/loopnode.cpp  2886
-rw-r--r--  src/share/vm/opto/loopnode.hpp  919
-rw-r--r--  src/share/vm/opto/loopopts.cpp  2677
-rw-r--r--  src/share/vm/opto/machnode.cpp  707
-rw-r--r--  src/share/vm/opto/machnode.hpp  826
-rw-r--r--  src/share/vm/opto/macro.cpp  995
-rw-r--r--  src/share/vm/opto/macro.hpp  107
-rw-r--r--  src/share/vm/opto/matcher.cpp  2123
-rw-r--r--  src/share/vm/opto/matcher.hpp  392
-rw-r--r--  src/share/vm/opto/memnode.cpp  3222
-rw-r--r--  src/share/vm/opto/memnode.hpp  1062
-rw-r--r--  src/share/vm/opto/mulnode.cpp  1310
-rw-r--r--  src/share/vm/opto/mulnode.hpp  247
-rw-r--r--  src/share/vm/opto/multnode.cpp  129
-rw-r--r--  src/share/vm/opto/multnode.hpp  81
-rw-r--r--  src/share/vm/opto/node.cpp  1919
-rw-r--r--  src/share/vm/opto/node.hpp  1492
-rw-r--r--  src/share/vm/opto/opcodes.cpp  42
-rw-r--r--  src/share/vm/opto/opcodes.hpp  43
-rw-r--r--  src/share/vm/opto/optoreg.hpp  194
-rw-r--r--  src/share/vm/opto/output.cpp  2680
-rw-r--r--  src/share/vm/opto/output.hpp  215
-rw-r--r--  src/share/vm/opto/parse.hpp  555
-rw-r--r--  src/share/vm/opto/parse1.cpp  2166
-rw-r--r--  src/share/vm/opto/parse2.cpp  2171
-rw-r--r--  src/share/vm/opto/parse3.cpp  463
-rw-r--r--  src/share/vm/opto/parseHelper.cpp  520
-rw-r--r--  src/share/vm/opto/phase.cpp  164
-rw-r--r--  src/share/vm/opto/phase.hpp  113
-rw-r--r--  src/share/vm/opto/phaseX.cpp  1758
-rw-r--r--  src/share/vm/opto/phaseX.hpp  516
-rw-r--r--  src/share/vm/opto/postaloc.cpp  584
-rw-r--r--  src/share/vm/opto/reg_split.cpp  1300
-rw-r--r--  src/share/vm/opto/regalloc.cpp  127
-rw-r--r--  src/share/vm/opto/regalloc.hpp  133
-rw-r--r--  src/share/vm/opto/regmask.cpp  288
-rw-r--r--  src/share/vm/opto/regmask.hpp  264
-rw-r--r--  src/share/vm/opto/rootnode.cpp  81
-rw-r--r--  src/share/vm/opto/rootnode.hpp  62
-rw-r--r--  src/share/vm/opto/runtime.cpp  1177
-rw-r--r--  src/share/vm/opto/runtime.hpp  289
-rw-r--r--  src/share/vm/opto/split_if.cpp  536
-rw-r--r--  src/share/vm/opto/subnode.cpp  1206
-rw-r--r--  src/share/vm/opto/subnode.hpp  501
-rw-r--r--  src/share/vm/opto/superword.cpp  2025
-rw-r--r--  src/share/vm/opto/superword.hpp  506
-rw-r--r--  src/share/vm/opto/type.cpp  3751
-rw-r--r--  src/share/vm/opto/type.hpp  1124
-rw-r--r--  src/share/vm/opto/vectornode.cpp  478
-rw-r--r--  src/share/vm/opto/vectornode.hpp  1134
103 files changed, 91467 insertions, 0 deletions
diff --git a/src/share/vm/opto/addnode.cpp b/src/share/vm/opto/addnode.cpp
new file mode 100644
index 000000000..42a17c997
--- /dev/null
+++ b/src/share/vm/opto/addnode.cpp
@@ -0,0 +1,871 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+#include "incls/_precompiled.incl"
+#include "incls/_addnode.cpp.incl"
+
+#define MAXFLOAT ((float)3.40282346638528860e+38)
+
+// Classic Add functionality. This covers all the usual 'add' behaviors for
+// an algebraic ring. Add-integer, add-float, add-double, and binary-or are
+// all inherited from this class. The various identity values are supplied
+// by virtual functions.
+
+
+//=============================================================================
+//------------------------------hash-------------------------------------------
+// Hash function over AddNodes. Needs to be commutative; i.e., I swap
+// (commute) inputs to AddNodes willy-nilly so the hash function must return
+// the same value in the presence of edge swapping.
+uint AddNode::hash() const {
+ return (uintptr_t)in(1) + (uintptr_t)in(2) + Opcode();
+}
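
A minimal standalone C++ sketch (not part of the patch) of why this recipe tolerates edge swapping: the hash is a plain sum of the two input identities plus the opcode, and addition commutes. The names add_hash/opcode here are invented stand-ins for illustration.

#include <cassert>
#include <cstdint>

// Sum of the two input "addresses" plus the opcode -- addition commutes,
// so swapping the inputs cannot change the hash.
static uintptr_t add_hash(const void* in1, const void* in2, int opcode) {
  return (uintptr_t)in1 + (uintptr_t)in2 + (uintptr_t)opcode;
}

int main() {
  int a, b;
  assert(add_hash(&a, &b, 42) == add_hash(&b, &a, 42)); // edge swap is invisible
  return 0;
}
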
+
+//------------------------------Identity---------------------------------------
+// If either input is a constant 0, return the other input.
+Node *AddNode::Identity( PhaseTransform *phase ) {
+ const Type *zero = add_id(); // The additive identity
+ if( phase->type( in(1) )->higher_equal( zero ) ) return in(2);
+ if( phase->type( in(2) )->higher_equal( zero ) ) return in(1);
+ return this;
+}
+
+//------------------------------commute----------------------------------------
+// Commute operands to move loads and constants to the right.
+static bool commute( Node *add, int con_left, int con_right ) {
+ Node *in1 = add->in(1);
+ Node *in2 = add->in(2);
+
+ // Convert "1+x" into "x+1".
+ // Right is a constant; leave it
+ if( con_right ) return false;
+ // Left is a constant; move it right.
+ if( con_left ) {
+ add->swap_edges(1, 2);
+ return true;
+ }
+
+ // Convert "Load+x" into "x+Load".
+ // Now check for loads
+ if( in2->is_Load() ) return false;
+ // Left is a Load and Right is not; move it right.
+ if( in1->is_Load() ) {
+ add->swap_edges(1, 2);
+ return true;
+ }
+
+ PhiNode *phi;
+ // Check for tight loop increments: Loop-phi of Add of loop-phi
+ if( in1->is_Phi() && (phi = in1->as_Phi()) && !phi->is_copy() && phi->region()->is_Loop() && phi->in(2)==add)
+ return false;
+ if( in2->is_Phi() && (phi = in2->as_Phi()) && !phi->is_copy() && phi->region()->is_Loop() && phi->in(2)==add){
+ add->swap_edges(1, 2);
+ return true;
+ }
+
+ // Otherwise, sort inputs (commutativity) to help value numbering.
+ if( in1->_idx > in2->_idx ) {
+ add->swap_edges(1, 2);
+ return true;
+ }
+ return false;
+}
+
+//------------------------------Idealize---------------------------------------
+// If we get here, we assume we are associative!
+Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ int con_left = t1->singleton();
+ int con_right = t2->singleton();
+
+ // Check for commutative operation desired
+ if( commute(this,con_left,con_right) ) return this;
+
+ AddNode *progress = NULL; // Progress flag
+
+ // Convert "(x+1)+2" into "x+(1+2)". If the right input is a
+ // constant, and the left input is an add of a constant, flatten the
+ // expression tree.
+ Node *add1 = in(1);
+ Node *add2 = in(2);
+ int add1_op = add1->Opcode();
+ int this_op = Opcode();
+ if( con_right && t2 != Type::TOP && // Right input is a constant?
+ add1_op == this_op ) { // Left input is an Add?
+
+    // Type of left input's right input
+ const Type *t12 = phase->type( add1->in(2) );
+ if( t12->singleton() && t12 != Type::TOP ) { // Left input is an add of a constant?
+ // Check for rare case of closed data cycle which can happen inside
+ // unreachable loops. In these cases the computation is undefined.
+#ifdef ASSERT
+ Node *add11 = add1->in(1);
+ int add11_op = add11->Opcode();
+ if( (add1 == add1->in(1))
+ || (add11_op == this_op && add11->in(1) == add1) ) {
+ assert(false, "dead loop in AddNode::Ideal");
+ }
+#endif
+ // The Add of the flattened expression
+ Node *x1 = add1->in(1);
+ Node *x2 = phase->makecon( add1->as_Add()->add_ring( t2, t12 ));
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ if( igvn ) {
+ set_req_X(2,x2,igvn);
+ set_req_X(1,x1,igvn);
+ } else {
+ set_req(2,x2);
+ set_req(1,x1);
+ }
+ progress = this; // Made progress
+ add1 = in(1);
+ add1_op = add1->Opcode();
+ }
+ }
+
+ // Convert "(x+1)+y" into "(x+y)+1". Push constants down the expression tree.
+ if( add1_op == this_op && !con_right ) {
+ Node *a12 = add1->in(2);
+ const Type *t12 = phase->type( a12 );
+ if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) ) {
+ add2 = add1->clone();
+ add2->set_req(2, in(2));
+ add2 = phase->transform(add2);
+ set_req(1, add2);
+ set_req(2, a12);
+ progress = this;
+ add2 = a12;
+ }
+ }
+
+ // Convert "x+(y+1)" into "(x+y)+1". Push constants down the expression tree.
+ int add2_op = add2->Opcode();
+ if( add2_op == this_op && !con_left ) {
+ Node *a22 = add2->in(2);
+ const Type *t22 = phase->type( a22 );
+ if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) ) {
+ Node *addx = add2->clone();
+ addx->set_req(1, in(1));
+ addx->set_req(2, add2->in(1));
+ addx = phase->transform(addx);
+ set_req(1, addx);
+ set_req(2, a22);
+ progress = this;
+ }
+ }
+
+ return progress;
+}
+
+//------------------------------Value-----------------------------------------
+// An add node sums its two inputs.  If one input is an RSD, we must mix in
+// the other input's symbols.
+const Type *AddNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // Check for an addition involving the additive identity
+ const Type *tadd = add_of_identity( t1, t2 );
+ if( tadd ) return tadd;
+
+ return add_ring(t1,t2); // Local flavor of type addition
+}
+
+//---------------------------add_of_identity-----------------------------------
+// Check for addition of the identity
+const Type *AddNode::add_of_identity( const Type *t1, const Type *t2 ) const {
+ const Type *zero = add_id(); // The additive identity
+ if( t1->higher_equal( zero ) ) return t2;
+ if( t2->higher_equal( zero ) ) return t1;
+
+ return NULL;
+}
+
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ int op1 = in(1)->Opcode();
+ int op2 = in(2)->Opcode();
+ // Fold (con1-x)+con2 into (con1+con2)-x
+ if( op1 == Op_SubI ) {
+ const Type *t_sub1 = phase->type( in(1)->in(1) );
+ const Type *t_2 = phase->type( in(2) );
+ if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
+ return new (phase->C, 3) SubINode(phase->makecon( add_ring( t_sub1, t_2 ) ),
+ in(1)->in(2) );
+ // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
+ if( op2 == Op_SubI ) {
+ // Check for dead cycle: d = (a-b)+(c-d)
+ assert( in(1)->in(2) != this && in(2)->in(2) != this,
+ "dead loop in AddINode::Ideal" );
+ Node *sub = new (phase->C, 3) SubINode(NULL, NULL);
+ sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in(1)->in(1), in(2)->in(1) ) ));
+ sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in(1)->in(2), in(2)->in(2) ) ));
+ return sub;
+ }
+ }
+
+ // Convert "x+(0-y)" into "(x-y)"
+ if( op2 == Op_SubI && phase->type(in(2)->in(1)) == TypeInt::ZERO )
+ return new (phase->C, 3) SubINode(in(1), in(2)->in(2) );
+
+ // Convert "(0-y)+x" into "(x-y)"
+ if( op1 == Op_SubI && phase->type(in(1)->in(1)) == TypeInt::ZERO )
+ return new (phase->C, 3) SubINode( in(2), in(1)->in(2) );
+
+ // Convert (x>>>z)+y into (x+(y<<z))>>>z for small constant z and y.
+ // Helps with array allocation math constant folding
+ // See 4790063:
+ // Unrestricted transformation is unsafe for some runtime values of 'x'
+ // ( x == 0, z == 1, y == -1 ) fails
+ // ( x == -5, z == 1, y == 1 ) fails
+ // Transform works for small z and small negative y when the addition
+ // (x + (y << z)) does not cross zero.
+ // Implement support for negative y and (x >= -(y << z))
+ // Have not observed cases where type information exists to support
+ // positive y and (x <= -(y << z))
+ if( op1 == Op_URShiftI && op2 == Op_ConI &&
+ in(1)->in(2)->Opcode() == Op_ConI ) {
+ jint z = phase->type( in(1)->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
+ jint y = phase->type( in(2) )->is_int()->get_con();
+
+ if( z < 5 && -5 < y && y < 0 ) {
+ const Type *t_in11 = phase->type(in(1)->in(1));
+ if( t_in11 != Type::TOP && (t_in11->is_int()->_lo >= -(y << z)) ) {
+ Node *a = phase->transform( new (phase->C, 3) AddINode( in(1)->in(1), phase->intcon(y<<z) ) );
+ return new (phase->C, 3) URShiftINode( a, in(1)->in(2) );
+ }
+ }
+ }
+
+ return AddNode::Ideal(phase, can_reshape);
+}
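
A standalone C++ sketch (separate from the patch) of the safety argument in the comment above: the unrestricted rewrite (x>>>z)+y ==> (x+(y<<z))>>>z is wrong in general, but agrees when z is small, y is a small negative constant, and x >= -(y<<z). The helpers urshift/lhs/rhs are invented for the sketch; urshift models Java's >>> on 32-bit ints.

#include <cstdint>
#include <cstdio>

static int32_t urshift(int32_t v, int z) {          // Java's >>> for jint
  return (int32_t)((uint32_t)v >> (z & 0x1f));
}
static int32_t lhs(int32_t x, int32_t y, int z) { return urshift(x, z) + y; }
static int32_t rhs(int32_t x, int32_t y, int z) { return urshift(x + y * (1 << z), z); }

int main() {
  // Unrestricted rewrite is unsafe: x==0, z==1, y==-1 gives -1 vs 2147483647.
  std::printf("%d vs %d\n", lhs(0, -1, 1), rhs(0, -1, 1));
  // Guarded case: z==1, y==-1, and x >= -(y<<z) == 2 -- the two shapes agree.
  for (int32_t x = 2; x < 1000; ++x)
    if (lhs(x, -1, 1) != rhs(x, -1, 1)) { std::printf("mismatch\n"); return 1; }
  std::printf("guarded range agrees\n");
  return 0;
}
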
+
+
+//------------------------------Identity---------------------------------------
+// Fold (x-y)+y OR y+(x-y) into x
+Node *AddINode::Identity( PhaseTransform *phase ) {
+ if( in(1)->Opcode() == Op_SubI && phase->eqv(in(1)->in(2),in(2)) ) {
+ return in(1)->in(1);
+ }
+ else if( in(2)->Opcode() == Op_SubI && phase->eqv(in(2)->in(2),in(1)) ) {
+ return in(2)->in(1);
+ }
+ return AddNode::Identity(phase);
+}
+
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs. Guaranteed never
+// to be passed a TOP or BOTTOM type, these are filtered out by
+// pre-check.
+const Type *AddINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+ int lo = r0->_lo + r1->_lo;
+ int hi = r0->_hi + r1->_hi;
+ if( !(r0->is_con() && r1->is_con()) ) {
+ // Not both constants, compute approximate result
+ if( (r0->_lo & r1->_lo) < 0 && lo >= 0 ) {
+ lo = min_jint; hi = max_jint; // Underflow on the low side
+ }
+ if( (~(r0->_hi | r1->_hi)) < 0 && hi < 0 ) {
+ lo = min_jint; hi = max_jint; // Overflow on the high side
+ }
+ if( lo > hi ) { // Handle overflow
+ lo = min_jint; hi = max_jint;
+ }
+ } else {
+ // both constants, compute precise result using 'lo' and 'hi'
+ // Semantics define overflow and underflow for integer addition
+ // as expected. In particular: 0x80000000 + 0x80000000 --> 0x0
+ }
+ return TypeInt::make( lo, hi, MAX2(r0->_widen,r1->_widen) );
+}
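
A standalone C++ sketch (separate from the patch) of the non-constant branch above: add the interval bounds and, if the sum could wrap a 32-bit int, give up and widen to all of int. The real code detects the wrap from the sign bits of lo/hi and keeps exact wrapped results in the both-constant case; this sketch uses 64-bit intermediates instead, and Range/add_ranges are invented names.

#include <cstdint>
#include <cstdio>

struct Range { int32_t lo, hi; };                  // stands in for TypeInt's [_lo,_hi]

static Range add_ranges(Range a, Range b) {
  int64_t lo = (int64_t)a.lo + b.lo;               // widen, then check the bounds
  int64_t hi = (int64_t)a.hi + b.hi;
  if (lo < INT32_MIN || hi > INT32_MAX)            // any possible wrap ...
    return Range{INT32_MIN, INT32_MAX};            // ... falls back to all of int
  return Range{(int32_t)lo, (int32_t)hi};
}

int main() {
  Range s = add_ranges(Range{10, 20}, Range{-5, 5});
  std::printf("[%d,%d]\n", (int)s.lo, (int)s.hi);  // [5,25]
  Range t = add_ranges(Range{INT32_MAX - 1, INT32_MAX}, Range{1, 2});
  std::printf("[%d,%d]\n", (int)t.lo, (int)t.hi);  // widened to all of int
  return 0;
}
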
+
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+Node *AddLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ int op1 = in(1)->Opcode();
+ int op2 = in(2)->Opcode();
+ // Fold (con1-x)+con2 into (con1+con2)-x
+ if( op1 == Op_SubL ) {
+ const Type *t_sub1 = phase->type( in(1)->in(1) );
+ const Type *t_2 = phase->type( in(2) );
+ if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
+ return new (phase->C, 3) SubLNode(phase->makecon( add_ring( t_sub1, t_2 ) ),
+ in(1)->in(2) );
+ // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
+ if( op2 == Op_SubL ) {
+ // Check for dead cycle: d = (a-b)+(c-d)
+ assert( in(1)->in(2) != this && in(2)->in(2) != this,
+ "dead loop in AddLNode::Ideal" );
+ Node *sub = new (phase->C, 3) SubLNode(NULL, NULL);
+ sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(1), in(2)->in(1) ) ));
+ sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(2), in(2)->in(2) ) ));
+ return sub;
+ }
+ }
+
+ // Convert "x+(0-y)" into "(x-y)"
+ if( op2 == Op_SubL && phase->type(in(2)->in(1)) == TypeLong::ZERO )
+ return new (phase->C, 3) SubLNode(in(1), in(2)->in(2) );
+
+ // Convert "X+X+X+X+X...+X+Y" into "k*X+Y" or really convert "X+(X+Y)"
+ // into "(X<<1)+Y" and let shift-folding happen.
+ if( op2 == Op_AddL &&
+ in(2)->in(1) == in(1) &&
+ op1 != Op_ConL &&
+ 0 ) {
+ Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in(1),phase->intcon(1)));
+ return new (phase->C, 3) AddLNode(shift,in(2)->in(2));
+ }
+
+ return AddNode::Ideal(phase, can_reshape);
+}
+
+
+//------------------------------Identity---------------------------------------
+// Fold (x-y)+y OR y+(x-y) into x
+Node *AddLNode::Identity( PhaseTransform *phase ) {
+ if( in(1)->Opcode() == Op_SubL && phase->eqv(in(1)->in(2),in(2)) ) {
+ return in(1)->in(1);
+ }
+ else if( in(2)->Opcode() == Op_SubL && phase->eqv(in(2)->in(2),in(1)) ) {
+ return in(2)->in(1);
+ }
+ return AddNode::Identity(phase);
+}
+
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs. Guaranteed never
+// to be passed a TOP or BOTTOM type, these are filtered out by
+// pre-check.
+const Type *AddLNode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+ jlong lo = r0->_lo + r1->_lo;
+ jlong hi = r0->_hi + r1->_hi;
+ if( !(r0->is_con() && r1->is_con()) ) {
+ // Not both constants, compute approximate result
+ if( (r0->_lo & r1->_lo) < 0 && lo >= 0 ) {
+      lo = min_jlong; hi = max_jlong; // Underflow on the low side
+ }
+ if( (~(r0->_hi | r1->_hi)) < 0 && hi < 0 ) {
+ lo = min_jlong; hi = max_jlong; // Overflow on the high side
+ }
+ if( lo > hi ) { // Handle overflow
+ lo = min_jlong; hi = max_jlong;
+ }
+ } else {
+ // both constants, compute precise result using 'lo' and 'hi'
+ // Semantics define overflow and underflow for integer addition
+    // as expected.  In particular:
+    // 0x8000000000000000 + 0x8000000000000000 --> 0x0
+ }
+ return TypeLong::make( lo, hi, MAX2(r0->_widen,r1->_widen) );
+}
+
+
+//=============================================================================
+//------------------------------add_of_identity--------------------------------
+// Check for addition of the identity
+const Type *AddFNode::add_of_identity( const Type *t1, const Type *t2 ) const {
+ // x ADD 0 should return x unless 'x' is a -zero
+ //
+ // const Type *zero = add_id(); // The additive identity
+ // jfloat f1 = t1->getf();
+ // jfloat f2 = t2->getf();
+ //
+ // if( t1->higher_equal( zero ) ) return t2;
+ // if( t2->higher_equal( zero ) ) return t1;
+
+ return NULL;
+}
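
A standalone C++ sketch (separate from the patch) of the negative-zero case that keeps this method from treating +0.0f as an identity: under IEEE 754, (-0.0f) + 0.0f is +0.0f, so folding x+0 to x would preserve a sign bit that the real addition clears.

#include <cmath>
#include <cstdio>

int main() {
  float x = -0.0f;
  float folded = x;               // what "x + 0 ==> x" would produce
  float actual = x + 0.0f;        // what the add really produces: +0.0f
  std::printf("signbit(folded)=%d signbit(actual)=%d\n",
              (int)std::signbit(folded), (int)std::signbit(actual)); // 1 vs 0
  return 0;
}
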
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *AddFNode::add_ring( const Type *t0, const Type *t1 ) const {
+ // We must be adding 2 float constants.
+ return TypeF::make( t0->getf() + t1->getf() );
+}
+
+//------------------------------Ideal------------------------------------------
+Node *AddFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
+ return AddNode::Ideal(phase, can_reshape); // commutative and associative transforms
+ }
+
+ // Floating point additions are not associative because of boundary conditions (infinity)
+ return commute(this,
+ phase->type( in(1) )->singleton(),
+ phase->type( in(2) )->singleton() ) ? this : NULL;
+}
+
+
+//=============================================================================
+//------------------------------add_of_identity--------------------------------
+// Check for addition of the identity
+const Type *AddDNode::add_of_identity( const Type *t1, const Type *t2 ) const {
+ // x ADD 0 should return x unless 'x' is a -zero
+ //
+ // const Type *zero = add_id(); // The additive identity
+  // jdouble d1 = t1->getd();
+  // jdouble d2 = t2->getd();
+ //
+ // if( t1->higher_equal( zero ) ) return t2;
+ // if( t2->higher_equal( zero ) ) return t1;
+
+ return NULL;
+}
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *AddDNode::add_ring( const Type *t0, const Type *t1 ) const {
+ // We must be adding 2 double constants.
+ return TypeD::make( t0->getd() + t1->getd() );
+}
+
+//------------------------------Ideal------------------------------------------
+Node *AddDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
+ return AddNode::Ideal(phase, can_reshape); // commutative and associative transforms
+ }
+
+ // Floating point additions are not associative because of boundary conditions (infinity)
+ return commute(this,
+ phase->type( in(1) )->singleton(),
+ phase->type( in(2) )->singleton() ) ? this : NULL;
+}
+
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If one input is a constant 0, return the other input.
+Node *AddPNode::Identity( PhaseTransform *phase ) {
+ return ( phase->type( in(Offset) )->higher_equal( TypeX_ZERO ) ) ? in(Address) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+Node *AddPNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Bail out if dead inputs
+ if( phase->type( in(Address) ) == Type::TOP ) return NULL;
+
+ // If the left input is an add of a constant, flatten the expression tree.
+ const Node *n = in(Address);
+ if (n->is_AddP() && n->in(Base) == in(Base)) {
+ const AddPNode *addp = n->as_AddP(); // Left input is an AddP
+ assert( !addp->in(Address)->is_AddP() ||
+ addp->in(Address)->as_AddP() != addp,
+ "dead loop in AddPNode::Ideal" );
+ // Type of left input's right input
+ const Type *t = phase->type( addp->in(Offset) );
+ if( t == Type::TOP ) return NULL;
+ const TypeX *t12 = t->is_intptr_t();
+ if( t12->is_con() ) { // Left input is an add of a constant?
+ // If the right input is a constant, combine constants
+ const Type *temp_t2 = phase->type( in(Offset) );
+ if( temp_t2 == Type::TOP ) return NULL;
+ const TypeX *t2 = temp_t2->is_intptr_t();
+ if( t2->is_con() ) {
+ // The Add of the flattened expression
+ set_req(Address, addp->in(Address));
+ set_req(Offset , phase->MakeConX(t2->get_con() + t12->get_con()));
+ return this; // Made progress
+ }
+ // Else move the constant to the right. ((A+con)+B) into ((A+B)+con)
+ set_req(Address, phase->transform(new (phase->C, 4) AddPNode(in(Base),addp->in(Address),in(Offset))));
+ set_req(Offset , addp->in(Offset));
+ return this;
+ }
+ }
+
+ // Raw pointers?
+ if( in(Base)->bottom_type() == Type::TOP ) {
+ // If this is a NULL+long form (from unsafe accesses), switch to a rawptr.
+ if (phase->type(in(Address)) == TypePtr::NULL_PTR) {
+ Node* offset = in(Offset);
+ return new (phase->C, 2) CastX2PNode(offset);
+ }
+ }
+
+ // If the right is an add of a constant, push the offset down.
+ // Convert: (ptr + (offset+con)) into (ptr+offset)+con.
+ // The idea is to merge array_base+scaled_index groups together,
+ // and only have different constant offsets from the same base.
+ const Node *add = in(Offset);
+ if( add->Opcode() == Op_AddX && add->in(1) != add ) {
+ const Type *t22 = phase->type( add->in(2) );
+ if( t22->singleton() && (t22 != Type::TOP) ) { // Right input is an add of a constant?
+ set_req(Address, phase->transform(new (phase->C, 4) AddPNode(in(Base),in(Address),add->in(1))));
+ set_req(Offset, add->in(2));
+ return this; // Made progress
+ }
+ }
+
+ return NULL; // No progress
+}
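
A standalone C++ sketch (separate from the patch) of the re-association performed above: regrouping (ptr + (offset + con)) as ((ptr + offset) + con) never changes the final address, it only pushes the constant to the outside so addresses sharing the same base+index part differ only by a trailing constant.

#include <cstdio>

int main() {
  char buf[64];
  char* base = buf;
  long  off  = 5;                    // e.g. a scaled array index
  long  con  = 3;                    // e.g. a header/field offset
  char* a = base + (off + con);      // (ptr + (offset + con))
  char* b = (base + off) + con;      // ((ptr + offset) + con)
  std::printf("%s\n", (a == b) ? "same address" : "different");
  return 0;
}
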
+
+//------------------------------bottom_type------------------------------------
+// Bottom-type is the pointer-type with unknown offset.
+const Type *AddPNode::bottom_type() const {
+ if (in(Address) == NULL) return TypePtr::BOTTOM;
+ const TypePtr *tp = in(Address)->bottom_type()->isa_ptr();
+ if( !tp ) return Type::TOP; // TOP input means TOP output
+ assert( in(Offset)->Opcode() != Op_ConP, "" );
+ const Type *t = in(Offset)->bottom_type();
+ if( t == Type::TOP )
+ return tp->add_offset(Type::OffsetTop);
+ const TypeX *tx = t->is_intptr_t();
+ intptr_t txoffset = Type::OffsetBot;
+  if (tx->is_con()) {   // Offset is a constant?
+ txoffset = tx->get_con();
+ if (txoffset != (int)txoffset)
+ txoffset = Type::OffsetBot; // oops: add_offset will choke on it
+ }
+ return tp->add_offset(txoffset);
+}
+
+//------------------------------Value------------------------------------------
+const Type *AddPNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(Address) );
+ const Type *t2 = phase->type( in(Offset) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is a pointer
+ const TypePtr *p1 = t1->isa_ptr();
+ // Right input is an int
+ const TypeX *p2 = t2->is_intptr_t();
+ // Add 'em
+ intptr_t p2offset = Type::OffsetBot;
+  if (p2->is_con()) {   // Offset is a constant?
+ p2offset = p2->get_con();
+ if (p2offset != (int)p2offset)
+ p2offset = Type::OffsetBot; // oops: add_offset will choke on it
+ }
+ return p1->add_offset(p2offset);
+}
+
+//------------------------Ideal_base_and_offset--------------------------------
+// Split an oop pointer into a base and offset.
+// (The offset might be Type::OffsetBot in the case of an array.)
+// Return the base, or NULL if failure.
+Node* AddPNode::Ideal_base_and_offset(Node* ptr, PhaseTransform* phase,
+ // second return value:
+ intptr_t& offset) {
+ if (ptr->is_AddP()) {
+ Node* base = ptr->in(AddPNode::Base);
+ Node* addr = ptr->in(AddPNode::Address);
+ Node* offs = ptr->in(AddPNode::Offset);
+ if (base == addr || base->is_top()) {
+ offset = phase->find_intptr_t_con(offs, Type::OffsetBot);
+ if (offset != Type::OffsetBot) {
+ return addr;
+ }
+ }
+ }
+ offset = Type::OffsetBot;
+ return NULL;
+}
+
+//------------------------------match_edge-------------------------------------
+// Do we Match on this edge index or not? Do not match base pointer edge
+uint AddPNode::match_edge(uint idx) const {
+ return idx > Base;
+}
+
+//---------------------------mach_bottom_type----------------------------------
+// Utility function for use by ADLC. Implements bottom_type for matched AddP.
+const Type *AddPNode::mach_bottom_type( const MachNode* n) {
+ Node* base = n->in(Base);
+ const Type *t = base->bottom_type();
+ if ( t == Type::TOP ) {
+ // an untyped pointer
+ return TypeRawPtr::BOTTOM;
+ }
+ const TypePtr* tp = t->isa_oopptr();
+ if ( tp == NULL ) return t;
+ if ( tp->_offset == TypePtr::OffsetBot ) return tp;
+
+ // We must carefully add up the various offsets...
+ intptr_t offset = 0;
+ const TypePtr* tptr = NULL;
+
+ uint numopnds = n->num_opnds();
+ uint index = n->oper_input_base();
+ for ( uint i = 1; i < numopnds; i++ ) {
+ MachOper *opnd = n->_opnds[i];
+ // Check for any interesting operand info.
+ // In particular, check for both memory and non-memory operands.
+ // %%%%% Clean this up: use xadd_offset
+ int con = opnd->constant();
+ if ( con == TypePtr::OffsetBot ) goto bottom_out;
+ offset += con;
+ con = opnd->constant_disp();
+ if ( con == TypePtr::OffsetBot ) goto bottom_out;
+ offset += con;
+ if( opnd->scale() != 0 ) goto bottom_out;
+
+ // Check each operand input edge. Find the 1 allowed pointer
+ // edge. Other edges must be index edges; track exact constant
+ // inputs and otherwise assume the worst.
+ for ( uint j = opnd->num_edges(); j > 0; j-- ) {
+ Node* edge = n->in(index++);
+ const Type* et = edge->bottom_type();
+ const TypeX* eti = et->isa_intptr_t();
+ if ( eti == NULL ) {
+ // there must be one pointer among the operands
+ guarantee(tptr == NULL, "must be only one pointer operand");
+ tptr = et->isa_oopptr();
+ guarantee(tptr != NULL, "non-int operand must be pointer");
+ continue;
+ }
+ if ( eti->_hi != eti->_lo ) goto bottom_out;
+ offset += eti->_lo;
+ }
+ }
+ guarantee(tptr != NULL, "must be exactly one pointer operand");
+ return tptr->add_offset(offset);
+
+ bottom_out:
+ return tp->add_offset(TypePtr::OffsetBot);
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *OrINode::Identity( PhaseTransform *phase ) {
+ // x | x => x
+ if (phase->eqv(in(1), in(2))) {
+ return in(1);
+ }
+
+ return AddNode::Identity(phase);
+}
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs IN THE CURRENT RING. For
+// the logical operations the ring's ADD is really a logical OR function.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *OrINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // If both args are bool, can figure out better types
+ if ( r0 == TypeInt::BOOL ) {
+ if ( r1 == TypeInt::ONE) {
+ return TypeInt::ONE;
+ } else if ( r1 == TypeInt::BOOL ) {
+ return TypeInt::BOOL;
+ }
+ } else if ( r0 == TypeInt::ONE ) {
+ if ( r1 == TypeInt::BOOL ) {
+ return TypeInt::ONE;
+ }
+ }
+
+ // If either input is not a constant, just return all integers.
+ if( !r0->is_con() || !r1->is_con() )
+ return TypeInt::INT; // Any integer, but still no symbols.
+
+ // Otherwise just OR them bits.
+ return TypeInt::make( r0->get_con() | r1->get_con() );
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *OrLNode::Identity( PhaseTransform *phase ) {
+ // x | x => x
+ if (phase->eqv(in(1), in(2))) {
+ return in(1);
+ }
+
+ return AddNode::Identity(phase);
+}
+
+//------------------------------add_ring---------------------------------------
+const Type *OrLNode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+
+ // If either input is not a constant, just return all integers.
+ if( !r0->is_con() || !r1->is_con() )
+ return TypeLong::LONG; // Any integer, but still no symbols.
+
+ // Otherwise just OR them bits.
+ return TypeLong::make( r0->get_con() | r1->get_con() );
+}
+
+//=============================================================================
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs IN THE CURRENT RING. For
+// the logical operations the ring's ADD is really a logical OR function.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *XorINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // Complementing a boolean?
+ if( r0 == TypeInt::BOOL && ( r1 == TypeInt::ONE
+ || r1 == TypeInt::BOOL))
+ return TypeInt::BOOL;
+
+ if( !r0->is_con() || !r1->is_con() ) // Not constants
+ return TypeInt::INT; // Any integer, but still no symbols.
+
+ // Otherwise just XOR them bits.
+ return TypeInt::make( r0->get_con() ^ r1->get_con() );
+}
+
+//=============================================================================
+//------------------------------add_ring---------------------------------------
+const Type *XorLNode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+
+ // If either input is not a constant, just return all integers.
+ if( !r0->is_con() || !r1->is_con() )
+ return TypeLong::LONG; // Any integer, but still no symbols.
+
+  // Otherwise just XOR them bits.
+ return TypeLong::make( r0->get_con() ^ r1->get_con() );
+}
+
+//=============================================================================
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs.
+const Type *MaxINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // Otherwise just MAX them bits.
+ return TypeInt::make( MAX2(r0->_lo,r1->_lo), MAX2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) );
+}
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+// MINs show up in range-check loop limit calculations. Look for
+// "MIN2(x+c0,MIN2(y,x+c1))". Pick the smaller constant: "MIN2(x+c0,y)"
+Node *MinINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node *progress = NULL;
+ // Force a right-spline graph
+ Node *l = in(1);
+ Node *r = in(2);
+ // Transform MinI1( MinI2(a,b), c) into MinI1( a, MinI2(b,c) )
+ // to force a right-spline graph for the rest of MinINode::Ideal().
+ if( l->Opcode() == Op_MinI ) {
+ assert( l != l->in(1), "dead loop in MinINode::Ideal" );
+ r = phase->transform(new (phase->C, 3) MinINode(l->in(2),r));
+ l = l->in(1);
+ set_req(1, l);
+ set_req(2, r);
+ return this;
+ }
+
+ // Get left input & constant
+ Node *x = l;
+ int x_off = 0;
+ if( x->Opcode() == Op_AddI && // Check for "x+c0" and collect constant
+ x->in(2)->is_Con() ) {
+ const Type *t = x->in(2)->bottom_type();
+ if( t == Type::TOP ) return NULL; // No progress
+ x_off = t->is_int()->get_con();
+ x = x->in(1);
+ }
+
+ // Scan a right-spline-tree for MINs
+ Node *y = r;
+ int y_off = 0;
+ // Check final part of MIN tree
+ if( y->Opcode() == Op_AddI && // Check for "y+c1" and collect constant
+ y->in(2)->is_Con() ) {
+ const Type *t = y->in(2)->bottom_type();
+ if( t == Type::TOP ) return NULL; // No progress
+ y_off = t->is_int()->get_con();
+ y = y->in(1);
+ }
+ if( x->_idx > y->_idx && r->Opcode() != Op_MinI ) {
+ swap_edges(1, 2);
+ return this;
+ }
+
+
+ if( r->Opcode() == Op_MinI ) {
+ assert( r != r->in(2), "dead loop in MinINode::Ideal" );
+ y = r->in(1);
+ // Check final part of MIN tree
+ if( y->Opcode() == Op_AddI &&// Check for "y+c1" and collect constant
+ y->in(2)->is_Con() ) {
+ const Type *t = y->in(2)->bottom_type();
+ if( t == Type::TOP ) return NULL; // No progress
+ y_off = t->is_int()->get_con();
+ y = y->in(1);
+ }
+
+ if( x->_idx > y->_idx )
+ return new (phase->C, 3) MinINode(r->in(1),phase->transform(new (phase->C, 3) MinINode(l,r->in(2))));
+
+ // See if covers: MIN2(x+c0,MIN2(y+c1,z))
+ if( !phase->eqv(x,y) ) return NULL;
+    // If (y == x) transform MIN2(x+c0, MIN2(x+c1,z)) into
+    // MIN2(x + MIN2(c0,c1), z).
+ return new (phase->C, 3) MinINode(phase->transform(new (phase->C, 3) AddINode(x,phase->intcon(MIN2(x_off,y_off)))),r->in(2));
+ } else {
+ // See if covers: MIN2(x+c0,y+c1)
+ if( !phase->eqv(x,y) ) return NULL;
+    // If (y == x) transform MIN2(x+c0,x+c1) into x + MIN2(c0,c1).
+ return new (phase->C, 3) AddINode(x,phase->intcon(MIN2(x_off,y_off)));
+ }
+
+}
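
A standalone C++ sketch (separate from the patch) of the identity behind the last two cases above: with a shared x (and away from int overflow at the extremes), MIN2(x+c0, MIN2(x+c1, z)) equals MIN2(x + MIN2(c0,c1), z), which is the shape the returned node computes. A quick spot-check over a small range:

#include <algorithm>
#include <cstdio>

int main() {
  const int c0 = 7, c1 = -3;                       // two sample constants
  for (int x = -50; x <= 50; ++x)
    for (int z = -60; z <= 60; ++z) {
      int full    = std::min(x + c0, std::min(x + c1, z));
      int reduced = std::min(x + std::min(c0, c1), z);
      if (full != reduced) { std::printf("mismatch\n"); return 1; }
    }
  std::printf("identity holds on the sampled range\n");
  return 0;
}
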
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs.
+const Type *MinINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // Otherwise just MIN them bits.
+ return TypeInt::make( MIN2(r0->_lo,r1->_lo), MIN2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) );
+}
diff --git a/src/share/vm/opto/addnode.hpp b/src/share/vm/opto/addnode.hpp
new file mode 100644
index 000000000..5170f50e1
--- /dev/null
+++ b/src/share/vm/opto/addnode.hpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+class PhaseTransform;
+
+//------------------------------AddNode----------------------------------------
+// Classic Add functionality. This covers all the usual 'add' behaviors for
+// an algebraic ring. Add-integer, add-float, add-double, and binary-or are
+// all inherited from this class. The various identity values are supplied
+// by virtual functions.
+class AddNode : public Node {
+ virtual uint hash() const;
+public:
+ AddNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
+ init_class_id(Class_Add);
+ }
+
+ // Handle algebraic identities here. If we have an identity, return the Node
+ // we are equivalent to. We look for "add of zero" as an identity.
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // We also canonicalize the Node, moving constants to the right input,
+ // and flatten expressions (so that 1+x+2 becomes x+3).
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual add() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Check if this addition involves the additive identity
+ virtual const Type *add_of_identity( const Type *t1, const Type *t2 ) const;
+
+ // Supplied function returns the sum of the inputs.
+ // This also type-checks the inputs for sanity. Guaranteed never to
+ // be passed a TOP or BOTTOM type, these are filtered out by a pre-check.
+ virtual const Type *add_ring( const Type *, const Type * ) const = 0;
+
+ // Supplied function to return the additive identity type
+ virtual const Type *add_id() const = 0;
+
+};
+
+//------------------------------AddINode---------------------------------------
+// Add 2 integers
+class AddINode : public AddNode {
+public:
+ AddINode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::ZERO; }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------AddLNode---------------------------------------
+// Add 2 longs
+class AddLNode : public AddNode {
+public:
+ AddLNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeLong::ZERO; }
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------AddFNode---------------------------------------
+// Add 2 floats
+class AddFNode : public AddNode {
+public:
+ AddFNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *add_of_identity( const Type *t1, const Type *t2 ) const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeF::ZERO; }
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------AddDNode---------------------------------------
+// Add 2 doubles
+class AddDNode : public AddNode {
+public:
+ AddDNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *add_of_identity( const Type *t1, const Type *t2 ) const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeD::ZERO; }
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------AddPNode---------------------------------------
+// Add pointer plus integer to get pointer. NOT commutative, really.
+// So not really an AddNode. Lives here, because people associate it with
+// an add.
+class AddPNode : public Node {
+public:
+ enum { Control, // When is it safe to do this add?
+ Base, // Base oop, for GC purposes
+ Address, // Actually address, derived from base
+ Offset } ; // Offset added to address
+ AddPNode( Node *base, Node *ptr, Node *off ) : Node(0,base,ptr,off) {
+ init_class_id(Class_AddP);
+ }
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ Node *base_node() { assert( req() > Base, "Missing base"); return in(Base); }
+ static Node* Ideal_base_and_offset(Node* ptr, PhaseTransform* phase,
+ // second return value:
+ intptr_t& offset);
+ // Do not match base-ptr edge
+ virtual uint match_edge(uint idx) const;
+ static const Type *mach_bottom_type(const MachNode* n); // used by ad_<arch>.hpp
+};
+
+//------------------------------OrINode----------------------------------------
+// Logically OR 2 integers. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring.
+class OrINode : public AddNode {
+public:
+ OrINode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::ZERO; }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------OrLNode----------------------------------------
+// Logically OR 2 longs. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring.
+class OrLNode : public AddNode {
+public:
+ OrLNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeLong::ZERO; }
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------XorINode---------------------------------------
+// XOR'ing 2 integers
+class XorINode : public AddNode {
+public:
+ XorINode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::ZERO; }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------XorLNode---------------------------------------
+// XOR'ing 2 longs
+class XorLNode : public AddNode {
+public:
+ XorLNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeLong::ZERO; }
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------MaxNode----------------------------------------
+// Max (or min) of 2 values. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring. Only new thing is that we allow
+// 2 equal inputs to be equal.
+class MaxNode : public AddNode {
+public:
+ MaxNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const = 0;
+};
+
+//------------------------------MaxINode---------------------------------------
+// Maximum of 2 integers. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring.
+class MaxINode : public MaxNode {
+public:
+ MaxINode( Node *in1, Node *in2 ) : MaxNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::make(min_jint); }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------MinINode---------------------------------------
+// MINimum of 2 integers. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring.
+class MinINode : public MaxNode {
+public:
+ MinINode( Node *in1, Node *in2 ) : MaxNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::make(max_jint); }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
diff --git a/src/share/vm/opto/adlcVMDeps.hpp b/src/share/vm/opto/adlcVMDeps.hpp
new file mode 100644
index 000000000..7d4f14ed9
--- /dev/null
+++ b/src/share/vm/opto/adlcVMDeps.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Declare commonly known constants and data structures shared between the
+// ADLC and the VM
+//
+
+class AdlcVMDeps : public AllStatic {
+ public:
+ // Mirror of TypeFunc types
+ enum { Control, I_O, Memory, FramePtr, ReturnAdr, Parms };
+
+ enum Cisc_Status { Not_cisc_spillable = -1 };
+
+ // Mirror of OptoReg::Name names
+ enum Name {
+ Physical = 0 // Start of physical regs
+ };
+
+ // relocInfo
+ static const char* oop_reloc_type() { return "relocInfo::oop_type"; }
+ static const char* none_reloc_type() { return "relocInfo::none"; }
+};
diff --git a/src/share/vm/opto/block.cpp b/src/share/vm/opto/block.cpp
new file mode 100644
index 000000000..c6b94a45a
--- /dev/null
+++ b/src/share/vm/opto/block.cpp
@@ -0,0 +1,952 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_block.cpp.incl"
+
+
+//-----------------------------------------------------------------------------
+void Block_Array::grow( uint i ) {
+ assert(i >= Max(), "must be an overflow");
+ debug_only(_limit = i+1);
+ if( i < _size ) return;
+ if( !_size ) {
+ _size = 1;
+ _blocks = (Block**)_arena->Amalloc( _size * sizeof(Block*) );
+ _blocks[0] = NULL;
+ }
+ uint old = _size;
+ while( i >= _size ) _size <<= 1; // Double to fit
+ _blocks = (Block**)_arena->Arealloc( _blocks, old*sizeof(Block*),_size*sizeof(Block*));
+ Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
+}
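
A standalone C++ sketch (separate from the patch) of the grow-by-doubling pattern used above, with the C runtime standing in for the arena allocator (Amalloc/Arealloc) and memset for Copy::zero_to_bytes. PtrArray and its members are invented names, and allocation-failure checks are omitted for brevity.

#include <cstdlib>
#include <cstring>

struct PtrArray {
  void**   data = nullptr;
  unsigned size = 0;

  void grow(unsigned i) {                     // ensure index i is addressable
    if (data == nullptr) {                    // first use: start with one slot
      size = 1;
      data = (void**)std::calloc(1, sizeof(void*));
    }
    if (i < size) return;                     // already big enough
    unsigned old = size;
    while (i >= size) size <<= 1;             // double until index i fits
    data = (void**)std::realloc(data, size * sizeof(void*));
    std::memset(data + old, 0, (size - old) * sizeof(void*)); // zero the new tail
  }
};

int main() {
  PtrArray a;
  a.grow(37);                                 // capacity: 1 -> 2 -> ... -> 64
  return (a.size == 64 && a.data[37] == nullptr) ? 0 : 1;
}
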
+
+//=============================================================================
+void Block_List::remove(uint i) {
+ assert(i < _cnt, "index out of bounds");
+ Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*)));
+ pop(); // shrink list by one block
+}
+
+void Block_List::insert(uint i, Block *b) {
+ push(b); // grow list by one block
+ Copy::conjoint_words_to_higher((HeapWord*)&_blocks[i], (HeapWord*)&_blocks[i+1], ((_cnt-i-1)*sizeof(Block*)));
+ _blocks[i] = b;
+}
+
+
+//=============================================================================
+
+uint Block::code_alignment() {
+ // Check for Root block
+ if( _pre_order == 0 ) return CodeEntryAlignment;
+ // Check for Start block
+ if( _pre_order == 1 ) return InteriorEntryAlignment;
+ // Check for loop alignment
+ Node *h = head();
+ if( h->is_Loop() && h->as_Loop()->is_inner_loop() ) {
+ // Pre- and post-loops have low trip count so do not bother with
+ // NOPs for align loop head. The constants are hidden from tuning
+ // but only because my "divide by 4" heuristic surely gets nearly
+ // all possible gain (a "do not align at all" heuristic has a
+ // chance of getting a really tiny gain).
+ if( h->is_CountedLoop() && (h->as_CountedLoop()->is_pre_loop() ||
+ h->as_CountedLoop()->is_post_loop()) )
+ return (OptoLoopAlignment > 4) ? (OptoLoopAlignment>>2) : 1;
+ // Loops with low backedge frequency should not be aligned.
+ Node *n = h->in(LoopNode::LoopBackControl)->in(0);
+ if( n->is_MachIf() && n->as_MachIf()->_prob < 0.01 ) {
+ return 1; // Loop does not loop, more often than not!
+ }
+ return OptoLoopAlignment; // Otherwise align loop head
+ }
+ return 1; // no particular alignment
+}
+
+//-----------------------------------------------------------------------------
+// Compute the size of the first 'inst_cnt' instructions in this block.
+// Return the number of instructions left to compute if the block has
+// fewer than 'inst_cnt' instructions.
+uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt,
+ PhaseRegAlloc* ra) {
+ uint last_inst = _nodes.size();
+ for( uint j = 0; j < last_inst && inst_cnt > 0; j++ ) {
+ uint inst_size = _nodes[j]->size(ra);
+ if( inst_size > 0 ) {
+ inst_cnt--;
+ uint sz = sum_size + inst_size;
+ if( sz <= (uint)OptoLoopAlignment ) {
+ // Compute size of instructions which fit into fetch buffer only
+ // since all inst_cnt instructions will not fit even if we align them.
+ sum_size = sz;
+ } else {
+ return 0;
+ }
+ }
+ }
+ return inst_cnt;
+}
+
+//-----------------------------------------------------------------------------
+uint Block::find_node( const Node *n ) const {
+ for( uint i = 0; i < _nodes.size(); i++ ) {
+ if( _nodes[i] == n )
+ return i;
+ }
+ ShouldNotReachHere();
+ return 0;
+}
+
+// Find and remove n from block list
+void Block::find_remove( const Node *n ) {
+ _nodes.remove(find_node(n));
+}
+
+//------------------------------is_Empty---------------------------------------
+// Return empty status of a block. Empty blocks contain only the head, other
+// ideal nodes, and an optional trailing goto.
+int Block::is_Empty() const {
+
+ // Root or start block is not considered empty
+ if (head()->is_Root() || head()->is_Start()) {
+ return not_empty;
+ }
+
+ int success_result = completely_empty;
+ int end_idx = _nodes.size()-1;
+
+ // Check for ending goto
+ if ((end_idx > 0) && (_nodes[end_idx]->is_Goto())) {
+ success_result = empty_with_goto;
+ end_idx--;
+ }
+
+ // Unreachable blocks are considered empty
+ if (num_preds() <= 1) {
+ return success_result;
+ }
+
+  // Ideal nodes are allowable in empty blocks: skip them.  Only MachNodes
+ // turn directly into code, because only MachNodes have non-trivial
+ // emit() functions.
+ while ((end_idx > 0) && !_nodes[end_idx]->is_Mach()) {
+ end_idx--;
+ }
+
+ // No room for any interesting instructions?
+ if (end_idx == 0) {
+ return success_result;
+ }
+
+ return not_empty;
+}
+
+//------------------------------has_uncommon_code------------------------------
+// Return true if the block's code implies that it is likely to be
+// executed infrequently. Check to see if the block ends in a Halt or
+// a low probability call.
+bool Block::has_uncommon_code() const {
+ Node* en = end();
+
+ if (en->is_Goto())
+ en = en->in(0);
+ if (en->is_Catch())
+ en = en->in(0);
+ if (en->is_Proj() && en->in(0)->is_MachCall()) {
+ MachCallNode* call = en->in(0)->as_MachCall();
+ if (call->cnt() != COUNT_UNKNOWN && call->cnt() <= PROB_UNLIKELY_MAG(4)) {
+ // This is true for slow-path stubs like new_{instance,array},
+ // slow_arraycopy, complete_monitor_locking, uncommon_trap.
+ // The magic number corresponds to the probability of an uncommon_trap,
+ // even though it is a count not a probability.
+ return true;
+ }
+ }
+
+ int op = en->is_Mach() ? en->as_Mach()->ideal_Opcode() : en->Opcode();
+ return op == Op_Halt;
+}
+
+//------------------------------is_uncommon------------------------------------
+// True if block is low enough frequency or guarded by a test which
+// mostly does not go here.
+bool Block::is_uncommon( Block_Array &bbs ) const {
+ // Initial blocks must never be moved, so are never uncommon.
+ if (head()->is_Root() || head()->is_Start()) return false;
+
+ // Check for way-low freq
+ if( _freq < BLOCK_FREQUENCY(0.00001f) ) return true;
+
+ // Look for code shape indicating uncommon_trap or slow path
+ if (has_uncommon_code()) return true;
+
+ const float epsilon = 0.05f;
+ const float guard_factor = PROB_UNLIKELY_MAG(4) / (1.f - epsilon);
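+  // With a magnitude-4 probability of 1e-4 and a 5% epsilon, guard_factor
+  // works out to about 1e-4/0.95, i.e. roughly 1/9500.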
+ uint uncommon_preds = 0;
+ uint freq_preds = 0;
+ uint uncommon_for_freq_preds = 0;
+
+ for( uint i=1; i<num_preds(); i++ ) {
+ Block* guard = bbs[pred(i)->_idx];
+ // Check to see if this block follows its guard 1 time out of 10000
+ // or less.
+ //
+ // See list of magnitude-4 unlikely probabilities in cfgnode.hpp which
+ // we intend to be "uncommon", such as slow-path TLE allocation,
+ // predicted call failure, and uncommon trap triggers.
+ //
+ // Use an epsilon value of 5% to allow for variability in frequency
+    // predictions and floating point calculations. The net effect is a
+    // guard-to-block frequency ratio threshold of about 9500 to 1.
+ //
+ // Ignore low-frequency blocks.
+ // The next check is (guard->_freq < 1.e-5 * 9500.).
+ if(guard->_freq*BLOCK_FREQUENCY(guard_factor) < BLOCK_FREQUENCY(0.00001f)) {
+ uncommon_preds++;
+ } else {
+ freq_preds++;
+ if( _freq < guard->_freq * guard_factor ) {
+ uncommon_for_freq_preds++;
+ }
+ }
+ }
+ if( num_preds() > 1 &&
+ // The block is uncommon if all preds are uncommon or
+ (uncommon_preds == (num_preds()-1) ||
+ // it is uncommon for all frequent preds.
+ uncommon_for_freq_preds == freq_preds) ) {
+ return true;
+ }
+ return false;
+}
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void Block::dump_bidx(const Block* orig) const {
+ if (_pre_order) tty->print("B%d",_pre_order);
+ else tty->print("N%d", head()->_idx);
+
+ if (Verbose && orig != this) {
+ // Dump the original block's idx
+ tty->print(" (");
+ orig->dump_bidx(orig);
+ tty->print(")");
+ }
+}
+
+void Block::dump_pred(const Block_Array *bbs, Block* orig) const {
+ if (is_connector()) {
+ for (uint i=1; i<num_preds(); i++) {
+ Block *p = ((*bbs)[pred(i)->_idx]);
+ p->dump_pred(bbs, orig);
+ }
+ } else {
+ dump_bidx(orig);
+ tty->print(" ");
+ }
+}
+
+void Block::dump_head( const Block_Array *bbs ) const {
+ // Print the basic block
+ dump_bidx(this);
+ tty->print(": #\t");
+
+ // Print the incoming CFG edges and the outgoing CFG edges
+ for( uint i=0; i<_num_succs; i++ ) {
+ non_connector_successor(i)->dump_bidx(_succs[i]);
+ tty->print(" ");
+ }
+ tty->print("<- ");
+ if( head()->is_block_start() ) {
+ for (uint i=1; i<num_preds(); i++) {
+ Node *s = pred(i);
+ if (bbs) {
+ Block *p = (*bbs)[s->_idx];
+ p->dump_pred(bbs, p);
+ } else {
+ while (!s->is_block_start())
+ s = s->in(0);
+ tty->print("N%d ", s->_idx );
+ }
+ }
+ } else
+ tty->print("BLOCK HEAD IS JUNK ");
+
+ // Print loop, if any
+ const Block *bhead = this; // Head of self-loop
+ Node *bh = bhead->head();
+ if( bbs && bh->is_Loop() && !head()->is_Root() ) {
+ LoopNode *loop = bh->as_Loop();
+ const Block *bx = (*bbs)[loop->in(LoopNode::LoopBackControl)->_idx];
+ while (bx->is_connector()) {
+ bx = (*bbs)[bx->pred(1)->_idx];
+ }
+ tty->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order);
+ // Dump any loop-specific bits, especially for CountedLoops.
+ loop->dump_spec(tty);
+ }
+ tty->print(" Freq: %g",_freq);
+ if( Verbose || WizardMode ) {
+ tty->print(" IDom: %d/#%d", _idom ? _idom->_pre_order : 0, _dom_depth);
+ tty->print(" RegPressure: %d",_reg_pressure);
+ tty->print(" IHRP Index: %d",_ihrp_index);
+ tty->print(" FRegPressure: %d",_freg_pressure);
+ tty->print(" FHRP Index: %d",_fhrp_index);
+ }
+ tty->print_cr("");
+}
+
+void Block::dump() const { dump(0); }
+
+void Block::dump( const Block_Array *bbs ) const {
+ dump_head(bbs);
+ uint cnt = _nodes.size();
+ for( uint i=0; i<cnt; i++ )
+ _nodes[i]->dump();
+ tty->print("\n");
+}
+#endif
+
+//=============================================================================
+//------------------------------PhaseCFG---------------------------------------
+PhaseCFG::PhaseCFG( Arena *a, RootNode *r, Matcher &m ) :
+ Phase(CFG),
+ _bbs(a),
+ _root(r)
+#ifndef PRODUCT
+ , _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
+#endif
+{
+ ResourceMark rm;
+ // I'll need a few machine-specific GotoNodes. Make an Ideal GotoNode,
+ // then Match it into a machine-specific Node. Then clone the machine
+ // Node on demand.
+ Node *x = new (C, 1) GotoNode(NULL);
+ x->init_req(0, x);
+ _goto = m.match_tree(x);
+ assert(_goto != NULL, "");
+ _goto->set_req(0,_goto);
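+  // The prototype Goto keeps a self-referential control input as a
+  // placeholder; each clone gets its real control edge when it is inserted
+  // (see build_cfg and insert_goto_at below).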
+
+ // Build the CFG in Reverse Post Order
+ _num_blocks = build_cfg();
+ _broot = _bbs[_root->_idx];
+}
+
+//------------------------------build_cfg--------------------------------------
+// Build a proper looking CFG. Make every block begin with either a StartNode
+// or a RegionNode. Make every block end with either a Goto, If or Return.
+// The RootNode both starts and ends its own block. Do this with a recursive
+// backwards walk over the control edges.
+uint PhaseCFG::build_cfg() {
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+
+ // Allocate stack with enough space to avoid frequent realloc
+ Node_Stack nstack(a, C->unique() >> 1);
+ nstack.push(_root, 0);
+ uint sum = 0; // Counter for blocks
+
+ while (nstack.is_nonempty()) {
+ // node and in's index from stack's top
+ // 'np' is _root (see above) or RegionNode, StartNode: we push on stack
+ // only nodes which point to the start of basic block (see below).
+ Node *np = nstack.node();
+ // idx > 0, except for the first node (_root) pushed on stack
+ // at the beginning when idx == 0.
+ // We will use the condition (idx == 0) later to end the build.
+ uint idx = nstack.index();
+ Node *proj = np->in(idx);
+ const Node *x = proj->is_block_proj();
+ // Does the block end with a proper block-ending Node? One of Return,
+ // If or Goto? (This check should be done for visited nodes also).
+ if (x == NULL) { // Does not end right...
+ Node *g = _goto->clone(); // Force it to end in a Goto
+ g->set_req(0, proj);
+ np->set_req(idx, g);
+ x = proj = g;
+ }
+ if (!visited.test_set(x->_idx)) { // Visit this block once
+ // Skip any control-pinned middle'in stuff
+ Node *p = proj;
+ do {
+ proj = p; // Update pointer to last Control
+ p = p->in(0); // Move control forward
+ } while( !p->is_block_proj() &&
+ !p->is_block_start() );
+ // Make the block begin with one of Region or StartNode.
+ if( !p->is_block_start() ) {
+ RegionNode *r = new (C, 2) RegionNode( 2 );
+ r->init_req(1, p); // Insert RegionNode in the way
+ proj->set_req(0, r); // Insert RegionNode in the way
+ p = r;
+ }
+ // 'p' now points to the start of this basic block
+
+ // Put self in array of basic blocks
+ Block *bb = new (_bbs._arena) Block(_bbs._arena,p);
+ _bbs.map(p->_idx,bb);
+ _bbs.map(x->_idx,bb);
+ if( x != p ) // Only for root is x == p
+ bb->_nodes.push((Node*)x);
+
+ // Now handle predecessors
+ ++sum; // Count 1 for self block
+ uint cnt = bb->num_preds();
+ for (int i = (cnt - 1); i > 0; i-- ) { // For all predecessors
+ Node *prevproj = p->in(i); // Get prior input
+ assert( !prevproj->is_Con(), "dead input not removed" );
+ // Check to see if p->in(i) is a "control-dependent" CFG edge -
+ // i.e., it splits at the source (via an IF or SWITCH) and merges
+ // at the destination (via a many-input Region).
+ // This breaks critical edges. The RegionNode to start the block
+ // will be added when <p,i> is pulled off the node stack
+ if ( cnt > 2 ) { // Merging many things?
+ assert( prevproj== bb->pred(i),"");
+ if(prevproj->is_block_proj() != prevproj) { // Control-dependent edge?
+ // Force a block on the control-dependent edge
+ Node *g = _goto->clone(); // Force it to end in a Goto
+ g->set_req(0,prevproj);
+ p->set_req(i,g);
+ }
+ }
+ nstack.push(p, i); // 'p' is RegionNode or StartNode
+ }
+ } else { // Post-processing visited nodes
+ nstack.pop(); // remove node from stack
+      // Check if it is the first node pushed on the stack at the beginning.
+ if (idx == 0) break; // end of the build
+ // Find predecessor basic block
+ Block *pb = _bbs[x->_idx];
+ // Insert into nodes array, if not already there
+ if( !_bbs.lookup(proj->_idx) ) {
+ assert( x != proj, "" );
+ // Map basic block of projection
+ _bbs.map(proj->_idx,pb);
+ pb->_nodes.push(proj);
+ }
+ // Insert self as a child of my predecessor block
+ pb->_succs.map(pb->_num_succs++, _bbs[np->_idx]);
+ assert( pb->_nodes[ pb->_nodes.size() - pb->_num_succs ]->is_block_proj(),
+ "too many control users, not a CFG?" );
+ }
+ }
+ // Return number of basic blocks for all children and self
+ return sum;
+}
+
+//------------------------------insert_goto_at---------------------------------
+// Inserts a goto & corresponding basic block between
+// block[block_no] and its succ_no'th successor block
+void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
+ // get block with block_no
+ assert(block_no < _num_blocks, "illegal block number");
+ Block* in = _blocks[block_no];
+ // get successor block succ_no
+ assert(succ_no < in->_num_succs, "illegal successor number");
+ Block* out = in->_succs[succ_no];
+ // get ProjNode corresponding to the succ_no'th successor of the in block
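+  // (the successor projections occupy the last _num_succs slots of the node
+  //  list, in the same order as the _succs array)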
+ ProjNode* proj = in->_nodes[in->_nodes.size() - in->_num_succs + succ_no]->as_Proj();
+ // create region for basic block
+ RegionNode* region = new (C, 2) RegionNode(2);
+ region->init_req(1, proj);
+ // setup corresponding basic block
+ Block* block = new (_bbs._arena) Block(_bbs._arena, region);
+ _bbs.map(region->_idx, block);
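+  // tell the register allocator that this late-created node has no register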
+ C->regalloc()->set_bad(region->_idx);
+ // add a goto node
+ Node* gto = _goto->clone(); // get a new goto node
+ gto->set_req(0, region);
+ // add it to the basic block
+ block->_nodes.push(gto);
+ _bbs.map(gto->_idx, block);
+ C->regalloc()->set_bad(gto->_idx);
+ // hook up successor block
+ block->_succs.map(block->_num_succs++, out);
+ // remap successor's predecessors if necessary
+ for (uint i = 1; i < out->num_preds(); i++) {
+ if (out->pred(i) == proj) out->head()->set_req(i, gto);
+ }
+ // remap predecessor's successor to new block
+ in->_succs.map(succ_no, block);
+ // add new basic block to basic block list
+ _blocks.insert(block_no + 1, block);
+ _num_blocks++;
+}
+
+//------------------------------no_flip_branch---------------------------------
+// Does this block end in a multiway branch that cannot have the default case
+// flipped for another case?
+static bool no_flip_branch( Block *b ) {
+ int branch_idx = b->_nodes.size() - b->_num_succs-1;
+ if( branch_idx < 1 ) return false;
+ Node *bra = b->_nodes[branch_idx];
+ if( bra->is_Catch() ) return true;
+ if( bra->is_Mach() ) {
+ if( bra->is_MachNullCheck() ) return true;
+ int iop = bra->as_Mach()->ideal_Opcode();
+ if( iop == Op_FastLock || iop == Op_FastUnlock )
+ return true;
+ }
+ return false;
+}
+
+//------------------------------convert_NeverBranch_to_Goto--------------------
+// Check for NeverBranch at block end. This needs to become a GOTO to the
+// true target. NeverBranch nodes are treated as a conditional branch that always
+// goes the same direction for most of the optimizer and are used to give a
+// fake exit path to infinite loops. At this late stage they need to turn
+// into Goto's so that when you enter the infinite loop you indeed hang.
+void PhaseCFG::convert_NeverBranch_to_Goto(Block *b) {
+ // Find true target
+ int end_idx = b->end_idx();
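+  // The _con of the projection just past the NeverBranch selects which of the
+  // two successors is the always-taken (true) target.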
+ int idx = b->_nodes[end_idx+1]->as_Proj()->_con;
+ Block *succ = b->_succs[idx];
+ Node* gto = _goto->clone(); // get a new goto node
+ gto->set_req(0, b->head());
+ Node *bp = b->_nodes[end_idx];
+ b->_nodes.map(end_idx,gto); // Slam over NeverBranch
+ _bbs.map(gto->_idx, b);
+ C->regalloc()->set_bad(gto->_idx);
+ b->_nodes.pop(); // Yank projections
+ b->_nodes.pop(); // Yank projections
+ b->_succs.map(0,succ); // Map only successor
+ b->_num_succs = 1;
+ // remap successor's predecessors if necessary
+ uint j;
+ for( j = 1; j < succ->num_preds(); j++)
+ if( succ->pred(j)->in(0) == bp )
+ succ->head()->set_req(j, gto);
+ // Kill alternate exit path
+ Block *dead = b->_succs[1-idx];
+ for( j = 1; j < dead->num_preds(); j++)
+ if( dead->pred(j)->in(0) == bp )
+ break;
+ // Scan through block, yanking dead path from
+ // all regions and phis.
+ dead->head()->del_req(j);
+ for( int k = 1; dead->_nodes[k]->is_Phi(); k++ )
+ dead->_nodes[k]->del_req(j);
+}
+
+//------------------------------MoveToNext-------------------------------------
+// Helper function to move block bx to the slot following b_index. Return
+// true if the move is successful, otherwise false
+bool PhaseCFG::MoveToNext(Block* bx, uint b_index) {
+ if (bx == NULL) return false;
+
+ // Return false if bx is already scheduled.
+ uint bx_index = bx->_pre_order;
+ if ((bx_index <= b_index) && (_blocks[bx_index] == bx)) {
+ return false;
+ }
+
+ // Find the current index of block bx on the block list
+ bx_index = b_index + 1;
+ while( bx_index < _num_blocks && _blocks[bx_index] != bx ) bx_index++;
+ assert(_blocks[bx_index] == bx, "block not found");
+
+ // If the previous block conditionally falls into bx, return false,
+ // because moving bx will create an extra jump.
+ for(uint k = 1; k < bx->num_preds(); k++ ) {
+ Block* pred = _bbs[bx->pred(k)->_idx];
+ if (pred == _blocks[bx_index-1]) {
+ if (pred->_num_succs != 1) {
+ return false;
+ }
+ }
+ }
+
+ // Reinsert bx just past block 'b'
+ _blocks.remove(bx_index);
+ _blocks.insert(b_index + 1, bx);
+ return true;
+}
+
+//------------------------------MoveToEnd--------------------------------------
+// Move empty and uncommon blocks to the end.
+void PhaseCFG::MoveToEnd(Block *b, uint i) {
+ int e = b->is_Empty();
+ if (e != Block::not_empty) {
+ if (e == Block::empty_with_goto) {
+ // Remove the goto, but leave the block.
+ b->_nodes.pop();
+ }
+ // Mark this block as a connector block, which will cause it to be
+ // ignored in certain functions such as non_connector_successor().
+ b->set_connector();
+ }
+ // Move the empty block to the end, and don't recheck.
+ _blocks.remove(i);
+ _blocks.push(b);
+}
+
+//------------------------------RemoveEmpty------------------------------------
+// Remove empty basic blocks and useless branches.
+void PhaseCFG::RemoveEmpty() {
+ // Move uncommon blocks to the end
+ uint last = _num_blocks;
+ uint i;
+ assert( _blocks[0] == _broot, "" );
+ for( i = 1; i < last; i++ ) {
+ Block *b = _blocks[i];
+
+ // Check for NeverBranch at block end. This needs to become a GOTO to the
+    // true target. NeverBranch nodes are treated as a conditional branch that
+ // always goes the same direction for most of the optimizer and are used
+ // to give a fake exit path to infinite loops. At this late stage they
+ // need to turn into Goto's so that when you enter the infinite loop you
+ // indeed hang.
+ if( b->_nodes[b->end_idx()]->Opcode() == Op_NeverBranch )
+ convert_NeverBranch_to_Goto(b);
+
+ // Look for uncommon blocks and move to end.
+ if( b->is_uncommon(_bbs) ) {
+ MoveToEnd(b, i);
+ last--; // No longer check for being uncommon!
+ if( no_flip_branch(b) ) { // Fall-thru case must follow?
+ b = _blocks[i]; // Find the fall-thru block
+ MoveToEnd(b, i);
+ last--;
+ }
+ i--; // backup block counter post-increment
+ }
+ }
+
+ // Remove empty blocks
+ uint j1;
+ last = _num_blocks;
+ for( i=0; i < last; i++ ) {
+ Block *b = _blocks[i];
+ if (i > 0) {
+ if (b->is_Empty() != Block::not_empty) {
+ MoveToEnd(b, i);
+ last--;
+ i--;
+ }
+ }
+ } // End of for all blocks
+
+ // Fixup final control flow for the blocks. Remove jump-to-next
+  // block. If neither arm of an IF follows the conditional branch, we
+ // have to add a second jump after the conditional. We place the
+ // TRUE branch target in succs[0] for both GOTOs and IFs.
+ for( i=0; i < _num_blocks; i++ ) {
+ Block *b = _blocks[i];
+ b->_pre_order = i; // turn pre-order into block-index
+
+ // Connector blocks need no further processing.
+ if (b->is_connector()) {
+ assert((i+1) == _num_blocks || _blocks[i+1]->is_connector(),
+ "All connector blocks should sink to the end");
+ continue;
+ }
+ assert(b->is_Empty() != Block::completely_empty,
+ "Empty blocks should be connectors");
+
+ Block *bnext = (i < _num_blocks-1) ? _blocks[i+1] : NULL;
+ Block *bs0 = b->non_connector_successor(0);
+
+ // Check for multi-way branches where I cannot negate the test to
+ // exchange the true and false targets.
+ if( no_flip_branch( b ) ) {
+      // Find fall through case - it must fall into its target
+ int branch_idx = b->_nodes.size() - b->_num_succs;
+ for (uint j2 = 0; j2 < b->_num_succs; j2++) {
+ const ProjNode* p = b->_nodes[branch_idx + j2]->as_Proj();
+ if (p->_con == 0) {
+ // successor j2 is fall through case
+ if (b->non_connector_successor(j2) != bnext) {
+ // but it is not the next block => insert a goto
+ insert_goto_at(i, j2);
+ }
+ // Put taken branch in slot 0
+ if( j2 == 0 && b->_num_succs == 2) {
+ // Flip targets in succs map
+ Block *tbs0 = b->_succs[0];
+ Block *tbs1 = b->_succs[1];
+ b->_succs.map( 0, tbs1 );
+ b->_succs.map( 1, tbs0 );
+ }
+ break;
+ }
+ }
+ // Remove all CatchProjs
+ for (j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
+
+ } else if (b->_num_succs == 1) {
+ // Block ends in a Goto?
+ if (bnext == bs0) {
+ // We fall into next block; remove the Goto
+ b->_nodes.pop();
+ }
+
+ } else if( b->_num_succs == 2 ) { // Block ends in a If?
+ // Get opcode of 1st projection (matches _succs[0])
+ // Note: Since this basic block has 2 exits, the last 2 nodes must
+ // be projections (in any order), the 3rd last node must be
+ // the IfNode (we have excluded other 2-way exits such as
+ // CatchNodes already).
+ MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach();
+ ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
+ ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
+
+ // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1].
+ assert(proj0->raw_out(0) == b->_succs[0]->head(), "Mismatch successor 0");
+ assert(proj1->raw_out(0) == b->_succs[1]->head(), "Mismatch successor 1");
+
+ Block *bs1 = b->non_connector_successor(1);
+
+ // Check for neither successor block following the current
+ // block ending in a conditional. If so, move one of the
+ // successors after the current one, provided that the
+ // successor was previously unscheduled, but moveable
+ // (i.e., all paths to it involve a branch).
+ if( bnext != bs0 && bnext != bs1 ) {
+
+ // Choose the more common successor based on the probability
+ // of the conditional branch.
+ Block *bx = bs0;
+ Block *by = bs1;
+
+ // _prob is the probability of taking the true path. Make
+ // p the probability of taking successor #1.
+ float p = iff->as_MachIf()->_prob;
+ if( proj0->Opcode() == Op_IfTrue ) {
+ p = 1.0 - p;
+ }
+
+ // Prefer successor #1 if p > 0.5
+ if (p > PROB_FAIR) {
+ bx = bs1;
+ by = bs0;
+ }
+
+ // Attempt the more common successor first
+ if (MoveToNext(bx, i)) {
+ bnext = bx;
+ } else if (MoveToNext(by, i)) {
+ bnext = by;
+ }
+ }
+
+ // Check for conditional branching the wrong way. Negate
+ // conditional, if needed, so it falls into the following block
+ // and branches to the not-following block.
+
+ // Check for the next block being in succs[0]. We are going to branch
+ // to succs[0], so we want the fall-thru case as the next block in
+ // succs[1].
+ if (bnext == bs0) {
+ // Fall-thru case in succs[0], so flip targets in succs map
+ Block *tbs0 = b->_succs[0];
+ Block *tbs1 = b->_succs[1];
+ b->_succs.map( 0, tbs1 );
+ b->_succs.map( 1, tbs0 );
+ // Flip projection for each target
+ { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }
+
+ } else if( bnext == bs1 ) { // Fall-thru is already in succs[1]
+
+ } else { // Else need a double-branch
+
+ // The existing conditional branch need not change.
+        // Add an unconditional branch to the false target.
+ // Alas, it must appear in its own block and adding a
+ // block this late in the game is complicated. Sigh.
+ insert_goto_at(i, 1);
+ }
+
+ // Make sure we TRUE branch to the target
+ if( proj0->Opcode() == Op_IfFalse )
+ iff->negate();
+
+ b->_nodes.pop(); // Remove IfFalse & IfTrue projections
+ b->_nodes.pop();
+
+ } else {
+ // Multi-exit block, e.g. a switch statement
+ // But we don't need to do anything here
+ }
+
+ } // End of for all blocks
+
+}
+
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const {
+ const Node *x = end->is_block_proj();
+ assert( x, "not a CFG" );
+
+ // Do not visit this block again
+ if( visited.test_set(x->_idx) ) return;
+
+ // Skip through this block
+ const Node *p = x;
+ do {
+ p = p->in(0); // Move control forward
+ assert( !p->is_block_proj() || p->is_Root(), "not a CFG" );
+ } while( !p->is_block_start() );
+
+ // Recursively visit
+ for( uint i=1; i<p->req(); i++ )
+ _dump_cfg(p->in(i),visited);
+
+ // Dump the block
+ _bbs[p->_idx]->dump(&_bbs);
+}
+
+void PhaseCFG::dump( ) const {
+ tty->print("\n--- CFG --- %d BBs\n",_num_blocks);
+ if( _blocks.size() ) { // Did we do basic-block layout?
+ for( uint i=0; i<_num_blocks; i++ )
+ _blocks[i]->dump(&_bbs);
+ } else { // Else do it with a DFS
+ VectorSet visited(_bbs._arena);
+ _dump_cfg(_root,visited);
+ }
+}
+
+void PhaseCFG::dump_headers() {
+ for( uint i = 0; i < _num_blocks; i++ ) {
+ if( _blocks[i] == NULL ) continue;
+ _blocks[i]->dump_head(&_bbs);
+ }
+}
+
+void PhaseCFG::verify( ) const {
+ // Verify sane CFG
+ for( uint i = 0; i < _num_blocks; i++ ) {
+ Block *b = _blocks[i];
+ uint cnt = b->_nodes.size();
+ uint j;
+ for( j = 0; j < cnt; j++ ) {
+ Node *n = b->_nodes[j];
+ assert( _bbs[n->_idx] == b, "" );
+ if( j >= 1 && n->is_Mach() &&
+ n->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
+ assert( j == 1 || b->_nodes[j-1]->is_Phi(),
+ "CreateEx must be first instruction in block" );
+ }
+ for( uint k = 0; k < n->req(); k++ ) {
+ Node *use = n->in(k);
+ if( use && use != n ) {
+ assert( _bbs[use->_idx] || use->is_Con(),
+ "must have block; constants for debug info ok" );
+ }
+ }
+ }
+
+ j = b->end_idx();
+ Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj();
+ assert( bp, "last instruction must be a block proj" );
+ assert( bp == b->_nodes[j], "wrong number of successors for this block" );
+ if( bp->is_Catch() ) {
+ while( b->_nodes[--j]->Opcode() == Op_MachProj ) ;
+ assert( b->_nodes[j]->is_Call(), "CatchProj must follow call" );
+ }
+ else if( bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If ) {
+ assert( b->_num_succs == 2, "Conditional branch must have two targets");
+ }
+ }
+}
+#endif
+
+//=============================================================================
+//------------------------------UnionFind--------------------------------------
+UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) {
+ Copy::zero_to_bytes( _indices, sizeof(uint)*max );
+}
+
+void UnionFind::extend( uint from_idx, uint to_idx ) {
+ _nesting.check();
+ if( from_idx >= _max ) {
+ uint size = 16;
+ while( size <= from_idx ) size <<=1;
+ _indices = REALLOC_RESOURCE_ARRAY( uint, _indices, _max, size );
+ _max = size;
+ }
+ while( _cnt <= from_idx ) _indices[_cnt++] = 0;
+ _indices[from_idx] = to_idx;
+}
+
+void UnionFind::reset( uint max ) {
+ assert( max <= max_uint, "Must fit within uint" );
+ // Force the Union-Find mapping to be at least this large
+ extend(max,0);
+ // Initialize to be the ID mapping.
+ for( uint i=0; i<_max; i++ ) map(i,i);
+}
+
+//------------------------------Find_compress----------------------------------
+// Straight out of Tarjan's union-find algorithm
+uint UnionFind::Find_compress( uint idx ) {
+ uint cur = idx;
+ uint next = lookup(cur);
+ while( next != cur ) { // Scan chain of equivalences
+ assert( next < cur, "always union smaller" );
+ cur = next; // until find a fixed-point
+ next = lookup(cur);
+ }
+ // Core of union-find algorithm: update chain of
+ // equivalences to be equal to the root.
+ while( idx != next ) {
+ uint tmp = lookup(idx);
+ map(idx, next);
+ idx = tmp;
+ }
+ return idx;
+}
+
+//------------------------------Find_const-------------------------------------
+// Like Find above, but with no path compression, so it has bad asymptotic behavior
+uint UnionFind::Find_const( uint idx ) const {
+ if( idx == 0 ) return idx; // Ignore the zero idx
+ // Off the end? This can happen during debugging dumps
+ // when data structures have not finished being updated.
+ if( idx >= _max ) return idx;
+ uint next = lookup(idx);
+ while( next != idx ) { // Scan chain of equivalences
+ assert( next < idx, "always union smaller" );
+ idx = next; // until find a fixed-point
+ next = lookup(idx);
+ }
+ return next;
+}
+
+//------------------------------Union------------------------------------------
+// union 2 sets together.
+void UnionFind::Union( uint idx1, uint idx2 ) {
+ uint src = Find(idx1);
+ uint dst = Find(idx2);
+ assert( src, "" );
+ assert( dst, "" );
+ assert( src < _max, "oob" );
+ assert( dst < _max, "oob" );
+ assert( src < dst, "always union smaller" );
+ map(dst,src);
+}
diff --git a/src/share/vm/opto/block.hpp b/src/share/vm/opto/block.hpp
new file mode 100644
index 000000000..8708a4ded
--- /dev/null
+++ b/src/share/vm/opto/block.hpp
@@ -0,0 +1,510 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Optimization - Graph Style
+
+class Block;
+class CFGLoop;
+class MachCallNode;
+class Matcher;
+class RootNode;
+class VectorSet;
+struct Tarjan;
+
+//------------------------------Block_Array------------------------------------
+// Map dense integer indices to Blocks. Uses classic doubling-array trick.
+// Abstractly provides an infinite array of Block*'s, initialized to NULL.
+// Note that the constructor just zeros things, and since I use Arena
+// allocation I do not need a destructor to reclaim storage.
+class Block_Array : public ResourceObj {
+ uint _size; // allocated size, as opposed to formal limit
+ debug_only(uint _limit;) // limit to formal domain
+protected:
+ Block **_blocks;
+ void grow( uint i ); // Grow array node to fit
+
+public:
+ Arena *_arena; // Arena to allocate in
+
+ Block_Array(Arena *a) : _arena(a), _size(OptoBlockListSize) {
+ debug_only(_limit=0);
+ _blocks = NEW_ARENA_ARRAY( a, Block *, OptoBlockListSize );
+ for( int i = 0; i < OptoBlockListSize; i++ ) {
+ _blocks[i] = NULL;
+ }
+ }
+ Block *lookup( uint i ) const // Lookup, or NULL for not mapped
+ { return (i<Max()) ? _blocks[i] : (Block*)NULL; }
+ Block *operator[] ( uint i ) const // Lookup, or assert for not mapped
+ { assert( i < Max(), "oob" ); return _blocks[i]; }
+ // Extend the mapping: index i maps to Block *n.
+ void map( uint i, Block *n ) { if( i>=Max() ) grow(i); _blocks[i] = n; }
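+  // In debug builds Max() is the formal limit (_limit); in product builds it
+  // is the allocated size (_size).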
+ uint Max() const { debug_only(return _limit); return _size; }
+};
+
+
+class Block_List : public Block_Array {
+public:
+ uint _cnt;
+ Block_List() : Block_Array(Thread::current()->resource_area()), _cnt(0) {}
+ void push( Block *b ) { map(_cnt++,b); }
+ Block *pop() { return _blocks[--_cnt]; }
+ Block *rpop() { Block *b = _blocks[0]; _blocks[0]=_blocks[--_cnt]; return b;}
+ void remove( uint i );
+ void insert( uint i, Block *n );
+ uint size() const { return _cnt; }
+ void reset() { _cnt = 0; }
+};
+
+
+class CFGElement : public ResourceObj {
+ public:
+ float _freq; // Execution frequency (estimate)
+
+ CFGElement() : _freq(0.0f) {}
+ virtual bool is_block() { return false; }
+ virtual bool is_loop() { return false; }
+ Block* as_Block() { assert(is_block(), "must be block"); return (Block*)this; }
+ CFGLoop* as_CFGLoop() { assert(is_loop(), "must be loop"); return (CFGLoop*)this; }
+};
+
+//------------------------------Block------------------------------------------
+// This class defines a Basic Block.
+// Basic blocks are used during the output routines, and are not used during
+// any optimization pass. They are created late in the game.
+class Block : public CFGElement {
+ public:
+ // Nodes in this block, in order
+ Node_List _nodes;
+
+ // Basic blocks have a Node which defines Control for all Nodes pinned in
+ // this block. This Node is a RegionNode. Exception-causing Nodes
+ // (division, subroutines) and Phi functions are always pinned. Later,
+ // every Node will get pinned to some block.
+ Node *head() const { return _nodes[0]; }
+
+ // CAUTION: num_preds() is ONE based, so that predecessor numbers match
+ // input edges to Regions and Phis.
+ uint num_preds() const { return head()->req(); }
+ Node *pred(uint i) const { return head()->in(i); }
+
+ // Array of successor blocks, same size as projs array
+ Block_Array _succs;
+
+ // Basic blocks have some number of Nodes which split control to all
+ // following blocks. These Nodes are always Projections. The field in
+ // the Projection and the block-ending Node determine which Block follows.
+ uint _num_succs;
+
+ // Basic blocks also carry all sorts of good old fashioned DFS information
+ // used to find loops, loop nesting depth, dominators, etc.
+ uint _pre_order; // Pre-order DFS number
+
+ // Dominator tree
+ uint _dom_depth; // Depth in dominator tree for fast LCA
+ Block* _idom; // Immediate dominator block
+
+ CFGLoop *_loop; // Loop to which this block belongs
+ uint _rpo; // Number in reverse post order walk
+
+ virtual bool is_block() { return true; }
+ float succ_prob(uint i); // return probability of i'th successor
+
+ Block* dom_lca(Block* that); // Compute LCA in dominator tree.
+#ifdef ASSERT
+ bool dominates(Block* that) {
+ int dom_diff = this->_dom_depth - that->_dom_depth;
+ if (dom_diff > 0) return false;
+ for (; dom_diff < 0; dom_diff++) that = that->_idom;
+ return this == that;
+ }
+#endif
+
+ // Report the alignment required by this block. Must be a power of 2.
+ // The previous block will insert nops to get this alignment.
+ uint code_alignment();
+
+ // BLOCK_FREQUENCY is a sentinel to mark uses of constant block frequencies.
+  // It also rescales such frequencies for the current FreqCountInvocations
+  // setting, relative to its old default of 1500.
+#define BLOCK_FREQUENCY(f) ((f * (float) 1500) / FreqCountInvocations)
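+  // e.g. with FreqCountInvocations at the old default of 1500,
+  // BLOCK_FREQUENCY(f) is just f; halving FreqCountInvocations doubles it.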
+
+ // Register Pressure (estimate) for Splitting heuristic
+ uint _reg_pressure;
+ uint _ihrp_index;
+ uint _freg_pressure;
+ uint _fhrp_index;
+
+ // Mark and visited bits for an LCA calculation in insert_anti_dependences.
+ // Since they hold unique node indexes, they do not need reinitialization.
+ node_idx_t _raise_LCA_mark;
+ void set_raise_LCA_mark(node_idx_t x) { _raise_LCA_mark = x; }
+ node_idx_t raise_LCA_mark() const { return _raise_LCA_mark; }
+ node_idx_t _raise_LCA_visited;
+ void set_raise_LCA_visited(node_idx_t x) { _raise_LCA_visited = x; }
+ node_idx_t raise_LCA_visited() const { return _raise_LCA_visited; }
+
+ // Estimated size in bytes of first instructions in a loop.
+ uint _first_inst_size;
+ uint first_inst_size() const { return _first_inst_size; }
+ void set_first_inst_size(uint s) { _first_inst_size = s; }
+
+ // Compute the size of first instructions in this block.
+ uint compute_first_inst_size(uint& sum_size, uint inst_cnt, PhaseRegAlloc* ra);
+
+ // Compute alignment padding if the block needs it.
+  // Align a loop if the loop's padding is less than or equal to the padding
+  // limit, or if the size of the first instructions in the loop exceeds the padding.
+ uint alignment_padding(int current_offset) {
+ int block_alignment = code_alignment();
+ int max_pad = block_alignment-relocInfo::addr_unit();
+ if( max_pad > 0 ) {
+ assert(is_power_of_2(max_pad+relocInfo::addr_unit()), "");
+ int current_alignment = current_offset & max_pad;
+ if( current_alignment != 0 ) {
+ uint padding = (block_alignment-current_alignment) & max_pad;
+ if( !head()->is_Loop() ||
+ padding <= (uint)MaxLoopPad ||
+ first_inst_size() > padding ) {
+ return padding;
+ }
+ }
+ }
+ return 0;
+ }
+
+ // Connector blocks. Connector blocks are basic blocks devoid of
+ // instructions, but may have relevant non-instruction Nodes, such as
+ // Phis or MergeMems. Such blocks are discovered and marked during the
+ // RemoveEmpty phase, and elided during Output.
+ bool _connector;
+ void set_connector() { _connector = true; }
+ bool is_connector() const { return _connector; };
+
+ // Create a new Block with given head Node.
+ // Creates the (empty) predecessor arrays.
+ Block( Arena *a, Node *headnode )
+ : CFGElement(),
+ _nodes(a),
+ _succs(a),
+ _num_succs(0),
+ _pre_order(0),
+ _idom(0),
+ _loop(NULL),
+ _reg_pressure(0),
+ _ihrp_index(1),
+ _freg_pressure(0),
+ _fhrp_index(1),
+ _raise_LCA_mark(0),
+ _raise_LCA_visited(0),
+ _first_inst_size(999999),
+ _connector(false) {
+ _nodes.push(headnode);
+ }
+
+ // Index of 'end' Node
+ uint end_idx() const {
+ // %%%%% add a proj after every goto
+ // so (last->is_block_proj() != last) always, then simplify this code
+ // This will not give correct end_idx for block 0 when it only contains root.
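+    // The end is either the last node itself (when that node is its own block
+    // projection, e.g. a Goto or Return) or the node just before the trailing
+    // _num_succs projections.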
+ int last_idx = _nodes.size() - 1;
+ Node *last = _nodes[last_idx];
+ assert(last->is_block_proj() == last || last->is_block_proj() == _nodes[last_idx - _num_succs], "");
+ return (last->is_block_proj() == last) ? last_idx : (last_idx - _num_succs);
+ }
+
+ // Basic blocks have a Node which ends them. This Node determines which
+ // basic block follows this one in the program flow. This Node is either an
+ // IfNode, a GotoNode, a JmpNode, or a ReturnNode.
+ Node *end() const { return _nodes[end_idx()]; }
+
+ // Add an instruction to an existing block. It must go after the head
+ // instruction and before the end instruction.
+ void add_inst( Node *n ) { _nodes.insert(end_idx(),n); }
+ // Find node in block
+ uint find_node( const Node *n ) const;
+ // Find and remove n from block list
+ void find_remove( const Node *n );
+
+ // Schedule a call next in the block
+ uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call);
+
+ // Perform basic-block local scheduling
+ Node *select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot);
+ void set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs );
+ void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs);
+ bool schedule_local(PhaseCFG *cfg, Matcher &m, int *ready_cnt, VectorSet &next_call);
+  // Cleanup if any code lands between a Call and its Catch
+ void call_catch_cleanup(Block_Array &bbs);
+ // Detect implicit-null-check opportunities. Basically, find NULL checks
+ // with suitable memory ops nearby. Use the memory op to do the NULL check.
+ // I can generate a memory op if there is not one nearby.
+ void implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons);
+
+ // Return the empty status of a block
+ enum { not_empty, empty_with_goto, completely_empty };
+ int is_Empty() const;
+
+ // Forward through connectors
+ Block* non_connector() {
+ Block* s = this;
+ while (s->is_connector()) {
+ s = s->_succs[0];
+ }
+ return s;
+ }
+
+ // Successor block, after forwarding through connectors
+ Block* non_connector_successor(int i) const {
+ return _succs[i]->non_connector();
+ }
+
+ // Examine block's code shape to predict if it is not commonly executed.
+ bool has_uncommon_code() const;
+
+ // Use frequency calculations and code shape to predict if the block
+ // is uncommon.
+ bool is_uncommon( Block_Array &bbs ) const;
+
+#ifndef PRODUCT
+ // Debugging print of basic block
+ void dump_bidx(const Block* orig) const;
+ void dump_pred(const Block_Array *bbs, Block* orig) const;
+ void dump_head( const Block_Array *bbs ) const;
+ void dump( ) const;
+ void dump( const Block_Array *bbs ) const;
+#endif
+};
+
+
+//------------------------------PhaseCFG---------------------------------------
+// Build an array of Basic Block pointers, one per Node.
+class PhaseCFG : public Phase {
+ private:
+ // Build a proper looking cfg. Return count of basic blocks
+ uint build_cfg();
+
+ // Perform DFS search.
+ // Setup 'vertex' as DFS to vertex mapping.
+ // Setup 'semi' as vertex to DFS mapping.
+ // Set 'parent' to DFS parent.
+ uint DFS( Tarjan *tarjan );
+
+ // Helper function to insert a node into a block
+ void schedule_node_into_block( Node *n, Block *b );
+
+ // Set the basic block for pinned Nodes
+ void schedule_pinned_nodes( VectorSet &visited );
+
+ // I'll need a few machine-specific GotoNodes. Clone from this one.
+ MachNode *_goto;
+ void insert_goto_at(uint block_no, uint succ_no);
+
+ Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
+ void verify_anti_dependences(Block* LCA, Node* load) {
+ assert(LCA == _bbs[load->_idx], "should already be scheduled");
+ insert_anti_dependences(LCA, load, true);
+ }
+
+ public:
+ PhaseCFG( Arena *a, RootNode *r, Matcher &m );
+
+ uint _num_blocks; // Count of basic blocks
+ Block_List _blocks; // List of basic blocks
+ RootNode *_root; // Root of whole program
+ Block_Array _bbs; // Map Nodes to owning Basic Block
+ Block *_broot; // Basic block of root
+ uint _rpo_ctr;
+ CFGLoop* _root_loop;
+
+ // Per node latency estimation, valid only during GCM
+ GrowableArray<uint> _node_latency;
+
+#ifndef PRODUCT
+ bool _trace_opto_pipelining; // tracing flag
+#endif
+
+ // Build dominators
+ void Dominators();
+
+ // Estimate block frequencies based on IfNode probabilities
+ void Estimate_Block_Frequency();
+
+ // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific
+ // basic blocks; i.e. _bbs now maps _idx for all Nodes to some Block.
+ void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );
+
+ // Compute the (backwards) latency of a node from the uses
+ void latency_from_uses(Node *n);
+
+ // Compute the (backwards) latency of a node from a single use
+ int latency_from_use(Node *n, const Node *def, Node *use);
+
+ // Compute the (backwards) latency of a node from the uses of this instruction
+ void partial_latency_of_defs(Node *n);
+
+ // Schedule Nodes early in their basic blocks.
+ bool schedule_early(VectorSet &visited, Node_List &roots);
+
+ // For each node, find the latest block it can be scheduled into
+ // and then select the cheapest block between the latest and earliest
+ // block to place the node.
+ void schedule_late(VectorSet &visited, Node_List &stack);
+
+ // Pick a block between early and late that is a cheaper alternative
+ // to late. Helper for schedule_late.
+ Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
+
+ // Compute the instruction global latency with a backwards walk
+ void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);
+
+ // Remove empty basic blocks
+ void RemoveEmpty();
+ bool MoveToNext(Block* bx, uint b_index);
+ void MoveToEnd(Block* bx, uint b_index);
+
+ // Check for NeverBranch at block end. This needs to become a GOTO to the
+  // true target. NeverBranch nodes are treated as a conditional branch that always
+ // goes the same direction for most of the optimizer and are used to give a
+ // fake exit path to infinite loops. At this late stage they need to turn
+ // into Goto's so that when you enter the infinite loop you indeed hang.
+ void convert_NeverBranch_to_Goto(Block *b);
+
+ CFGLoop* create_loop_tree();
+
+ // Insert a node into a block, and update the _bbs
+ void insert( Block *b, uint idx, Node *n ) {
+ b->_nodes.insert( idx, n );
+ _bbs.map( n->_idx, b );
+ }
+
+#ifndef PRODUCT
+ bool trace_opto_pipelining() const { return _trace_opto_pipelining; }
+
+ // Debugging print of CFG
+ void dump( ) const; // CFG only
+ void _dump_cfg( const Node *end, VectorSet &visited ) const;
+ void verify() const;
+ void dump_headers();
+#else
+ bool trace_opto_pipelining() const { return false; }
+#endif
+};
+
+
+//------------------------------UnionFindInfo----------------------------------
+// Map Block indices to a block-index for a cfg-cover.
+// Array lookup in the optimized case.
+class UnionFind : public ResourceObj {
+ uint _cnt, _max;
+ uint* _indices;
+ ReallocMark _nesting; // assertion check for reallocations
+public:
+ UnionFind( uint max );
+ void reset( uint max ); // Reset to identity map for [0..max]
+
+ uint lookup( uint nidx ) const {
+ return _indices[nidx];
+ }
+ uint operator[] (uint nidx) const { return lookup(nidx); }
+
+ void map( uint from_idx, uint to_idx ) {
+ assert( from_idx < _cnt, "oob" );
+ _indices[from_idx] = to_idx;
+ }
+ void extend( uint from_idx, uint to_idx );
+
+ uint Size() const { return _cnt; }
+
+ uint Find( uint idx ) {
+ assert( idx < 65536, "Must fit into uint");
+ uint uf_idx = lookup(idx);
+ return (uf_idx == idx) ? uf_idx : Find_compress(idx);
+ }
+ uint Find_compress( uint idx );
+ uint Find_const( uint idx ) const;
+ void Union( uint idx1, uint idx2 );
+
+};
+
+//----------------------------BlockProbPair---------------------------
+// Ordered pair of a Block* target and the probability of the edge to it.
+class BlockProbPair VALUE_OBJ_CLASS_SPEC {
+protected:
+ Block* _target; // block target
+ float _prob; // probability of edge to block
+public:
+ BlockProbPair() : _target(NULL), _prob(0.0) {}
+ BlockProbPair(Block* b, float p) : _target(b), _prob(p) {}
+
+ Block* get_target() const { return _target; }
+ float get_prob() const { return _prob; }
+};
+
+//------------------------------CFGLoop-------------------------------------------
+class CFGLoop : public CFGElement {
+ int _id;
+ int _depth;
+  CFGLoop *_parent; // root of loop tree is the method level "pseudo" loop, its parent is null
+ CFGLoop *_sibling; // null terminated list
+ CFGLoop *_child; // first child, use child's sibling to visit all immediately nested loops
+ GrowableArray<CFGElement*> _members; // list of members of loop
+ GrowableArray<BlockProbPair> _exits; // list of successor blocks and their probabilities
+ float _exit_prob; // probability any loop exit is taken on a single loop iteration
+ void update_succ_freq(Block* b, float freq);
+
+ public:
+ CFGLoop(int id) :
+ CFGElement(),
+ _id(id),
+ _depth(0),
+ _parent(NULL),
+ _sibling(NULL),
+ _child(NULL),
+ _exit_prob(1.0f) {}
+ CFGLoop* parent() { return _parent; }
+ void push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk);
+ void add_member(CFGElement *s) { _members.push(s); }
+ void add_nested_loop(CFGLoop* cl);
+ Block* head() {
+ assert(_members.at(0)->is_block(), "head must be a block");
+ Block* hd = _members.at(0)->as_Block();
+ assert(hd->_loop == this, "just checking");
+ assert(hd->head()->is_Loop(), "must begin with loop head node");
+ return hd;
+ }
+ Block* backedge_block(); // Return the block on the backedge of the loop (else NULL)
+ void compute_loop_depth(int depth);
+ void compute_freq(); // compute frequency with loop assuming head freq 1.0f
+ void scale_freq(); // scale frequency by loop trip count (including outer loops)
+ bool in_loop_nest(Block* b);
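+  // with per-iteration exit probability p, the expected trip count is 1/p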
+ float trip_count() const { return 1.0f / _exit_prob; }
+ virtual bool is_loop() { return true; }
+ int id() { return _id; }
+
+#ifndef PRODUCT
+ void dump( ) const;
+ void dump_tree() const;
+#endif
+};
diff --git a/src/share/vm/opto/buildOopMap.cpp b/src/share/vm/opto/buildOopMap.cpp
new file mode 100644
index 000000000..2116c404d
--- /dev/null
+++ b/src/share/vm/opto/buildOopMap.cpp
@@ -0,0 +1,623 @@
+/*
+ * Copyright 2002-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_buildOopMap.cpp.incl"
+
+// The functions in this file build OopMaps after all scheduling is done.
+//
+// OopMaps contain a list of all registers and stack-slots containing oops (so
+// they can be updated by GC). OopMaps also contain a list of derived-pointer
+// base-pointer pairs. When the base is moved, the derived pointer moves to
+// follow it. Finally, any registers holding callee-save values are also
+// recorded. These might contain oops, but only the caller knows.
+//
+// BuildOopMaps implements a simple forward reaching-defs solution. At each
+// GC point we'll have the reaching-def Nodes. If the reaching Nodes are
+// typed as pointers (no offset), then they are oops. Pointers+offsets are
+// derived pointers, and bases can be found from them. Finally, we'll also
+// track reaching callee-save values. Note that a copy of a callee-save value
+// "kills" it's source, so that only 1 copy of a callee-save value is alive at
+// a time.
+//
+// We run a simple bitvector liveness pass to help trim out dead oops. Due to
+// irreducible loops, we can have a reaching def of an oop that only reaches
+// along one path and no way to know if it's valid or not on the other path.
+// The bitvectors are quite dense and the liveness pass is fast.
+//
+// At GC points, we consult this information to build OopMaps. All reaching
+// defs typed as oops are added to the OopMap. Only 1 instance of a
+// callee-save register can be recorded. For derived pointers, we'll have to
+// find and record the register holding the base.
+//
+// The reaching-defs computation is a simple 1-pass worklist approach. I tried a clever
+// breadth-first approach but it was worse (showed O(n^2) in the
+// pick-next-block code).
+//
+// The relevant data is kept in a struct of arrays (it could just as well be
+// an array of structs, but the struct-of-arrays is generally a little more
+// efficient). The arrays are indexed by register number (including
+// stack-slots as registers) and so is bounded by 200 to 300 elements in
+// practice. One array will map to a reaching def Node (or NULL for
+// conflict/dead). The other array will map to a callee-saved register or
+// OptoReg::Bad for not-callee-saved.
+
+
+//------------------------------OopFlow----------------------------------------
+// Structure to pass around
+struct OopFlow : public ResourceObj {
+ short *_callees; // Array mapping register to callee-saved
+ Node **_defs; // array mapping register to reaching def
+ // or NULL if dead/conflict
+ // OopFlow structs, when not being actively modified, describe the _end_ of
+ // this block.
+ Block *_b; // Block for this struct
+ OopFlow *_next; // Next free OopFlow
+
+ OopFlow( short *callees, Node **defs ) : _callees(callees), _defs(defs),
+ _b(NULL), _next(NULL) { }
+
+ // Given reaching-defs for this block start, compute it for this block end
+ void compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash );
+
+ // Merge these two OopFlows into the 'this' pointer.
+ void merge( OopFlow *flow, int max_reg );
+
+ // Copy a 'flow' over an existing flow
+ void clone( OopFlow *flow, int max_size);
+
+ // Make a new OopFlow from scratch
+ static OopFlow *make( Arena *A, int max_size );
+
+ // Build an oopmap from the current flow info
+ OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live );
+};
+
+//------------------------------compute_reach----------------------------------
+// Given reaching-defs for this block start, compute it for this block end
+void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) {
+
+ for( uint i=0; i<_b->_nodes.size(); i++ ) {
+ Node *n = _b->_nodes[i];
+
+ if( n->jvms() ) { // Build an OopMap here?
+ JVMState *jvms = n->jvms();
+ // no map needed for leaf calls
+ if( n->is_MachSafePoint() && !n->is_MachCallLeaf() ) {
+ int *live = (int*) (*safehash)[n];
+ assert( live, "must find live" );
+ n->as_MachSafePoint()->set_oop_map( build_oop_map(n,max_reg,regalloc, live) );
+ }
+ }
+
+ // Assign new reaching def's.
+ // Note that I padded the _defs and _callees arrays so it's legal
+ // to index at _defs[OptoReg::Bad].
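+    // (get_reg_first/get_reg_second return OptoReg::Bad for nodes without a
+    //  register, so those stores land harmlessly in the padding slot.)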
+ OptoReg::Name first = regalloc->get_reg_first(n);
+ OptoReg::Name second = regalloc->get_reg_second(n);
+ _defs[first] = n;
+ _defs[second] = n;
+
+ // Pass callee-save info around copies
+ int idx = n->is_Copy();
+ if( idx ) { // Copies move callee-save info
+ OptoReg::Name old_first = regalloc->get_reg_first(n->in(idx));
+ OptoReg::Name old_second = regalloc->get_reg_second(n->in(idx));
+ int tmp_first = _callees[old_first];
+ int tmp_second = _callees[old_second];
+ _callees[old_first] = OptoReg::Bad; // callee-save is moved, dead in old location
+ _callees[old_second] = OptoReg::Bad;
+ _callees[first] = tmp_first;
+ _callees[second] = tmp_second;
+ } else if( n->is_Phi() ) { // Phis do not mod callee-saves
+ assert( _callees[first] == _callees[regalloc->get_reg_first(n->in(1))], "" );
+ assert( _callees[second] == _callees[regalloc->get_reg_second(n->in(1))], "" );
+ assert( _callees[first] == _callees[regalloc->get_reg_first(n->in(n->req()-1))], "" );
+ assert( _callees[second] == _callees[regalloc->get_reg_second(n->in(n->req()-1))], "" );
+ } else {
+ _callees[first] = OptoReg::Bad; // No longer holding a callee-save value
+ _callees[second] = OptoReg::Bad;
+
+ // Find base case for callee saves
+ if( n->is_Proj() && n->in(0)->is_Start() ) {
+ if( OptoReg::is_reg(first) &&
+ regalloc->_matcher.is_save_on_entry(first) )
+ _callees[first] = first;
+ if( OptoReg::is_reg(second) &&
+ regalloc->_matcher.is_save_on_entry(second) )
+ _callees[second] = second;
+ }
+ }
+ }
+}
+
+//------------------------------merge------------------------------------------
+// Merge the given flow into the 'this' flow
+void OopFlow::merge( OopFlow *flow, int max_reg ) {
+ assert( _b == NULL, "merging into a happy flow" );
+ assert( flow->_b, "this flow is still alive" );
+ assert( flow != this, "no self flow" );
+
+ // Do the merge. If there are any differences, drop to 'bottom' which
+ // is OptoReg::Bad or NULL depending.
+ for( int i=0; i<max_reg; i++ ) {
+ // Merge the callee-save's
+ if( _callees[i] != flow->_callees[i] )
+ _callees[i] = OptoReg::Bad;
+ // Merge the reaching defs
+ if( _defs[i] != flow->_defs[i] )
+ _defs[i] = NULL;
+ }
+
+}
+
+//------------------------------clone------------------------------------------
+void OopFlow::clone( OopFlow *flow, int max_size ) {
+ _b = flow->_b;
+ memcpy( _callees, flow->_callees, sizeof(short)*max_size);
+ memcpy( _defs , flow->_defs , sizeof(Node*)*max_size);
+}
+
+//------------------------------make-------------------------------------------
+OopFlow *OopFlow::make( Arena *A, int max_size ) {
+ short *callees = NEW_ARENA_ARRAY(A,short,max_size+1);
+ Node **defs = NEW_ARENA_ARRAY(A,Node*,max_size+1);
+ debug_only( memset(defs,0,(max_size+1)*sizeof(Node*)) );
+ OopFlow *flow = new (A) OopFlow(callees+1, defs+1);
+ assert( &flow->_callees[OptoReg::Bad] == callees, "Ok to index at OptoReg::Bad" );
+ assert( &flow->_defs [OptoReg::Bad] == defs , "Ok to index at OptoReg::Bad" );
+ return flow;
+}
+
+//------------------------------bit twiddlers----------------------------------
+static int get_live_bit( int *live, int reg ) {
+ return live[reg>>LogBitsPerInt] & (1<<(reg&(BitsPerInt-1))); }
+static void set_live_bit( int *live, int reg ) {
+ live[reg>>LogBitsPerInt] |= (1<<(reg&(BitsPerInt-1))); }
+static void clr_live_bit( int *live, int reg ) {
+ live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); }
+
+//------------------------------build_oop_map----------------------------------
+// Build an oopmap from the current flow info
+OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) {
+ int framesize = regalloc->_framesize;
+ int max_inarg_slot = OptoReg::reg2stack(regalloc->_matcher._new_SP);
+ debug_only( char *dup_check = NEW_RESOURCE_ARRAY(char,OptoReg::stack0());
+ memset(dup_check,0,OptoReg::stack0()) );
+
+ OopMap *omap = new OopMap( framesize, max_inarg_slot );
+ MachCallNode *mcall = n->is_MachCall() ? n->as_MachCall() : NULL;
+ JVMState* jvms = n->jvms();
+
+ // For all registers do...
+ for( int reg=0; reg<max_reg; reg++ ) {
+ if( get_live_bit(live,reg) == 0 )
+ continue; // Ignore if not live
+
+    // %%% C2 can use 2 OptoRegs when the physical register is only one 64-bit
+    // register; in that case we'll get a non-concrete register for the second
+    // half. We only need to tell the map the register once!
+ //
+ // However for the moment we disable this change and leave things as they
+ // were.
+
+ VMReg r = OptoReg::as_VMReg(OptoReg::Name(reg), framesize, max_inarg_slot);
+
+ if (false && r->is_reg() && !r->is_concrete()) {
+ continue;
+ }
+
+ // See if dead (no reaching def).
+ Node *def = _defs[reg]; // Get reaching def
+ assert( def, "since live better have reaching def" );
+
+ // Classify the reaching def as oop, derived, callee-save, dead, or other
+ const Type *t = def->bottom_type();
+ if( t->isa_oop_ptr() ) { // Oop or derived?
+ assert( !OptoReg::is_valid(_callees[reg]), "oop can't be callee save" );
+#ifdef _LP64
+ // 64-bit pointers record oop-ishness on 2 aligned adjacent registers.
+      // Make sure both are recorded from the same reaching def, but do not
+ // put both into the oopmap.
+ if( (reg&1) == 1 ) { // High half of oop-pair?
+ assert( _defs[reg-1] == _defs[reg], "both halves from same reaching def" );
+ continue; // Do not record high parts in oopmap
+ }
+#endif
+
+ // Check for a legal reg name in the oopMap and bailout if it is not.
+ if (!omap->legal_vm_reg_name(r)) {
+ regalloc->C->record_method_not_compilable("illegal oopMap register name");
+ continue;
+ }
+ if( t->is_ptr()->_offset == 0 ) { // Not derived?
+ if( mcall ) {
+ // Outgoing argument GC mask responsibility belongs to the callee,
+ // not the caller. Inspect the inputs to the call, to see if
+ // this live-range is one of them.
+ uint cnt = mcall->tf()->domain()->cnt();
+ uint j;
+ for( j = TypeFunc::Parms; j < cnt; j++)
+ if( mcall->in(j) == def )
+ break; // reaching def is an argument oop
+          if( j < cnt ) // arg oops don't go in GC map
+ continue; // Continue on to the next register
+ }
+ omap->set_oop(r);
+ } else { // Else it's derived.
+ // Find the base of the derived value.
+ uint i;
+ // Fast, common case, scan
+ for( i = jvms->oopoff(); i < n->req(); i+=2 )
+ if( n->in(i) == def ) break; // Common case
+ if( i == n->req() ) { // Missed, try a more generous scan
+ // Scan again, but this time peek through copies
+ for( i = jvms->oopoff(); i < n->req(); i+=2 ) {
+ Node *m = n->in(i); // Get initial derived value
+ while( 1 ) {
+ Node *d = def; // Get initial reaching def
+ while( 1 ) { // Follow copies of reaching def to end
+ if( m == d ) goto found; // breaks 3 loops
+ int idx = d->is_Copy();
+ if( !idx ) break;
+ d = d->in(idx); // Link through copy
+ }
+ int idx = m->is_Copy();
+ if( !idx ) break;
+ m = m->in(idx);
+ }
+ }
+ guarantee( 0, "must find derived/base pair" );
+ }
+ found: ;
+ Node *base = n->in(i+1); // Base is other half of pair
+ int breg = regalloc->get_reg_first(base);
+ VMReg b = OptoReg::as_VMReg(OptoReg::Name(breg), framesize, max_inarg_slot);
+
+ // I record liveness at safepoints BEFORE I make the inputs
+ // live. This is because argument oops are NOT live at a
+ // safepoint (or at least they cannot appear in the oopmap).
+ // Thus bases of base/derived pairs might not be in the
+ // liveness data but they need to appear in the oopmap.
+ if( get_live_bit(live,breg) == 0 ) {// Not live?
+ // Flag it, so next derived pointer won't re-insert into oopmap
+ set_live_bit(live,breg);
+ // Already missed our turn?
+ if( breg < reg ) {
+ if (b->is_stack() || b->is_concrete() || true ) {
+ omap->set_oop( b);
+ }
+ }
+ }
+ if (b->is_stack() || b->is_concrete() || true ) {
+ omap->set_derived_oop( r, b);
+ }
+ }
+
+ } else if( OptoReg::is_valid(_callees[reg])) { // callee-save?
+ // It's a callee-save value
+ assert( dup_check[_callees[reg]]==0, "trying to callee save same reg twice" );
+ debug_only( dup_check[_callees[reg]]=1; )
+ VMReg callee = OptoReg::as_VMReg(OptoReg::Name(_callees[reg]));
+ if ( callee->is_concrete() || true ) {
+ omap->set_callee_saved( r, callee);
+ }
+
+ } else {
+ // Other - some reaching non-oop value
+ omap->set_value( r);
+ }
+
+ }
+
+#ifdef ASSERT
+ /* Nice, Intel-only assert
+ int cnt_callee_saves=0;
+ int reg2 = 0;
+ while (OptoReg::is_reg(reg2)) {
+ if( dup_check[reg2] != 0) cnt_callee_saves++;
+ assert( cnt_callee_saves==3 || cnt_callee_saves==5, "missed some callee-save" );
+ reg2++;
+ }
+ */
+#endif
+
+ return omap;
+}
+
+//------------------------------do_liveness------------------------------------
+// Compute backwards liveness on registers
+static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) {
+ int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints);
+ int *tmp_live = &live[cfg->_num_blocks * max_reg_ints];
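+  // 'live' holds one row of max_reg_ints ints per block, indexed by the
+  // block's _pre_order number; the extra row at the end is the tmp_live
+  // scratch row used while walking a block.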
+ Node *root = cfg->C->root();
+ // On CISC platforms, get the node representing the stack pointer that regalloc
+ // used for spills
+ Node *fp = NodeSentinel;
+ if (UseCISCSpill && root->req() > 1) {
+ fp = root->in(1)->in(TypeFunc::FramePtr);
+ }
+ memset( live, 0, cfg->_num_blocks * (max_reg_ints<<LogBytesPerInt) );
+ // Push preds onto worklist
+ for( uint i=1; i<root->req(); i++ )
+ worklist->push(cfg->_bbs[root->in(i)->_idx]);
+
+ // ZKM.jar includes tiny infinite loops which are unreached from below.
+ // If we missed any blocks, we'll retry here after pushing all missed
+ // blocks on the worklist. Normally this outer loop never trips more
+ // than once.
+ while( 1 ) {
+
+ while( worklist->size() ) { // Standard worklist algorithm
+ Block *b = worklist->rpop();
+
+ // Copy first successor into my tmp_live space
+ int s0num = b->_succs[0]->_pre_order;
+ int *t = &live[s0num*max_reg_ints];
+ for( int i=0; i<max_reg_ints; i++ )
+ tmp_live[i] = t[i];
+
+ // OR in the remaining live registers
+ for( uint j=1; j<b->_num_succs; j++ ) {
+ uint sjnum = b->_succs[j]->_pre_order;
+ int *t = &live[sjnum*max_reg_ints];
+ for( int i=0; i<max_reg_ints; i++ )
+ tmp_live[i] |= t[i];
+ }
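+      // tmp_live is now the union of the successors' live-in sets, i.e. this
+      // block's live-out; the backward walk below refines it into live-in.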
+
+ // Now walk tmp_live up the block backwards, computing live
+ for( int k=b->_nodes.size()-1; k>=0; k-- ) {
+ Node *n = b->_nodes[k];
+ // KILL def'd bits
+ int first = regalloc->get_reg_first(n);
+ int second = regalloc->get_reg_second(n);
+ if( OptoReg::is_valid(first) ) clr_live_bit(tmp_live,first);
+ if( OptoReg::is_valid(second) ) clr_live_bit(tmp_live,second);
+
+ MachNode *m = n->is_Mach() ? n->as_Mach() : NULL;
+
+        // Check if m is potentially a CISC alternate instruction (i.e., possibly
+ // synthesized by RegAlloc from a conventional instruction and a
+ // spilled input)
+ bool is_cisc_alternate = false;
+ if (UseCISCSpill && m) {
+ is_cisc_alternate = m->is_cisc_alternate();
+ }
+
+ // GEN use'd bits
+ for( uint l=1; l<n->req(); l++ ) {
+ Node *def = n->in(l);
+ assert(def != 0, "input edge required");
+ int first = regalloc->get_reg_first(def);
+ int second = regalloc->get_reg_second(def);
+ if( OptoReg::is_valid(first) ) set_live_bit(tmp_live,first);
+ if( OptoReg::is_valid(second) ) set_live_bit(tmp_live,second);
+ // If we use the stack pointer in a cisc-alternative instruction,
+ // check for use as a memory operand. Then reconstruct the RegName
+ // for this stack location, and set the appropriate bit in the
+          // live vector (see bug 4987749).
+ if (is_cisc_alternate && def == fp) {
+ const TypePtr *adr_type = NULL;
+ intptr_t offset;
+ const Node* base = m->get_base_and_disp(offset, adr_type);
+ if (base == NodeSentinel) {
+ // Machnode has multiple memory inputs. We are unable to reason
+            // about these, but are presuming (with trepidation) that none of
+ // them are oops. This can be fixed by making get_base_and_disp()
+ // look at a specific input instead of all inputs.
+ assert(!def->bottom_type()->isa_oop_ptr(), "expecting non-oop mem input");
+ } else if (base != fp || offset == Type::OffsetBot) {
+ // Do nothing: the fp operand is either not from a memory use
+ // (base == NULL) OR the fp is used in a non-memory context
+ // (base is some other register) OR the offset is not constant,
+ // so it is not a stack slot.
+ } else {
+ assert(offset >= 0, "unexpected negative offset");
+ offset -= (offset % jintSize); // count the whole word
+ int stack_reg = regalloc->offset2reg(offset);
+ if (OptoReg::is_stack(stack_reg)) {
+ set_live_bit(tmp_live, stack_reg);
+ } else {
+ assert(false, "stack_reg not on stack?");
+ }
+ }
+ }
+ }
+
+ if( n->jvms() ) { // Record liveness at safepoint
+
+        // The placement of this stanza means inputs to calls are
+ // considered live at the callsite's OopMap. Argument oops are
+ // hence live, but NOT included in the oopmap. See cutout in
+ // build_oop_map. Debug oops are live (and in OopMap).
+ int *n_live = NEW_ARENA_ARRAY(A, int, max_reg_ints);
+ for( int l=0; l<max_reg_ints; l++ )
+ n_live[l] = tmp_live[l];
+ safehash->Insert(n,n_live);
+ }
+
+ }
+
+ // Now at block top, see if we have any changes. If so, propagate
+ // to prior blocks.
+ int *old_live = &live[b->_pre_order*max_reg_ints];
+ int l;
+ for( l=0; l<max_reg_ints; l++ )
+ if( tmp_live[l] != old_live[l] )
+ break;
+ if( l<max_reg_ints ) { // Change!
+ // Copy in new value
+ for( l=0; l<max_reg_ints; l++ )
+ old_live[l] = tmp_live[l];
+ // Push preds onto worklist
+ for( l=1; l<(int)b->num_preds(); l++ )
+ worklist->push(cfg->_bbs[b->pred(l)->_idx]);
+ }
+ }
+
+    // Scan for any missing safepoints.  Happens with infinite loops
+    // a la ZKM.jar
+ uint i;
+ for( i=1; i<cfg->_num_blocks; i++ ) {
+ Block *b = cfg->_blocks[i];
+ uint j;
+ for( j=1; j<b->_nodes.size(); j++ )
+ if( b->_nodes[j]->jvms() &&
+ (*safehash)[b->_nodes[j]] == NULL )
+ break;
+ if( j<b->_nodes.size() ) break;
+ }
+ if( i == cfg->_num_blocks )
+ break; // Got 'em all
+#ifndef PRODUCT
+ if( PrintOpto && Verbose )
+ tty->print_cr("retripping live calc");
+#endif
+ // Force the issue (expensively): recheck everybody
+ for( i=1; i<cfg->_num_blocks; i++ )
+ worklist->push(cfg->_blocks[i]);
+ }
+
+}
+
+//------------------------------BuildOopMaps-----------------------------------
+// Collect GC mask info - where are all the OOPs?
+void Compile::BuildOopMaps() {
+ NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); )
+ // Can't resource-mark because I need to leave all those OopMaps around,
+ // or else I need to resource-mark some arena other than the default.
+ // ResourceMark rm; // Reclaim all OopFlows when done
+ int max_reg = _regalloc->_max_reg; // Current array extent
+
+ Arena *A = Thread::current()->resource_area();
+ Block_List worklist; // Worklist of pending blocks
+
+ int max_reg_ints = round_to(max_reg, BitsPerInt)>>LogBitsPerInt;
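+  // One live bit per register, rounded up to whole ints: this is the row
+  // length of every per-block liveness vector built in do_liveness.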
+  Dict *safehash = NULL;        // Maps each safepoint node to its live-register set
+ // Compute a backwards liveness per register. Needs a bitarray of
+ // #blocks x (#registers, rounded up to ints)
+ safehash = new Dict(cmpkey,hashkey,A);
+ do_liveness( _regalloc, _cfg, &worklist, max_reg_ints, A, safehash );
+ OopFlow *free_list = NULL; // Free, unused
+
+ // Array mapping blocks to completed oopflows
+ OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks);
+ memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) );
+
+
+ // Do the first block 'by hand' to prime the worklist
+ Block *entry = _cfg->_blocks[1];
+ OopFlow *rootflow = OopFlow::make(A,max_reg);
+ // Initialize to 'bottom' (not 'top')
+ memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) );
+ memset( rootflow->_defs , 0, max_reg*sizeof(Node*) );
+ flows[entry->_pre_order] = rootflow;
+
+ // Do the first block 'by hand' to prime the worklist
+ rootflow->_b = entry;
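+  // compute_reach propagates per-register reaching defs through the block;
+  // at each safepoint those defs are combined with the live set recorded in
+  // safehash by do_liveness to build the OopMap (see build_oop_map above).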
+ rootflow->compute_reach( _regalloc, max_reg, safehash );
+ for( uint i=0; i<entry->_num_succs; i++ )
+ worklist.push(entry->_succs[i]);
+
+ // Now worklist contains blocks which have some, but perhaps not all,
+ // predecessors visited.
+ while( worklist.size() ) {
+    // Scan for a block with all predecessors visited, or any random slob
+ // otherwise. All-preds-visited order allows me to recycle OopFlow
+ // structures rapidly and cut down on the memory footprint.
+ // Note: not all predecessors might be visited yet (must happen for
+ // irreducible loops). This is OK, since every live value must have the
+ // SAME reaching def for the block, so any reaching def is OK.
+ uint i;
+
+ Block *b = worklist.pop();
+ // Ignore root block
+ if( b == _cfg->_broot ) continue;
+ // Block is already done? Happens if block has several predecessors,
+    // so it can get on the worklist more than once.
+ if( flows[b->_pre_order] ) continue;
+
+ // If this block has a visited predecessor AND that predecessor has this
+ // last block as his only undone child, we can move the OopFlow from the
+ // pred to this block. Otherwise we have to grab a new OopFlow.
+ OopFlow *flow = NULL; // Flag for finding optimized flow
+ Block *pred = (Block*)0xdeadbeef;
+ uint j;
+ // Scan this block's preds to find a done predecessor
+ for( j=1; j<b->num_preds(); j++ ) {
+ Block *p = _cfg->_bbs[b->pred(j)->_idx];
+ OopFlow *p_flow = flows[p->_pre_order];
+ if( p_flow ) { // Predecessor is done
+ assert( p_flow->_b == p, "cross check" );
+ pred = p; // Record some predecessor
+ // If all successors of p are done except for 'b', then we can carry
+ // p_flow forward to 'b' without copying, otherwise we have to draw
+ // from the free_list and clone data.
+ uint k;
+ for( k=0; k<p->_num_succs; k++ )
+ if( !flows[p->_succs[k]->_pre_order] &&
+ p->_succs[k] != b )
+ break;
+
+ // Either carry-forward the now-unused OopFlow for b's use
+ // or draw a new one from the free list
+ if( k==p->_num_succs ) {
+ flow = p_flow;
+ break; // Found an ideal pred, use him
+ }
+ }
+ }
+
+ if( flow ) {
+ // We have an OopFlow that's the last-use of a predecessor.
+ // Carry it forward.
+ } else { // Draw a new OopFlow from the freelist
+ if( !free_list )
+ free_list = OopFlow::make(A,max_reg);
+ flow = free_list;
+ assert( flow->_b == NULL, "oopFlow is not free" );
+ free_list = flow->_next;
+ flow->_next = NULL;
+
+ // Copy/clone over the data
+ flow->clone(flows[pred->_pre_order], max_reg);
+ }
+
+ // Mark flow for block. Blocks can only be flowed over once,
+ // because after the first time they are guarded from entering
+ // this code again.
+ assert( flow->_b == pred, "have some prior flow" );
+ flow->_b = NULL;
+
+ // Now push flow forward
+ flows[b->_pre_order] = flow;// Mark flow for this block
+ flow->_b = b;
+ flow->compute_reach( _regalloc, max_reg, safehash );
+
+ // Now push children onto worklist
+ for( i=0; i<b->_num_succs; i++ )
+ worklist.push(b->_succs[i]);
+
+ }
+}
diff --git a/src/share/vm/opto/bytecodeInfo.cpp b/src/share/vm/opto/bytecodeInfo.cpp
new file mode 100644
index 000000000..10648c654
--- /dev/null
+++ b/src/share/vm/opto/bytecodeInfo.cpp
@@ -0,0 +1,490 @@
+/*
+ * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_bytecodeInfo.cpp.incl"
+
+// These variables are declared in parse1.cpp
+extern int explicit_null_checks_inserted;
+extern int explicit_null_checks_elided;
+extern int explicit_null_checks_inserted_old;
+extern int explicit_null_checks_elided_old;
+extern int nodes_created_old;
+extern int nodes_created;
+extern int methods_parsed_old;
+extern int methods_parsed;
+extern int methods_seen;
+extern int methods_seen_old;
+
+
+//=============================================================================
+//------------------------------InlineTree-------------------------------------
+InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio )
+: C(c), _caller_jvms(caller_jvms),
+ _caller_tree((InlineTree*)caller_tree),
+ _method(callee), _site_invoke_ratio(site_invoke_ratio),
+ _count_inline_bcs(method()->code_size()) {
+ NOT_PRODUCT(_count_inlines = 0;)
+ if (_caller_jvms != NULL) {
+ // Keep a private copy of the caller_jvms:
+ _caller_jvms = new (C) JVMState(caller_jvms->method(), caller_tree->caller_jvms());
+ _caller_jvms->set_bci(caller_jvms->bci());
+ }
+ assert(_caller_jvms->same_calls_as(caller_jvms), "consistent JVMS");
+ assert((caller_tree == NULL ? 0 : caller_tree->inline_depth() + 1) == inline_depth(), "correct (redundant) depth parameter");
+ assert(caller_bci == this->caller_bci(), "correct (redundant) bci parameter");
+ if (UseOldInlining) {
+ // Update hierarchical counts, count_inline_bcs() and count_inlines()
+ InlineTree *caller = (InlineTree *)caller_tree;
+ for( ; caller != NULL; caller = ((InlineTree *)(caller->caller_tree())) ) {
+ caller->_count_inline_bcs += count_inline_bcs();
+ NOT_PRODUCT(caller->_count_inlines++;)
+ }
+ }
+}
+
+InlineTree::InlineTree(Compile* c, ciMethod* callee_method, JVMState* caller_jvms, float site_invoke_ratio)
+: C(c), _caller_jvms(caller_jvms), _caller_tree(NULL),
+ _method(callee_method), _site_invoke_ratio(site_invoke_ratio),
+ _count_inline_bcs(method()->code_size()) {
+ NOT_PRODUCT(_count_inlines = 0;)
+ assert(!UseOldInlining, "do not use for old stuff");
+}
+
+
+
+static void print_indent(int depth) {
+ tty->print(" ");
+ for (int i = depth; i != 0; --i) tty->print(" ");
+}
+
+// positive filter: should the send be inlined?  Returns NULL if yes, or the rejection msg.
+const char* InlineTree::shouldInline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const {
+ // Allows targeted inlining
+ if(callee_method->should_inline()) {
+ *wci_result = *(WarmCallInfo::always_hot());
+ if (PrintInlining && Verbose) {
+ print_indent(inline_depth());
+ tty->print_cr("Inlined method is hot: ");
+ }
+ return NULL;
+ }
+
+ // positive filter: should send be inlined? returns NULL (--> yes)
+ // or rejection msg
+ int max_size = C->max_inline_size();
+ int size = callee_method->code_size();
+
+ // Check for too many throws (and not too huge)
+ if(callee_method->interpreter_throwout_count() > InlineThrowCount && size < InlineThrowMaxSize ) {
+ wci_result->set_profit(wci_result->profit() * 100);
+ if (PrintInlining && Verbose) {
+ print_indent(inline_depth());
+ tty->print_cr("Inlined method with many throws (throws=%d):", callee_method->interpreter_throwout_count());
+ }
+ return NULL;
+ }
+
+ if (!UseOldInlining) {
+ return NULL; // size and frequency are represented in a new way
+ }
+
+ int call_site_count = method()->scale_count(profile.count());
+ int invoke_count = method()->interpreter_invocation_count();
+  assert( invoke_count != 0, "Require invocation count greater than zero");
+ int freq = call_site_count/invoke_count;
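+  // freq is the whole number of executions of this call site per interpreter
+  // invocation of the caller (integer division, so rarely-hit sites become 0).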
+ // bump the max size if the call is frequent
+ if ((freq >= InlineFrequencyRatio) || (call_site_count >= InlineFrequencyCount)) {
+ max_size = C->freq_inline_size();
+ if (size <= max_size && TraceFrequencyInlining) {
+ print_indent(inline_depth());
+ tty->print_cr("Inlined frequent method (freq=%d count=%d):", freq, call_site_count);
+ print_indent(inline_depth());
+ callee_method->print();
+ tty->cr();
+ }
+ } else {
+ // Not hot. Check for medium-sized pre-existing nmethod at cold sites.
+ if (callee_method->has_compiled_code() && callee_method->instructions_size() > InlineSmallCode/4)
+ return "already compiled into a medium method";
+ }
+ if (size > max_size) {
+ if (max_size > C->max_inline_size())
+ return "hot method too big";
+ return "too big";
+ }
+ return NULL;
+}
+
+
+// negative filter: should the send NOT be inlined?  Returns NULL if it is ok to inline, or the rejection msg.
+const char* InlineTree::shouldNotInline(ciMethod *callee_method, WarmCallInfo* wci_result) const {
+ // negative filter: should send NOT be inlined? returns NULL (--> inline) or rejection msg
+ if (!UseOldInlining) {
+ const char* fail = NULL;
+ if (callee_method->is_abstract()) fail = "abstract method";
+ // note: we allow ik->is_abstract()
+ if (!callee_method->holder()->is_initialized()) fail = "method holder not initialized";
+ if (callee_method->is_native()) fail = "native method";
+
+ if (fail) {
+ *wci_result = *(WarmCallInfo::always_cold());
+ return fail;
+ }
+
+ if (callee_method->has_unloaded_classes_in_signature()) {
+ wci_result->set_profit(wci_result->profit() * 0.1);
+ }
+
+ // don't inline exception code unless the top method belongs to an
+ // exception class
+ if (callee_method->holder()->is_subclass_of(C->env()->Throwable_klass())) {
+ ciMethod* top_method = caller_jvms() ? caller_jvms()->of_depth(1)->method() : method();
+ if (!top_method->holder()->is_subclass_of(C->env()->Throwable_klass())) {
+ wci_result->set_profit(wci_result->profit() * 0.1);
+ }
+ }
+
+ if (callee_method->has_compiled_code() && callee_method->instructions_size() > InlineSmallCode) {
+ wci_result->set_profit(wci_result->profit() * 0.1);
+ // %%% adjust wci_result->size()?
+ }
+
+ return NULL;
+ }
+
+ // First check all inlining restrictions which are required for correctness
+ if (callee_method->is_abstract()) return "abstract method";
+ // note: we allow ik->is_abstract()
+ if (!callee_method->holder()->is_initialized()) return "method holder not initialized";
+ if (callee_method->is_native()) return "native method";
+ if (callee_method->has_unloaded_classes_in_signature()) return "unloaded signature classes";
+
+ if (callee_method->should_inline()) {
+ // ignore heuristic controls on inlining
+ return NULL;
+ }
+
+ // Now perform checks which are heuristic
+
+ if( callee_method->has_compiled_code() && callee_method->instructions_size() > InlineSmallCode )
+ return "already compiled into a big method";
+
+ // don't inline exception code unless the top method belongs to an
+ // exception class
+ if (caller_tree() != NULL &&
+ callee_method->holder()->is_subclass_of(C->env()->Throwable_klass())) {
+ const InlineTree *top = this;
+ while (top->caller_tree() != NULL) top = top->caller_tree();
+ ciInstanceKlass* k = top->method()->holder();
+ if (!k->is_subclass_of(C->env()->Throwable_klass()))
+ return "exception method";
+ }
+
+ // use frequency-based objections only for non-trivial methods
+ if (callee_method->code_size() <= MaxTrivialSize) return NULL;
+ if (UseInterpreter && !CompileTheWorld) { // don't use counts with -Xcomp or CTW
+ if (!callee_method->has_compiled_code() && !callee_method->was_executed_more_than(0)) return "never executed";
+ if (!callee_method->was_executed_more_than(MIN2(MinInliningThreshold, CompileThreshold >> 1))) return "executed < MinInliningThreshold times";
+ }
+
+ if (callee_method->should_not_inline()) {
+ return "disallowed by CompilerOracle";
+ }
+
+ return NULL;
+}
+
+//-----------------------------try_to_inline-----------------------------------
+// return NULL if ok, reason for not inlining otherwise
+// Relocated from "InliningClosure::try_to_inline"
+const char* InlineTree::try_to_inline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) {
+ ciMethod* caller_method = method();
+
+ // Old algorithm had funny accumulating BC-size counters
+ if (UseOldInlining && ClipInlining
+ && (int)count_inline_bcs() >= DesiredMethodLimit) {
+ return "size > DesiredMethodLimit";
+ }
+
+ const char *msg = NULL;
+ if ((msg = shouldInline(callee_method, caller_bci, profile, wci_result)) != NULL) return msg;
+ if ((msg = shouldNotInline(callee_method, wci_result)) != NULL) return msg;
+
+ bool is_accessor = InlineAccessors && callee_method->is_accessor();
+
+ // suppress a few checks for accessors and trivial methods
+ if (!is_accessor && callee_method->code_size() > MaxTrivialSize) {
+ // don't inline into giant methods
+ if (C->unique() > (uint)NodeCountInliningCutoff) return "NodeCountInliningCutoff";
+
+ // don't inline unreached call sites
+ if (profile.count() == 0) return "call site not reached";
+ }
+
+ if (!C->do_inlining() && InlineAccessors && !is_accessor) return "not an accessor";
+
+ if( inline_depth() > MaxInlineLevel ) return "inlining too deep";
+ if( method() == callee_method &&
+ inline_depth() > MaxRecursiveInlineLevel ) return "recursively inlining too deep";
+
+ int size = callee_method->code_size();
+
+ if (UseOldInlining && ClipInlining
+ && (int)count_inline_bcs() + size >= DesiredMethodLimit) {
+ return "size > DesiredMethodLimit";
+ }
+
+ // ok, inline this method
+ return NULL;
+}
+
+//------------------------------pass_initial_checks----------------------------
+bool pass_initial_checks(ciMethod* caller_method, int caller_bci, ciMethod* callee_method) {
+ ciInstanceKlass *callee_holder = callee_method ? callee_method->holder() : NULL;
+ // Check if a callee_method was suggested
+ if( callee_method == NULL ) return false;
+ // Check if klass of callee_method is loaded
+ if( !callee_holder->is_loaded() ) return false;
+ if( !callee_holder->is_initialized() ) return false;
+ if( !UseInterpreter || CompileTheWorld /* running Xcomp or CTW */ ) {
+ // Checks that constant pool's call site has been visited
+ // stricter than callee_holder->is_initialized()
+ ciBytecodeStream iter(caller_method);
+ iter.force_bci(caller_bci);
+ int index = iter.get_index_big();
+ if( !caller_method->is_klass_loaded(index, true) ) {
+ return false;
+ }
+ // Try to do constant pool resolution if running Xcomp
+ Bytecodes::Code call_bc = iter.cur_bc();
+ if( !caller_method->check_call(index, call_bc == Bytecodes::_invokestatic) ) {
+ return false;
+ }
+ }
+ // We will attempt to see if a class/field/etc got properly loaded. If it
+ // did not, it may attempt to throw an exception during our probing. Catch
+ // and ignore such exceptions and do not attempt to compile the method.
+ if( callee_method->should_exclude() ) return false;
+
+ return true;
+}
+
+#ifndef PRODUCT
+//------------------------------print_inlining---------------------------------
+// Really, the failure_msg can be a success message also.
+void InlineTree::print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const {
+ print_indent(inline_depth());
+ tty->print("@ %d ", caller_bci);
+ if( callee_method ) callee_method->print_short_name();
+ else tty->print(" callee not monotonic or profiled");
+ tty->print(" %s", (failure_msg ? failure_msg : "inline"));
+ if( Verbose && callee_method ) {
+ const InlineTree *top = this;
+ while( top->caller_tree() != NULL ) { top = top->caller_tree(); }
+ tty->print(" bcs: %d+%d invoked: %d", top->count_inline_bcs(), callee_method->code_size(), callee_method->interpreter_invocation_count());
+ }
+ tty->cr();
+}
+#endif
+
+//------------------------------ok_to_inline-----------------------------------
+WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) {
+ assert(callee_method != NULL, "caller checks for optimized virtual!");
+#ifdef ASSERT
+ // Make sure the incoming jvms has the same information content as me.
+ // This means that we can eventually make this whole class AllStatic.
+ if (jvms->caller() == NULL) {
+ assert(_caller_jvms == NULL, "redundant instance state");
+ } else {
+ assert(_caller_jvms->same_calls_as(jvms->caller()), "redundant instance state");
+ }
+ assert(_method == jvms->method(), "redundant instance state");
+#endif
+ const char *failure_msg = NULL;
+ int caller_bci = jvms->bci();
+ ciMethod *caller_method = jvms->method();
+
+ if( !pass_initial_checks(caller_method, caller_bci, callee_method)) {
+ if( PrintInlining ) {
+ failure_msg = "failed_initial_checks";
+ print_inlining( callee_method, caller_bci, failure_msg);
+ }
+ return NULL;
+ }
+
+ // Check if inlining policy says no.
+ WarmCallInfo wci = *(initial_wci);
+ failure_msg = try_to_inline(callee_method, caller_bci, profile, &wci);
+ if (failure_msg != NULL && C->log() != NULL) {
+ C->log()->begin_elem("inline_fail reason='");
+ C->log()->text("%s", failure_msg);
+ C->log()->end_elem("'");
+ }
+
+#ifndef PRODUCT
+ if (UseOldInlining && InlineWarmCalls
+ && (PrintOpto || PrintOptoInlining || PrintInlining)) {
+ bool cold = wci.is_cold();
+ bool hot = !cold && wci.is_hot();
+ bool old_cold = (failure_msg != NULL);
+ if (old_cold != cold || (Verbose || WizardMode)) {
+ tty->print(" OldInlining= %4s : %s\n WCI=",
+ old_cold ? "cold" : "hot", failure_msg ? failure_msg : "OK");
+ wci.print();
+ }
+ }
+#endif
+ if (UseOldInlining) {
+ if (failure_msg == NULL)
+ wci = *(WarmCallInfo::always_hot());
+ else
+ wci = *(WarmCallInfo::always_cold());
+ }
+ if (!InlineWarmCalls) {
+ if (!wci.is_cold() && !wci.is_hot()) {
+ // Do not inline the warm calls.
+ wci = *(WarmCallInfo::always_cold());
+ }
+ }
+
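+  // At this point wci carries the final temperature: UseOldInlining forces it
+  // to always_hot or always_cold, and without InlineWarmCalls any remaining
+  // warm call has just been demoted to cold.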
+ if (!wci.is_cold()) {
+ // In -UseOldInlining, the failure_msg may also be a success message.
+ if (failure_msg == NULL) failure_msg = "inline (hot)";
+
+ // Inline!
+ if( PrintInlining ) print_inlining( callee_method, caller_bci, failure_msg);
+ if (UseOldInlining)
+ build_inline_tree_for_callee(callee_method, jvms, caller_bci);
+ if (InlineWarmCalls && !wci.is_hot())
+ return new (C) WarmCallInfo(wci); // copy to heap
+ return WarmCallInfo::always_hot();
+ }
+
+ // Do not inline
+ if (failure_msg == NULL) failure_msg = "too cold to inline";
+ if( PrintInlining ) print_inlining( callee_method, caller_bci, failure_msg);
+ return NULL;
+}
+
+//------------------------------compute_callee_frequency-----------------------
+float InlineTree::compute_callee_frequency( int caller_bci ) const {
+ int count = method()->interpreter_call_site_count(caller_bci);
+ int invcnt = method()->interpreter_invocation_count();
+ float freq = (float)count/(float)invcnt;
+ // Call-site count / interpreter invocation count, scaled recursively.
+ // Always between 0.0 and 1.0. Represents the percentage of the method's
+ // total execution time used at this call site.
+
+ return freq;
+}
+
+//------------------------------build_inline_tree_for_callee-------------------
+InlineTree *InlineTree::build_inline_tree_for_callee( ciMethod* callee_method, JVMState* caller_jvms, int caller_bci) {
+ float recur_frequency = _site_invoke_ratio * compute_callee_frequency(caller_bci);
+ // Attempt inlining.
+ InlineTree* old_ilt = callee_at(caller_bci, callee_method);
+ if (old_ilt != NULL) {
+ return old_ilt;
+ }
+ InlineTree *ilt = new InlineTree( C, this, callee_method, caller_jvms, caller_bci, recur_frequency );
+ _subtrees.append( ilt );
+
+ NOT_PRODUCT( _count_inlines += 1; )
+
+ return ilt;
+}
+
+
+//---------------------------------------callee_at-----------------------------
+InlineTree *InlineTree::callee_at(int bci, ciMethod* callee) const {
+ for (int i = 0; i < _subtrees.length(); i++) {
+ InlineTree* sub = _subtrees.at(i);
+ if (sub->caller_bci() == bci && callee == sub->method()) {
+ return sub;
+ }
+ }
+ return NULL;
+}
+
+
+//------------------------------build_inline_tree_root-------------------------
+InlineTree *InlineTree::build_inline_tree_root() {
+ Compile* C = Compile::current();
+
+ // Root of inline tree
+ InlineTree *ilt = new InlineTree(C, NULL, C->method(), NULL, -1, 1.0F);
+
+ return ilt;
+}
+
+
+//-------------------------find_subtree_from_root-----------------------------
+// Given a jvms, which determines a call chain from the root method,
+// find the corresponding inline tree.
+// Note: This method will be removed or replaced as InlineTree goes away.
+InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee, bool create_if_not_found) {
+ InlineTree* iltp = root;
+ uint depth = jvms && jvms->has_method() ? jvms->depth() : 0;
+ for (uint d = 1; d <= depth; d++) {
+ JVMState* jvmsp = jvms->of_depth(d);
+ // Select the corresponding subtree for this bci.
+ assert(jvmsp->method() == iltp->method(), "tree still in sync");
+ ciMethod* d_callee = (d == depth) ? callee : jvms->of_depth(d+1)->method();
+ InlineTree* sub = iltp->callee_at(jvmsp->bci(), d_callee);
+ if (!sub) {
+ if (create_if_not_found && d == depth) {
+ return iltp->build_inline_tree_for_callee(d_callee, jvmsp, jvmsp->bci());
+ }
+ assert(sub != NULL, "should be a sub-ilt here");
+ return NULL;
+ }
+ iltp = sub;
+ }
+ return iltp;
+}
+
+// ----------------------------------------------------------------------------
+#ifndef PRODUCT
+
+static void per_method_stats() {
+ // Compute difference between this method's cumulative totals and old totals
+ int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old;
+ int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old;
+
+ // Print differences
+ if( explicit_null_checks_cur )
+ tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur);
+ if( elided_null_checks_cur )
+ tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur);
+
+ // Store the current cumulative totals
+ nodes_created_old = nodes_created;
+ methods_parsed_old = methods_parsed;
+ methods_seen_old = methods_seen;
+ explicit_null_checks_inserted_old = explicit_null_checks_inserted;
+ explicit_null_checks_elided_old = explicit_null_checks_elided;
+}
+
+#endif
diff --git a/src/share/vm/opto/c2_globals.cpp b/src/share/vm/opto/c2_globals.cpp
new file mode 100644
index 000000000..5715b24ba
--- /dev/null
+++ b/src/share/vm/opto/c2_globals.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2000-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_c2_globals.cpp.incl"
+
+C2_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG)
diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp
new file mode 100644
index 000000000..360300255
--- /dev/null
+++ b/src/share/vm/opto/c2_globals.hpp
@@ -0,0 +1,382 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Defines all globals flags used by the server compiler.
+//
+
+#define C2_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
+ \
+ notproduct(intx, CompileZapFirst, 0, \
+ "If +ZapDeadCompiledLocals, " \
+ "skip this many before compiling in zap calls") \
+ \
+ notproduct(intx, CompileZapLast, -1, \
+ "If +ZapDeadCompiledLocals, " \
+ "compile this many after skipping (incl. skip count, -1 = all)") \
+ \
+ notproduct(intx, ZapDeadCompiledLocalsFirst, 0, \
+ "If +ZapDeadCompiledLocals, " \
+ "skip this many before really doing it") \
+ \
+ notproduct(intx, ZapDeadCompiledLocalsLast, -1, \
+ "If +ZapDeadCompiledLocals, " \
+ "do this many after skipping (incl. skip count, -1 = all)") \
+ \
+ develop(intx, OptoPrologueNops, 0, \
+ "Insert this many extra nop instructions " \
+ "in the prologue of every nmethod") \
+ \
+ product_pd(intx, InteriorEntryAlignment, \
+ "Code alignment for interior entry points " \
+ "in generated code (in bytes)") \
+ \
+ product_pd(intx, OptoLoopAlignment, \
+ "Align inner loops to zero relative to this modulus") \
+ \
+ product(intx, MaxLoopPad, (OptoLoopAlignment-1), \
+ "Align a loop if padding size in bytes is less or equal to this value") \
+ \
+ product(intx, NumberOfLoopInstrToAlign, 4, \
+ "Number of first instructions in a loop to align") \
+ \
+ notproduct(intx, IndexSetWatch, 0, \
+ "Trace all operations on this IndexSet (-1 means all, 0 none)") \
+ \
+ develop(intx, OptoNodeListSize, 4, \
+ "Starting allocation size of Node_List data structures") \
+ \
+ develop(intx, OptoBlockListSize, 8, \
+ "Starting allocation size of Block_List data structures") \
+ \
+ develop(intx, OptoPeepholeAt, -1, \
+ "Apply peephole optimizations to this peephole rule") \
+ \
+ notproduct(bool, PrintIdeal, false, \
+ "Print ideal graph before code generation") \
+ \
+ notproduct(bool, PrintOpto, false, \
+ "Print compiler2 attempts") \
+ \
+ notproduct(bool, PrintOptoInlining, false, \
+ "Print compiler2 inlining decisions") \
+ \
+ notproduct(bool, VerifyOpto, false, \
+ "Apply more time consuming verification during compilation") \
+ \
+ notproduct(bool, VerifyOptoOopOffsets, false, \
+ "Check types of base addresses in field references") \
+ \
+ develop(bool, IdealizedNumerics, false, \
+ "Check performance difference allowing FP " \
+ "associativity and commutativity...") \
+ \
+ develop(bool, OptoBreakpoint, false, \
+ "insert breakpoint at method entry") \
+ \
+ notproduct(bool, OptoBreakpointOSR, false, \
+ "insert breakpoint at osr method entry") \
+ \
+ notproduct(intx, BreakAtNode, 0, \
+ "Break at construction of this Node (either _idx or _debug_idx)") \
+ \
+ notproduct(bool, OptoBreakpointC2R, false, \
+ "insert breakpoint at runtime stub entry") \
+ \
+ notproduct(bool, OptoNoExecute, false, \
+ "Attempt to parse and compile but do not execute generated code") \
+ \
+ notproduct(bool, PrintOptoStatistics, false, \
+ "Print New compiler statistics") \
+ \
+ notproduct(bool, PrintOptoAssembly, false, \
+ "Print New compiler assembly output") \
+ \
+ develop_pd(bool, OptoPeephole, \
+ "Apply peephole optimizations after register allocation") \
+ \
+ develop(bool, OptoRemoveUseless, true, \
+ "Remove useless nodes after parsing") \
+ \
+ notproduct(bool, PrintFrameConverterAssembly, false, \
+ "Print New compiler assembly output for frame converters") \
+ \
+ notproduct(bool, PrintParseStatistics, false, \
+ "Print nodes, transforms and new values made per bytecode parsed")\
+ \
+ notproduct(bool, PrintOptoPeephole, false, \
+ "Print New compiler peephole replacements") \
+ \
+ develop(bool, PrintCFGBlockFreq, false, \
+ "Print CFG block freqencies") \
+ \
+ develop(bool, TraceOptoParse, false, \
+ "Trace bytecode parse and control-flow merge") \
+ \
+ product_pd(intx, LoopUnrollLimit, \
+ "Unroll loop bodies with node count less than this") \
+ \
+ product(intx, LoopUnrollMin, 4, \
+ "Minimum number of unroll loop bodies before checking progress" \
+ "of rounds of unroll,optimize,..") \
+ \
+ develop(intx, UnrollLimitForProfileCheck, 1, \
+ "Don't use profile_trip_cnt() to restrict unrolling until " \
+ "unrolling would push the number of unrolled iterations above " \
+ "UnrollLimitForProfileCheck. A higher value allows more " \
+ "unrolling. Zero acts as a very large value." ) \
+ \
+ product(intx, MultiArrayExpandLimit, 6, \
+ "Maximum number of individual allocations in an inline-expanded " \
+ "multianewarray instruction") \
+ \
+ notproduct(bool, TraceProfileTripCount, false, \
+ "Trace profile loop trip count information") \
+ \
+ develop(bool, OptoCoalesce, true, \
+ "Use Conservative Copy Coalescing in the Register Allocator") \
+ \
+ develop(bool, UseUniqueSubclasses, true, \
+ "Narrow an abstract reference to the unique concrete subclass") \
+ \
+ develop(bool, UseExactTypes, true, \
+ "Use exact types to eliminate array store checks and v-calls") \
+ \
+ product(intx, TrackedInitializationLimit, 50, \
+ "When initializing fields, track up to this many words") \
+ \
+ product(bool, ReduceFieldZeroing, true, \
+ "When initializing fields, try to avoid needless zeroing") \
+ \
+ product(bool, ReduceInitialCardMarks, true, \
+ "When initializing fields, try to avoid needless card marks") \
+ \
+ product(bool, ReduceBulkZeroing, true, \
+ "When bulk-initializing, try to avoid needless zeroing") \
+ \
+ develop_pd(intx, RegisterCostAreaRatio, \
+ "Spill selection in reg allocator: scale area by (X/64K) before " \
+ "adding cost") \
+ \
+ develop_pd(bool, UseCISCSpill, \
+ "Use ADLC supplied cisc instructions during allocation") \
+ \
+ notproduct(bool, VerifyGraphEdges , false, \
+ "Verify Bi-directional Edges") \
+ \
+ notproduct(bool, VerifyDUIterators, true, \
+ "Verify the safety of all iterations of Bi-directional Edges") \
+ \
+ notproduct(bool, VerifyHashTableKeys, true, \
+ "Verify the immutability of keys in the VN hash tables") \
+ \
+ develop_pd(intx, FLOATPRESSURE, \
+ "Number of float LRG's that constitute high register pressure") \
+ \
+ develop_pd(intx, INTPRESSURE, \
+ "Number of integer LRG's that constitute high register pressure") \
+ \
+ notproduct(bool, TraceOptoPipelining, false, \
+ "Trace pipelining information") \
+ \
+ notproduct(bool, TraceOptoOutput, false, \
+ "Trace pipelining information") \
+ \
+ product_pd(bool, OptoScheduling, \
+ "Instruction Scheduling after register allocation") \
+ \
+ product(bool, PartialPeelLoop, true, \
+ "Partial peel (rotate) loops") \
+ \
+ product(intx, PartialPeelNewPhiDelta, 0, \
+ "Additional phis that can be created by partial peeling") \
+ \
+ notproduct(bool, TracePartialPeeling, false, \
+ "Trace partial peeling (loop rotation) information") \
+ \
+ product(bool, PartialPeelAtUnsignedTests, true, \
+ "Partial peel at unsigned tests if no signed test exists") \
+ \
+ product(bool, ReassociateInvariants, true, \
+ "Enable reassociation of expressions with loop invariants.") \
+ \
+ product(bool, LoopUnswitching, true, \
+ "Enable loop unswitching (a form of invariant test hoisting)") \
+ \
+ notproduct(bool, TraceLoopUnswitching, false, \
+ "Trace loop unswitching") \
+ \
+ product(bool, UseSuperWord, true, \
+ "Transform scalar operations into superword operations") \
+ \
+ develop(bool, SuperWordRTDepCheck, false, \
+ "Enable runtime dependency checks.") \
+ \
+ product(bool, TraceSuperWord, false, \
+ "Trace superword transforms") \
+ \
+ product_pd(bool, OptoBundling, \
+ "Generate nops to fill i-cache lines") \
+ \
+ product_pd(intx, ConditionalMoveLimit, \
+ "Limit of ops to make speculative when using CMOVE") \
+ \
+ /* Set BranchOnRegister == false. See 4965987. */ \
+ product(bool, BranchOnRegister, false, \
+ "Use Sparc V9 branch-on-register opcodes") \
+ \
+ develop(bool, SparcV9RegsHiBitsZero, true, \
+ "Assume Sparc V9 I&L registers on V8+ systems are zero-extended") \
+ \
+ develop(intx, PrintIdealGraphLevel, 0, \
+ "Print ideal graph to XML file / network interface. " \
+ "By default attempts to connect to the visualizer on a socket.") \
+ \
+ develop(intx, PrintIdealGraphPort, 4444, \
+ "Ideal graph printer to network port") \
+ \
+ develop(ccstr, PrintIdealGraphAddress, "127.0.0.1", \
+ "IP address to connect to visualizer") \
+ \
+ develop(ccstr, PrintIdealGraphFile, NULL, \
+ "File to dump ideal graph to. If set overrides the " \
+ "use of the network") \
+ \
+ product(bool, UseOldInlining, true, \
+ "Enable the 1.3 inlining strategy") \
+ \
+ product(bool, UseBimorphicInlining, true, \
+ "Profiling based inlining for two receivers") \
+ \
+ product(bool, UseOnlyInlinedBimorphic, true, \
+ "Don't use BimorphicInlining if can't inline a second method") \
+ \
+ product(bool, InsertMemBarAfterArraycopy, true, \
+ "Insert memory barrier after arraycopy call") \
+ \
+ /* controls for tier 1 compilations */ \
+ \
+ develop(bool, Tier1CountInvocations, true, \
+ "Generate code, during tier 1, to update invocation counter") \
+ \
+ product(intx, Tier1Inline, false, \
+ "enable inlining during tier 1") \
+ \
+ product(intx, Tier1MaxInlineSize, 8, \
+ "maximum bytecode size of a method to be inlined, during tier 1") \
+ \
+ product(intx, Tier1FreqInlineSize, 35, \
+ "max bytecode size of a frequent method to be inlined, tier 1") \
+ \
+ develop(intx, ImplicitNullCheckThreshold, 3, \
+ "Don't do implicit null checks if NPE's in a method exceeds limit") \
+ \
+ /* controls for loop optimization */ \
+ product(intx, Tier1LoopOptsCount, 0, \
+ "Set level of loop optimization for tier 1 compiles") \
+ \
+ product(intx, LoopOptsCount, 43, \
+ "Set level of loop optimization for tier 1 compiles") \
+ \
+ /* controls for heat-based inlining */ \
+ \
+ develop(intx, NodeCountInliningCutoff, 18000, \
+ "If parser node generation exceeds limit stop inlining") \
+ \
+ develop(intx, NodeCountInliningStep, 1000, \
+ "Target size of warm calls inlined between optimization passes") \
+ \
+ develop(bool, InlineWarmCalls, false, \
+ "Use a heat-based priority queue to govern inlining") \
+ \
+ develop(intx, HotCallCountThreshold, 999999, \
+ "large numbers of calls (per method invocation) force hotness") \
+ \
+ develop(intx, HotCallProfitThreshold, 999999, \
+ "highly profitable inlining opportunities force hotness") \
+ \
+ develop(intx, HotCallTrivialWork, -1, \
+ "trivial execution time (no larger than this) forces hotness") \
+ \
+ develop(intx, HotCallTrivialSize, -1, \
+ "trivial methods (no larger than this) force calls to be hot") \
+ \
+ develop(intx, WarmCallMinCount, -1, \
+ "number of calls (per method invocation) to enable inlining") \
+ \
+ develop(intx, WarmCallMinProfit, -1, \
+ "number of calls (per method invocation) to enable inlining") \
+ \
+ develop(intx, WarmCallMaxWork, 999999, \
+ "execution time of the largest inlinable method") \
+ \
+ develop(intx, WarmCallMaxSize, 999999, \
+ "size of the largest inlinable method") \
+ \
+ product(intx, MaxNodeLimit, 65000, \
+ "Maximum number of nodes") \
+ \
+ product(intx, NodeLimitFudgeFactor, 1000, \
+ "Fudge Factor for certain optimizations") \
+ \
+ product(bool, UseJumpTables, true, \
+ "Use JumpTables instead of a binary search tree for switches") \
+ \
+ product(bool, UseDivMod, true, \
+ "Use combined DivMod instruction if available") \
+ \
+ product(intx, MinJumpTableSize, 18, \
+ "Minimum number of targets in a generated jump table") \
+ \
+ product(intx, MaxJumpTableSize, 65000, \
+ "Maximum number of targets in a generated jump table") \
+ \
+ product(intx, MaxJumpTableSparseness, 5, \
+ "Maximum sparseness for jumptables") \
+ \
+ product(bool, EliminateLocks, true, \
+ "Coarsen locks when possible") \
+ \
+ notproduct(bool, PrintLockStatistics, false, \
+ "Print precise statistics on the dynamic lock usage") \
+ \
+ diagnostic(bool, PrintPreciseBiasedLockingStatistics, false, \
+ "Print per-lock-site statistics of biased locking in JVM") \
+ \
+ notproduct(bool, PrintEliminateLocks, false, \
+ "Print out when locks are eliminated") \
+ \
+ product(bool, DoEscapeAnalysis, false, \
+ "Perform escape analysis") \
+ \
+ notproduct(bool, PrintEscapeAnalysis, false, \
+ "Print the results of escape analysis") \
+ \
+ product(bool, EliminateAllocations, true, \
+ "Use escape analysis to eliminate allocations") \
+ \
+ product(intx, MaxLabelRootDepth, 1100, \
+ "Maximum times call Label_Root to prevent stack overflow") \
+
+C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
diff --git a/src/share/vm/opto/c2compiler.cpp b/src/share/vm/opto/c2compiler.cpp
new file mode 100644
index 000000000..6543e692b
--- /dev/null
+++ b/src/share/vm/opto/c2compiler.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_c2compiler.cpp.incl"
+
+
+volatile int C2Compiler::_runtimes = uninitialized;
+
+// register information defined by ADLC
+extern const char register_save_policy[];
+extern const int register_save_type[];
+
+const char* C2Compiler::retry_no_subsuming_loads() {
+ return "retry without subsuming loads";
+}
+void C2Compiler::initialize_runtime() {
+
+ // Check assumptions used while running ADLC
+ Compile::adlc_verification();
+ assert(REG_COUNT <= ConcreteRegisterImpl::number_of_registers, "incompatible register counts");
+
+ for (int i = 0; i < ConcreteRegisterImpl::number_of_registers ; i++ ) {
+ OptoReg::vm2opto[i] = OptoReg::Bad;
+ }
+
+ for( OptoReg::Name i=OptoReg::Name(0); i<OptoReg::Name(REG_COUNT); i = OptoReg::add(i,1) ) {
+ VMReg r = OptoReg::as_VMReg(i);
+ if (r->is_valid()) {
+ OptoReg::vm2opto[r->value()] = i;
+ }
+ }
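+  // vm2opto now maps every valid VMReg back to its OptoReg name; registers
+  // with no OptoReg equivalent keep the OptoReg::Bad sentinel.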
+
+ // Check that runtime and architecture description agree on callee-saved-floats
+ bool callee_saved_floats = false;
+ for( OptoReg::Name i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
+ // Is there a callee-saved float or double?
+ if( register_save_policy[i] == 'E' /* callee-saved */ &&
+ (register_save_type[i] == Op_RegF || register_save_type[i] == Op_RegD) ) {
+ callee_saved_floats = true;
+ }
+ }
+
+ DEBUG_ONLY( Node::init_NodeProperty(); )
+
+ Compile::pd_compiler2_init();
+
+ CompilerThread* thread = CompilerThread::current();
+
+ HandleMark handle_mark(thread);
+
+ OptoRuntime::generate(thread->env());
+
+}
+
+
+void C2Compiler::initialize() {
+
+ // This method can only be called once per C2Compiler object
+ // The first compiler thread that gets here will initialize the
+ // small amount of global state (and runtime stubs) that c2 needs.
+
+  // A race is possible once at startup; after that we're fine
+
+ // Note that this is being called from a compiler thread not the
+ // main startup thread.
+
+ if (_runtimes != initialized) {
+ initialize_runtimes( initialize_runtime, &_runtimes);
+ }
+
+ // Mark this compiler object as ready to roll
+ mark_initialized();
+}
+
+void C2Compiler::compile_method(ciEnv* env,
+ ciMethod* target,
+ int entry_bci) {
+ if (!is_initialized()) {
+ initialize();
+ }
+ bool subsume_loads = true;
+ while (!env->failing()) {
+ // Attempt to compile while subsuming loads into machine instructions.
+ Compile C(env, this, target, entry_bci, subsume_loads);
+
+ // Check result and retry if appropriate.
+ if (C.failure_reason() != NULL) {
+ if (C.failure_reason_is(retry_no_subsuming_loads())) {
+ assert(subsume_loads, "must make progress");
+ subsume_loads = false;
+ continue; // retry
+ }
+ // Pass any other failure reason up to the ciEnv.
+ // Note that serious, irreversible failures are already logged
+ // on the ciEnv via env->record_method_not_compilable().
+ env->record_failure(C.failure_reason());
+ }
+
+ // No retry; just break the loop.
+ break;
+ }
+}
+
+
+void C2Compiler::print_timers() {
+ // do nothing
+}
diff --git a/src/share/vm/opto/c2compiler.hpp b/src/share/vm/opto/c2compiler.hpp
new file mode 100644
index 000000000..dc5851082
--- /dev/null
+++ b/src/share/vm/opto/c2compiler.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class C2Compiler : public AbstractCompiler {
+private:
+
+ static void initialize_runtime();
+
+public:
+ // Name
+ const char *name() { return "C2"; }
+
+ static volatile int _runtimes;
+
+#ifdef TIERED
+ virtual bool is_c2() { return true; };
+#endif // TIERED
+
+ // Customization
+ bool needs_adapters () { return true; }
+ bool needs_stubs () { return true; }
+
+ void initialize();
+
+ // Compilation entry point for methods
+ void compile_method(ciEnv* env,
+ ciMethod* target,
+ int entry_bci);
+
+ // sentinel value used to trigger backtracking in compile_method().
+ static const char* retry_no_subsuming_loads();
+
+ // Print compilation timers and statistics
+ void print_timers();
+};
diff --git a/src/share/vm/opto/callGenerator.cpp b/src/share/vm/opto/callGenerator.cpp
new file mode 100644
index 000000000..3131cf6b3
--- /dev/null
+++ b/src/share/vm/opto/callGenerator.cpp
@@ -0,0 +1,744 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_callGenerator.cpp.incl"
+
+CallGenerator::CallGenerator(ciMethod* method) {
+ _method = method;
+}
+
+// Utility function.
+const TypeFunc* CallGenerator::tf() const {
+ return TypeFunc::make(method());
+}
+
+//-----------------------------ParseGenerator---------------------------------
+// Internal class which handles all direct bytecode traversal.
+class ParseGenerator : public InlineCallGenerator {
+private:
+ bool _is_osr;
+ float _expected_uses;
+
+public:
+ ParseGenerator(ciMethod* method, float expected_uses, bool is_osr = false)
+ : InlineCallGenerator(method)
+ {
+ _is_osr = is_osr;
+ _expected_uses = expected_uses;
+ assert(can_parse(method, is_osr), "parse must be possible");
+ }
+
+ // Can we build either an OSR or a regular parser for this method?
+ static bool can_parse(ciMethod* method, int is_osr = false);
+
+ virtual bool is_parse() const { return true; }
+ virtual JVMState* generate(JVMState* jvms);
+ int is_osr() { return _is_osr; }
+
+};
+
+JVMState* ParseGenerator::generate(JVMState* jvms) {
+ Compile* C = Compile::current();
+
+ if (is_osr()) {
+    // The JVMS for an OSR has a single argument (see its TypeFunc).
+ assert(jvms->depth() == 1, "no inline OSR");
+ }
+
+ if (C->failing()) {
+ return NULL; // bailing out of the compile; do not try to parse
+ }
+
+ Parse parser(jvms, method(), _expected_uses);
+ // Grab signature for matching/allocation
+#ifdef ASSERT
+ if (parser.tf() != (parser.depth() == 1 ? C->tf() : tf())) {
+ MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
+ assert(C->env()->system_dictionary_modification_counter_changed(),
+ "Must invalidate if TypeFuncs differ");
+ }
+#endif
+
+ GraphKit& exits = parser.exits();
+
+ if (C->failing()) {
+ while (exits.pop_exception_state() != NULL) ;
+ return NULL;
+ }
+
+ assert(exits.jvms()->same_calls_as(jvms), "sanity");
+
+ // Simply return the exit state of the parser,
+ // augmented by any exceptional states.
+ return exits.transfer_exceptions_into_jvms();
+}
+
+//---------------------------DirectCallGenerator------------------------------
+// Internal class which handles all out-of-line calls w/o receiver type checks.
+class DirectCallGenerator : public CallGenerator {
+public:
+ DirectCallGenerator(ciMethod* method)
+ : CallGenerator(method)
+ {
+ }
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+JVMState* DirectCallGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ bool is_static = method()->is_static();
+ address target = is_static ? SharedRuntime::get_resolve_static_call_stub()
+ : SharedRuntime::get_resolve_opt_virtual_call_stub();
+
+ if (kit.C->log() != NULL) {
+ kit.C->log()->elem("direct_call bci='%d'", jvms->bci());
+ }
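+  // A static call was given the resolve-static-call stub above; a statically
+  // bound instance call goes through the optimized-virtual stub and is
+  // additionally flagged via set_optimized_virtual() below.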
+
+ CallStaticJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), target, method(), kit.bci());
+ if (!is_static) {
+ // Make an explicit receiver null_check as part of this call.
+ // Since we share a map with the caller, his JVMS gets adjusted.
+ kit.null_check_receiver(method());
+ if (kit.stopped()) {
+ // And dump it back to the caller, decorated with any exceptions:
+ return kit.transfer_exceptions_into_jvms();
+ }
+ // Mark the call node as virtual, sort of:
+ call->set_optimized_virtual(true);
+ }
+ kit.set_arguments_for_java_call(call);
+ kit.set_edges_for_java_call(call);
+ Node* ret = kit.set_results_for_java_call(call);
+ kit.push_node(method()->return_type()->basic_type(), ret);
+ return kit.transfer_exceptions_into_jvms();
+}
+
+class VirtualCallGenerator : public CallGenerator {
+private:
+ int _vtable_index;
+public:
+ VirtualCallGenerator(ciMethod* method, int vtable_index)
+ : CallGenerator(method), _vtable_index(vtable_index)
+ {
+ assert(vtable_index == methodOopDesc::invalid_vtable_index ||
+ vtable_index >= 0, "either invalid or usable");
+ }
+ virtual bool is_virtual() const { return true; }
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+//--------------------------VirtualCallGenerator------------------------------
+// Internal class which handles all out-of-line calls checking receiver type.
+JVMState* VirtualCallGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ Node* receiver = kit.argument(0);
+
+ if (kit.C->log() != NULL) {
+ kit.C->log()->elem("virtual_call bci='%d'", jvms->bci());
+ }
+
+ // If the receiver is a constant null, do not torture the system
+ // by attempting to call through it. The compile will proceed
+ // correctly, but may bail out in final_graph_reshaping, because
+ // the call instruction will have a seemingly deficient out-count.
+ // (The bailout says something misleading about an "infinite loop".)
+ if (kit.gvn().type(receiver)->higher_equal(TypePtr::NULL_PTR)) {
+ kit.inc_sp(method()->arg_size()); // restore arguments
+ kit.uncommon_trap(Deoptimization::Reason_null_check,
+ Deoptimization::Action_none,
+ NULL, "null receiver");
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ // Ideally we would unconditionally do a null check here and let it
+ // be converted to an implicit check based on profile information.
+ // However currently the conversion to implicit null checks in
+ // Block::implicit_null_check() only looks for loads and stores, not calls.
+ ciMethod *caller = kit.method();
+ ciMethodData *caller_md = (caller == NULL) ? NULL : caller->method_data();
+ if (!UseInlineCaches || !ImplicitNullChecks ||
+ ((ImplicitNullCheckThreshold > 0) && caller_md &&
+ (caller_md->trap_count(Deoptimization::Reason_null_check)
+ >= (uint)ImplicitNullCheckThreshold))) {
+ // Make an explicit receiver null_check as part of this call.
+ // Since we share a map with the caller, his JVMS gets adjusted.
+ receiver = kit.null_check_receiver(method());
+ if (kit.stopped()) {
+ // And dump it back to the caller, decorated with any exceptions:
+ return kit.transfer_exceptions_into_jvms();
+ }
+ }
+
+ assert(!method()->is_static(), "virtual call must not be to static");
+ assert(!method()->is_final(), "virtual call should not be to final");
+ assert(!method()->is_private(), "virtual call should not be to private");
+ assert(_vtable_index == methodOopDesc::invalid_vtable_index || !UseInlineCaches,
+ "no vtable calls if +UseInlineCaches ");
+ address target = SharedRuntime::get_resolve_virtual_call_stub();
+ // Normal inline cache used for call
+ CallDynamicJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallDynamicJavaNode(tf(), target, method(), _vtable_index, kit.bci());
+ kit.set_arguments_for_java_call(call);
+ kit.set_edges_for_java_call(call);
+ Node* ret = kit.set_results_for_java_call(call);
+ kit.push_node(method()->return_type()->basic_type(), ret);
+
+ // Represent the effect of an implicit receiver null_check
+ // as part of this call. Since we share a map with the caller,
+ // his JVMS gets adjusted.
+ kit.cast_not_null(receiver);
+ return kit.transfer_exceptions_into_jvms();
+}
+
+bool ParseGenerator::can_parse(ciMethod* m, int is_osr) {
+ // Certain methods cannot be parsed at all:
+ if (!m->can_be_compiled()) return false;
+ if (!m->has_balanced_monitors()) return false;
+ if (m->get_flow_analysis()->failing()) return false;
+
+ // (Methods may bail out for other reasons, after the parser is run.
+ // We try to avoid this, but if forced, we must return (Node*)NULL.
+ // The user of the CallGenerator must check for this condition.)
+ return true;
+}
+
+CallGenerator* CallGenerator::for_inline(ciMethod* m, float expected_uses) {
+ if (!ParseGenerator::can_parse(m)) return NULL;
+ return new ParseGenerator(m, expected_uses);
+}
+
+// As a special case, the JVMS passed to this CallGenerator is
+// for the method execution already in progress, not just the JVMS
+// of the caller. Thus, this CallGenerator cannot be mixed with others!
+CallGenerator* CallGenerator::for_osr(ciMethod* m, int osr_bci) {
+ if (!ParseGenerator::can_parse(m, true)) return NULL;
+ float past_uses = m->interpreter_invocation_count();
+ float expected_uses = past_uses;
+ return new ParseGenerator(m, expected_uses, true);
+}
+
+CallGenerator* CallGenerator::for_direct_call(ciMethod* m) {
+ assert(!m->is_abstract(), "for_direct_call mismatch");
+ return new DirectCallGenerator(m);
+}
+
+CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) {
+ assert(!m->is_static(), "for_virtual_call mismatch");
+ return new VirtualCallGenerator(m, vtable_index);
+}
+
+
+//---------------------------WarmCallGenerator--------------------------------
+// Internal class which handles initial deferral of inlining decisions.
+class WarmCallGenerator : public CallGenerator {
+ WarmCallInfo* _call_info;
+ CallGenerator* _if_cold;
+ CallGenerator* _if_hot;
+ bool _is_virtual; // caches virtuality of if_cold
+ bool _is_inline; // caches inline-ness of if_hot
+
+public:
+ WarmCallGenerator(WarmCallInfo* ci,
+ CallGenerator* if_cold,
+ CallGenerator* if_hot)
+ : CallGenerator(if_cold->method())
+ {
+ assert(method() == if_hot->method(), "consistent choices");
+ _call_info = ci;
+ _if_cold = if_cold;
+ _if_hot = if_hot;
+ _is_virtual = if_cold->is_virtual();
+ _is_inline = if_hot->is_inline();
+ }
+
+ virtual bool is_inline() const { return _is_inline; }
+ virtual bool is_virtual() const { return _is_virtual; }
+ virtual bool is_deferred() const { return true; }
+
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+
+CallGenerator* CallGenerator::for_warm_call(WarmCallInfo* ci,
+ CallGenerator* if_cold,
+ CallGenerator* if_hot) {
+ return new WarmCallGenerator(ci, if_cold, if_hot);
+}
+
+JVMState* WarmCallGenerator::generate(JVMState* jvms) {
+ Compile* C = Compile::current();
+ if (C->log() != NULL) {
+ C->log()->elem("warm_call bci='%d'", jvms->bci());
+ }
+ jvms = _if_cold->generate(jvms);
+ if (jvms != NULL) {
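+ // Walk the control chain back through CatchProj -> Catch -> Proj to find
+ // the CallJava node the cold-path generator just emitted; if the pattern
+ // does not match, m falls through to top and the site is simply not queued.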
+ Node* m = jvms->map()->control();
+ if (m->is_CatchProj()) m = m->in(0); else m = C->top();
+ if (m->is_Catch()) m = m->in(0); else m = C->top();
+ if (m->is_Proj()) m = m->in(0); else m = C->top();
+ if (m->is_CallJava()) {
+ _call_info->set_call(m->as_Call());
+ _call_info->set_hot_cg(_if_hot);
+#ifndef PRODUCT
+ if (PrintOpto || PrintOptoInlining) {
+ tty->print_cr("Queueing for warm inlining at bci %d:", jvms->bci());
+ tty->print("WCI: ");
+ _call_info->print();
+ }
+#endif
+ _call_info->set_heat(_call_info->compute_heat());
+ C->set_warm_calls(_call_info->insert_into(C->warm_calls()));
+ }
+ }
+ return jvms;
+}
+
+void WarmCallInfo::make_hot() {
+ Compile* C = Compile::current();
+ // Replace the callnode with something better.
+ CallJavaNode* call = this->call()->as_CallJava();
+ ciMethod* method = call->method();
+ int nargs = method->arg_size();
+ JVMState* jvms = call->jvms()->clone_shallow(C);
+ uint size = TypeFunc::Parms + MAX2(2, nargs);
+ SafePointNode* map = new (C, size) SafePointNode(size, jvms);
+ for (uint i1 = 0; i1 < (uint)(TypeFunc::Parms + nargs); i1++) {
+ map->init_req(i1, call->in(i1));
+ }
+ jvms->set_map(map);
+ jvms->set_offsets(map->req());
+ jvms->set_locoff(TypeFunc::Parms);
+ jvms->set_stkoff(TypeFunc::Parms);
+ GraphKit kit(jvms);
+
+ JVMState* new_jvms = _hot_cg->generate(kit.jvms());
+ if (new_jvms == NULL) return; // no change
+ if (C->failing()) return;
+
+ kit.set_jvms(new_jvms);
+ Node* res = C->top();
+ int res_size = method->return_type()->size();
+ if (res_size != 0) {
+ kit.inc_sp(-res_size);
+ res = kit.argument(0);
+ }
+ GraphKit ekit(kit.combine_and_pop_all_exception_states()->jvms());
+
+ // Replace the call:
+ for (DUIterator i = call->outs(); call->has_out(i); i++) {
+ Node* n = call->out(i);
+ Node* nn = NULL; // replacement
+ if (n->is_Proj()) {
+ ProjNode* nproj = n->as_Proj();
+ assert(nproj->_con < (uint)(TypeFunc::Parms + (res_size ? 1 : 0)), "sane proj");
+ if (nproj->_con == TypeFunc::Parms) {
+ nn = res;
+ } else {
+ nn = kit.map()->in(nproj->_con);
+ }
+ if (nproj->_con == TypeFunc::I_O) {
+ for (DUIterator j = nproj->outs(); nproj->has_out(j); j++) {
+ Node* e = nproj->out(j);
+ if (e->Opcode() == Op_CreateEx) {
+ e->replace_by(ekit.argument(0));
+ } else if (e->Opcode() == Op_Catch) {
+ for (DUIterator k = e->outs(); e->has_out(k); k++) {
+ CatchProjNode* p = e->out(k)->as_CatchProj();
+ if (p->is_handler_proj()) {
+ p->replace_by(ekit.control());
+ } else {
+ p->replace_by(kit.control());
+ }
+ }
+ }
+ }
+ }
+ }
+ NOT_PRODUCT(if (!nn) n->dump(2));
+ assert(nn != NULL, "don't know what to do with this user");
+ n->replace_by(nn);
+ }
+}
+
+void WarmCallInfo::make_cold() {
+ // No action: Just dequeue.
+}
+
+
+//------------------------PredictedCallGenerator------------------------------
+// Internal class which handles all out-of-line calls checking receiver type.
+class PredictedCallGenerator : public CallGenerator {
+ ciKlass* _predicted_receiver;
+ CallGenerator* _if_missed;
+ CallGenerator* _if_hit;
+ float _hit_prob;
+
+public:
+ PredictedCallGenerator(ciKlass* predicted_receiver,
+ CallGenerator* if_missed,
+ CallGenerator* if_hit, float hit_prob)
+ : CallGenerator(if_missed->method())
+ {
+ // The call profile data may predict the hit_prob as extreme as 0 or 1.
+ // Remove the extreme values from the range.
+ if (hit_prob > PROB_MAX) hit_prob = PROB_MAX;
+ if (hit_prob < PROB_MIN) hit_prob = PROB_MIN;
+
+ _predicted_receiver = predicted_receiver;
+ _if_missed = if_missed;
+ _if_hit = if_hit;
+ _hit_prob = hit_prob;
+ }
+
+ virtual bool is_virtual() const { return true; }
+ virtual bool is_inline() const { return _if_hit->is_inline(); }
+ virtual bool is_deferred() const { return _if_hit->is_deferred(); }
+
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+
+CallGenerator* CallGenerator::for_predicted_call(ciKlass* predicted_receiver,
+ CallGenerator* if_missed,
+ CallGenerator* if_hit,
+ float hit_prob) {
+ return new PredictedCallGenerator(predicted_receiver, if_missed, if_hit, hit_prob);
+}
+
+
+JVMState* PredictedCallGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ PhaseGVN& gvn = kit.gvn();
+ // We need an explicit receiver null_check before checking its type.
+ // We share a map with the caller, so his JVMS gets adjusted.
+ Node* receiver = kit.argument(0);
+
+ CompileLog* log = kit.C->log();
+ if (log != NULL) {
+ log->elem("predicted_call bci='%d' klass='%d'",
+ jvms->bci(), log->identify(_predicted_receiver));
+ }
+
+ receiver = kit.null_check_receiver(method());
+ if (kit.stopped()) {
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ Node* exact_receiver = receiver; // will get updated in place...
+ Node* slow_ctl = kit.type_check_receiver(receiver,
+ _predicted_receiver, _hit_prob,
+ &exact_receiver);
+
+ SafePointNode* slow_map = NULL;
+ JVMState* slow_jvms;
+ { PreserveJVMState pjvms(&kit);
+ kit.set_control(slow_ctl);
+ if (!kit.stopped()) {
+ slow_jvms = _if_missed->generate(kit.sync_jvms());
+ assert(slow_jvms != NULL, "miss path must not fail to generate");
+ kit.add_exception_states_from(slow_jvms);
+ kit.set_map(slow_jvms->map());
+ if (!kit.stopped())
+ slow_map = kit.stop();
+ }
+ }
+
+ // fall through if the instance exactly matches the desired type
+ kit.replace_in_map(receiver, exact_receiver);
+
+ // Make the hot call:
+ JVMState* new_jvms = _if_hit->generate(kit.sync_jvms());
+ if (new_jvms == NULL) {
+ // Inline failed, so make a direct call.
+ assert(_if_hit->is_inline(), "must have been a failed inline");
+ CallGenerator* cg = CallGenerator::for_direct_call(_if_hit->method());
+ new_jvms = cg->generate(kit.sync_jvms());
+ }
+ kit.add_exception_states_from(new_jvms);
+ kit.set_jvms(new_jvms);
+
+ // Need to merge slow and fast?
+ if (slow_map == NULL) {
+ // The fast path is the only path remaining.
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ if (kit.stopped()) {
+ // Inlined method threw an exception, so it's just the slow path after all.
+ kit.set_jvms(slow_jvms);
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ // Finish the diamond.
+ kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
+ RegionNode* region = new (kit.C, 3) RegionNode(3);
+ region->init_req(1, kit.control());
+ region->init_req(2, slow_map->control());
+ kit.set_control(gvn.transform(region));
+ Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
+ iophi->set_req(2, slow_map->i_o());
+ kit.set_i_o(gvn.transform(iophi));
+ kit.merge_memory(slow_map->merged_memory(), region, 2);
+ uint tos = kit.jvms()->stkoff() + kit.sp();
+ uint limit = slow_map->req();
+ for (uint i = TypeFunc::Parms; i < limit; i++) {
+ // Skip unused stack slots; fast forward to monoff();
+ if (i == tos) {
+ i = kit.jvms()->monoff();
+ if( i >= limit ) break;
+ }
+ Node* m = kit.map()->in(i);
+ Node* n = slow_map->in(i);
+ if (m != n) {
+ const Type* t = gvn.type(m)->meet(gvn.type(n));
+ Node* phi = PhiNode::make(region, m, t);
+ phi->set_req(2, n);
+ kit.map()->set_req(i, gvn.transform(phi));
+ }
+ }
+ return kit.transfer_exceptions_into_jvms();
+}
+
+
+//-------------------------UncommonTrapCallGenerator-----------------------------
+// Internal class which replaces the call site with an uncommon trap;
+// control never returns to the caller.
+class UncommonTrapCallGenerator : public CallGenerator {
+ Deoptimization::DeoptReason _reason;
+ Deoptimization::DeoptAction _action;
+
+public:
+ UncommonTrapCallGenerator(ciMethod* m,
+ Deoptimization::DeoptReason reason,
+ Deoptimization::DeoptAction action)
+ : CallGenerator(m)
+ {
+ _reason = reason;
+ _action = action;
+ }
+
+ virtual bool is_virtual() const { ShouldNotReachHere(); return false; }
+ virtual bool is_trap() const { return true; }
+
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+
+CallGenerator*
+CallGenerator::for_uncommon_trap(ciMethod* m,
+ Deoptimization::DeoptReason reason,
+ Deoptimization::DeoptAction action) {
+ return new UncommonTrapCallGenerator(m, reason, action);
+}
+
+
+JVMState* UncommonTrapCallGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ // Take the trap with arguments pushed on the stack. (Cf. null_check_receiver).
+ int nargs = method()->arg_size();
+ kit.inc_sp(nargs);
+ assert(nargs <= kit.sp() && kit.sp() <= jvms->stk_size(), "sane sp w/ args pushed");
+ if (_reason == Deoptimization::Reason_class_check &&
+ _action == Deoptimization::Action_maybe_recompile) {
+ // Temp fix for 6529811
+ // Don't allow uncommon_trap to override our decision to recompile in the event
+ // of a class cast failure for a monomorphic call as it will never let us convert
+ // the call to either bi-morphic or megamorphic and can lead to unc-trap loops
+ bool keep_exact_action = true;
+ kit.uncommon_trap(_reason, _action, NULL, "monomorphic vcall checkcast", false, keep_exact_action);
+ } else {
+ kit.uncommon_trap(_reason, _action);
+ }
+ return kit.transfer_exceptions_into_jvms();
+}
+
+// (Note: Moved hook_up_call to GraphKit::set_edges_for_java_call.)
+
+// (Note: Merged hook_up_exits into ParseGenerator::generate.)
+
+#define NODES_OVERHEAD_PER_METHOD (30.0)
+#define NODES_PER_BYTECODE (9.5)
+
+void WarmCallInfo::init(JVMState* call_site, ciMethod* call_method, ciCallProfile& profile, float prof_factor) {
+ int call_count = profile.count();
+ int code_size = call_method->code_size();
+
+ // Expected execution count is based on the historical count:
+ _count = call_count < 0 ? 1 : call_site->method()->scale_count(call_count, prof_factor);
+
+ // Expected profit from inlining, in units of simple call-overheads.
+ _profit = 1.0;
+
+ // Expected work performed by the call in units of call-overheads.
+ // %%% need an empirical curve fit for "work" (time in call)
+ float bytecodes_per_call = 3;
+ _work = 1.0 + code_size / bytecodes_per_call;
+
+ // Expected size of compilation graph:
+ // -XX:+PrintParseStatistics once reported:
+ // Methods seen: 9184 Methods parsed: 9184 Nodes created: 1582391
+ // Histogram of 144298 parsed bytecodes:
+ // %%% Need a better predictor for graph size.
+ _size = NODES_OVERHEAD_PER_METHOD + (NODES_PER_BYTECODE * code_size);
+}
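+
+// Illustrative arithmetic only (not a separate heuristic): with the constants
+// above, a hypothetical 100-bytecode callee is estimated at
+// 30.0 + 9.5 * 100 = 980.0 expected graph nodes.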
+
+// is_cold: Return true if the node should never be inlined.
+// This is true if any of the key metrics are extreme.
+bool WarmCallInfo::is_cold() const {
+ if (count() < WarmCallMinCount) return true;
+ if (profit() < WarmCallMinProfit) return true;
+ if (work() > WarmCallMaxWork) return true;
+ if (size() > WarmCallMaxSize) return true;
+ return false;
+}
+
+// is_hot: Return true if the node should be inlined immediately.
+// This is true if any of the key metrics are extreme.
+bool WarmCallInfo::is_hot() const {
+ assert(!is_cold(), "eliminate is_cold cases before testing is_hot");
+ if (count() >= HotCallCountThreshold) return true;
+ if (profit() >= HotCallProfitThreshold) return true;
+ if (work() <= HotCallTrivialWork) return true;
+ if (size() <= HotCallTrivialSize) return true;
+ return false;
+}
+
+// compute_heat:
+float WarmCallInfo::compute_heat() const {
+ assert(!is_cold(), "compute heat only on warm nodes");
+ assert(!is_hot(), "compute heat only on warm nodes");
+ int min_size = MAX2(0, (int)HotCallTrivialSize);
+ int max_size = MIN2(500, (int)WarmCallMaxSize);
+ float method_size = (size() - min_size) / MAX2(1, max_size - min_size);
+ float size_factor;
+ if (method_size < 0.05) size_factor = 4; // 2 sigmas better than avg.
+ else if (method_size < 0.15) size_factor = 2; // 1 sigma better than avg.
+ else if (method_size < 0.5) size_factor = 1; // better than avg.
+ else size_factor = 0.5; // worse than avg.
+ return (count() * profit() * size_factor);
+}
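+
+// Worked example with assumed (hypothetical) tuning values, for illustration
+// only: if HotCallTrivialSize is 10 and WarmCallMaxSize is 2000, then
+// min_size = 10 and max_size = MIN2(500, 2000) = 500; a site with size() = 120
+// gives method_size = (120 - 10) / 490, about 0.22, so size_factor = 1 and
+// the heat is simply count() * profit().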
+
+bool WarmCallInfo::warmer_than(WarmCallInfo* that) {
+ assert(this != that, "compare only different WCIs");
+ assert(this->heat() != 0 && that->heat() != 0, "call compute_heat 1st");
+ if (this->heat() > that->heat()) return true;
+ if (this->heat() < that->heat()) return false;
+ assert(this->heat() == that->heat(), "no NaN heat allowed");
+ // Equal heat. Break the tie some other way.
+ if (!this->call() || !that->call()) return (address)this > (address)that;
+ return this->call()->_idx > that->call()->_idx;
+}
+
+//#define UNINIT_NEXT ((WarmCallInfo*)badAddress)
+#define UNINIT_NEXT ((WarmCallInfo*)NULL)
+
+WarmCallInfo* WarmCallInfo::insert_into(WarmCallInfo* head) {
+ assert(next() == UNINIT_NEXT, "not yet on any list");
+ WarmCallInfo* prev_p = NULL;
+ WarmCallInfo* next_p = head;
+ while (next_p != NULL && next_p->warmer_than(this)) {
+ prev_p = next_p;
+ next_p = prev_p->next();
+ }
+ // Install this between prev_p and next_p.
+ this->set_next(next_p);
+ if (prev_p == NULL)
+ head = this;
+ else
+ prev_p->set_next(this);
+ return head;
+}
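+
+// Typical use, as in WarmCallGenerator::generate above: the compile keeps its
+// pending queue sorted by heat by re-assigning the head on every insert:
+//   C->set_warm_calls(_call_info->insert_into(C->warm_calls()));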
+
+WarmCallInfo* WarmCallInfo::remove_from(WarmCallInfo* head) {
+ WarmCallInfo* prev_p = NULL;
+ WarmCallInfo* next_p = head;
+ while (next_p != this) {
+ assert(next_p != NULL, "this must be in the list somewhere");
+ prev_p = next_p;
+ next_p = prev_p->next();
+ }
+ next_p = this->next();
+ debug_only(this->set_next(UNINIT_NEXT));
+ // Remove this from between prev_p and next_p.
+ if (prev_p == NULL)
+ head = next_p;
+ else
+ prev_p->set_next(next_p);
+ return head;
+}
+
+WarmCallInfo* WarmCallInfo::_always_hot = NULL;
+WarmCallInfo* WarmCallInfo::_always_cold = NULL;
+
+WarmCallInfo* WarmCallInfo::always_hot() {
+ if (_always_hot == NULL) {
+ static double bits[sizeof(WarmCallInfo) / sizeof(double) + 1] = {0};
+ WarmCallInfo* ci = (WarmCallInfo*) bits;
+ ci->_profit = ci->_count = MAX_VALUE();
+ ci->_work = ci->_size = MIN_VALUE();
+ _always_hot = ci;
+ }
+ assert(_always_hot->is_hot(), "must always be hot");
+ return _always_hot;
+}
+
+WarmCallInfo* WarmCallInfo::always_cold() {
+ if (_always_cold == NULL) {
+ static double bits[sizeof(WarmCallInfo) / sizeof(double) + 1] = {0};
+ WarmCallInfo* ci = (WarmCallInfo*) bits;
+ ci->_profit = ci->_count = MIN_VALUE();
+ ci->_work = ci->_size = MAX_VALUE();
+ _always_cold = ci;
+ }
+ assert(_always_cold->is_cold(), "must always be cold");
+ return _always_cold;
+}
+
+
+#ifndef PRODUCT
+
+void WarmCallInfo::print() const {
+ tty->print("%s : C=%6.1f P=%6.1f W=%6.1f S=%6.1f H=%6.1f -> %p",
+ is_cold() ? "cold" : is_hot() ? "hot " : "warm",
+ count(), profit(), work(), size(), compute_heat(), next());
+ tty->cr();
+ if (call() != NULL) call()->dump();
+}
+
+void print_wci(WarmCallInfo* ci) {
+ ci->print();
+}
+
+void WarmCallInfo::print_all() const {
+ for (const WarmCallInfo* p = this; p != NULL; p = p->next())
+ p->print();
+}
+
+int WarmCallInfo::count_all() const {
+ int cnt = 0;
+ for (const WarmCallInfo* p = this; p != NULL; p = p->next())
+ cnt++;
+ return cnt;
+}
+
+#endif //PRODUCT
diff --git a/src/share/vm/opto/callGenerator.hpp b/src/share/vm/opto/callGenerator.hpp
new file mode 100644
index 000000000..bbd47ca4a
--- /dev/null
+++ b/src/share/vm/opto/callGenerator.hpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2000-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//---------------------------CallGenerator-------------------------------------
+// The subclasses of this class handle generation of ideal nodes for
+// call sites and method entry points.
+
+class CallGenerator : public ResourceObj {
+ public:
+ enum {
+ xxxunusedxxx
+ };
+
+ private:
+ ciMethod* _method; // The method being called.
+
+ protected:
+ CallGenerator(ciMethod* method);
+
+ public:
+ // Accessors
+ ciMethod* method() const { return _method; }
+
+ // is_inline: At least some code implementing the method is copied here.
+ virtual bool is_inline() const { return false; }
+ // is_intrinsic: There's a method-specific way of generating the inline code.
+ virtual bool is_intrinsic() const { return false; }
+ // is_parse: Bytecodes implementing the specific method are copied here.
+ virtual bool is_parse() const { return false; }
+ // is_virtual: The call uses the receiver type to select or check the method.
+ virtual bool is_virtual() const { return false; }
+ // is_deferred: The decision whether to inline or not is deferred.
+ virtual bool is_deferred() const { return false; }
+ // is_predicted: Uses an explicit check against a predicted type.
+ virtual bool is_predicted() const { return false; }
+ // is_trap: Does not return to the caller. (E.g., uncommon trap.)
+ virtual bool is_trap() const { return false; }
+
+ // Note: It is possible for a CG to be both inline and virtual.
+ // (The hashCode intrinsic does a vtable check and an inlined fast path.)
+
+ // Utilities:
+ const TypeFunc* tf() const;
+
+ // The given jvms has state and arguments for a call to my method.
+ // Edges after jvms->argoff() carry all (pre-popped) argument values.
+ //
+ // Update the map with state and return values (if any) and return it.
+ // The return values (0, 1, or 2) must be pushed on the map's stack,
+ // and the sp of the jvms incremented accordingly.
+ //
+ // The jvms is returned on success. Alternatively, a copy of the
+ // given jvms, suitably updated, may be returned, in which case the
+ // caller should discard the original jvms.
+ //
+ // The non-Parm edges of the returned map will contain updated global state,
+ // and one or two edges before jvms->sp() will carry any return values.
+ // Other map edges may contain locals or monitors, and should not
+ // be changed in meaning.
+ //
+ // If the call traps, the returned map must have a control edge of top.
+ // If the call can throw, the returned map must report has_exceptions().
+ //
+ // If the result is NULL, it means that this CallGenerator was unable
+ // to handle the given call, and another CallGenerator should be consulted.
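+ //
+ // A hedged usage sketch (mirroring PredictedCallGenerator::generate in
+ // callGenerator.cpp; the kit/cg names are the ones used at that call site):
+ //   JVMState* new_jvms = cg->generate(kit.sync_jvms());
+ //   if (new_jvms == NULL)  { /* consult another CallGenerator */ }
+ //   kit.add_exception_states_from(new_jvms);
+ //   kit.set_jvms(new_jvms);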
+ virtual JVMState* generate(JVMState* jvms) = 0;
+
+ // How to generate a call site that is inlined:
+ static CallGenerator* for_inline(ciMethod* m, float expected_uses = -1);
+ // How to generate code for an on-stack replacement handler.
+ static CallGenerator* for_osr(ciMethod* m, int osr_bci);
+
+ // How to generate vanilla out-of-line call sites:
+ static CallGenerator* for_direct_call(ciMethod* m); // static, special
+ static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index); // virtual, interface
+
+ // How to make a call but defer the decision whether to inline or not.
+ static CallGenerator* for_warm_call(WarmCallInfo* ci,
+ CallGenerator* if_cold,
+ CallGenerator* if_hot);
+
+ // How to make a call that optimistically assumes a receiver type:
+ static CallGenerator* for_predicted_call(ciKlass* predicted_receiver,
+ CallGenerator* if_missed,
+ CallGenerator* if_hit,
+ float hit_prob);
+
+ // How to make a call that gives up and goes back to the interpreter:
+ static CallGenerator* for_uncommon_trap(ciMethod* m,
+ Deoptimization::DeoptReason reason,
+ Deoptimization::DeoptAction action);
+
+ // Registry for intrinsics:
+ static CallGenerator* for_intrinsic(ciMethod* m);
+ static void register_intrinsic(ciMethod* m, CallGenerator* cg);
+};
+
+class InlineCallGenerator : public CallGenerator {
+ virtual bool is_inline() const { return true; }
+
+ protected:
+ InlineCallGenerator(ciMethod* method) : CallGenerator(method) { }
+};
+
+
+//---------------------------WarmCallInfo--------------------------------------
+// A struct to collect information about a given call site.
+// Helps sort call sites into "hot", "medium", and "cold".
+// Participates in the queueing of "medium" call sites for possible inlining.
+class WarmCallInfo : public ResourceObj {
+ private:
+
+ CallNode* _call; // The CallNode which may be inlined.
+ CallGenerator* _hot_cg; // CG for expanding the call node
+
+ // These are the metrics we use to evaluate call sites:
+
+ float _count; // How often do we expect to reach this site?
+ float _profit; // How much time do we expect to save by inlining?
+ float _work; // How long do we expect the average call to take?
+ float _size; // How big do we expect the inlined code to be?
+
+ float _heat; // Combined score inducing total order on call sites.
+ WarmCallInfo* _next; // Next cooler call info in pending queue.
+
+ // Count is the number of times this call site is expected to be executed.
+ // Large count is favorable for inlining, because the extra compilation
+ // work will be amortized more completely.
+
+ // Profit is a rough measure of the amount of time we expect to save
+ // per execution of this site if we inline it. (1.0 == call overhead)
+ // Large profit favors inlining. Negative profit disables inlining.
+
+ // Work is a rough measure of the amount of time a typical out-of-line
+ // call from this site is expected to take. (1.0 == call, no-op, return)
+ // Small work is somewhat favorable for inlining, since methods with
+ // short "hot" traces are more likely to inline smoothly.
+
+ // Size is the number of graph nodes we expect this method to produce,
+ // not counting the inlining of any further warm calls it may include.
+ // Small size favors inlining, since small methods are more likely to
+ // inline smoothly. The size is estimated by examining the native code
+ // if available. The method bytecodes are also examined, assuming
+ // empirically observed node counts for each kind of bytecode.
+
+ // Heat is the combined "goodness" of a site's inlining. If we were
+ // omniscient, it would be the difference of two sums of future execution
+ // times of code emitted for this site (amortized across multiple sites if
+ // sharing applies). The two sums are for versions of this call site with
+ // and without inlining.
+
+ // We approximate this mythical quantity by playing with averages,
+ // rough estimates, and assumptions that history repeats itself.
+ // The basic formula count * profit is heuristically adjusted
+ // by looking at the expected compilation and execution times
+ // of the inlined call.
+
+ // Note: Some of these metrics may not be present in the final product,
+ // but exist in development builds to experiment with inline policy tuning.
+
+ // This heuristic framework does not model well the very significant
+ // effects of multiple-level inlining. It is possible to see no immediate
+ // profit from inlining X->Y, but to get great profit from a subsequent
+ // inlining X->Y->Z.
+
+ // This framework does not take well into account the problem of N**2 code
+ // size in a clique of mutually inlinable methods.
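+ //
+ // Concretely (see WarmCallInfo::compute_heat in callGenerator.cpp), the heat
+ // actually computed is count() * profit() * size_factor, where size_factor
+ // steps from 4 down to 0.5 as the normalized size estimate grows.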
+
+ WarmCallInfo* next() const { return _next; }
+ void set_next(WarmCallInfo* n) { _next = n; }
+
+ static WarmCallInfo* _always_hot;
+ static WarmCallInfo* _always_cold;
+
+ public:
+ // Because WarmCallInfo objects live over the entire lifetime of the
+ // Compile object, they are allocated into the comp_arena, which
+ // does not get resource-marked or reset during the compile process.
+ void *operator new( size_t x, Compile* C ) { return C->comp_arena()->Amalloc(x); }
+ void operator delete( void * ) { } // fast deallocation
+
+ static WarmCallInfo* always_hot();
+ static WarmCallInfo* always_cold();
+
+ WarmCallInfo() {
+ _call = NULL;
+ _hot_cg = NULL;
+ _next = NULL;
+ _count = _profit = _work = _size = _heat = 0;
+ }
+
+ CallNode* call() const { return _call; }
+ float count() const { return _count; }
+ float size() const { return _size; }
+ float work() const { return _work; }
+ float profit() const { return _profit; }
+ float heat() const { return _heat; }
+
+ void set_count(float x) { _count = x; }
+ void set_size(float x) { _size = x; }
+ void set_work(float x) { _work = x; }
+ void set_profit(float x) { _profit = x; }
+ void set_heat(float x) { _heat = x; }
+
+ // Load initial heuristics from profiles, etc.
+ // The heuristics can be tweaked further by the caller.
+ void init(JVMState* call_site, ciMethod* call_method, ciCallProfile& profile, float prof_factor);
+
+ static float MAX_VALUE() { return +1.0e10; }
+ static float MIN_VALUE() { return -1.0e10; }
+
+ float compute_heat() const;
+
+ void set_call(CallNode* call) { _call = call; }
+ void set_hot_cg(CallGenerator* cg) { _hot_cg = cg; }
+
+ // Do not queue very hot or very cold calls.
+ // Make very cold ones out of line immediately.
+ // Inline very hot ones immediately.
+ // These queries apply various tunable limits
+ // to the above metrics in a systematic way.
+ // Test for coldness before testing for hotness.
+ bool is_cold() const;
+ bool is_hot() const;
+
+ // Force a warm call to be hot. This worklists the call node for inlining.
+ void make_hot();
+
+ // Force a warm call to be cold. This worklists the call node for out-of-lining.
+ void make_cold();
+
+ // A reproducible total ordering, in which heat is the major key.
+ bool warmer_than(WarmCallInfo* that);
+
+ // List management. These methods are called with the list head,
+ // and return the new list head, inserting or removing the receiver.
+ WarmCallInfo* insert_into(WarmCallInfo* head);
+ WarmCallInfo* remove_from(WarmCallInfo* head);
+
+#ifndef PRODUCT
+ void print() const;
+ void print_all() const;
+ int count_all() const;
+#endif
+};
diff --git a/src/share/vm/opto/callnode.cpp b/src/share/vm/opto/callnode.cpp
new file mode 100644
index 000000000..7fb600440
--- /dev/null
+++ b/src/share/vm/opto/callnode.cpp
@@ -0,0 +1,1311 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_callnode.cpp.incl"
+
+//=============================================================================
+uint StartNode::size_of() const { return sizeof(*this); }
+uint StartNode::cmp( const Node &n ) const
+{ return _domain == ((StartNode&)n)._domain; }
+const Type *StartNode::bottom_type() const { return _domain; }
+const Type *StartNode::Value(PhaseTransform *phase) const { return _domain; }
+#ifndef PRODUCT
+void StartNode::dump_spec(outputStream *st) const { st->print(" #"); _domain->dump_on(st);}
+#endif
+
+//------------------------------Ideal------------------------------------------
+Node *StartNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+//------------------------------calling_convention-----------------------------
+void StartNode::calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const {
+ Matcher::calling_convention( sig_bt, parm_regs, argcnt, false );
+}
+
+//------------------------------Registers--------------------------------------
+const RegMask &StartNode::in_RegMask(uint) const {
+ return RegMask::Empty;
+}
+
+//------------------------------match------------------------------------------
+// Construct projections for incoming parameters, and their RegMask info
+Node *StartNode::match( const ProjNode *proj, const Matcher *match ) {
+ switch (proj->_con) {
+ case TypeFunc::Control:
+ case TypeFunc::I_O:
+ case TypeFunc::Memory:
+ return new (match->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
+ case TypeFunc::FramePtr:
+ return new (match->C, 1) MachProjNode(this,proj->_con,Matcher::c_frame_ptr_mask, Op_RegP);
+ case TypeFunc::ReturnAdr:
+ return new (match->C, 1) MachProjNode(this,proj->_con,match->_return_addr_mask,Op_RegP);
+ case TypeFunc::Parms:
+ default: {
+ uint parm_num = proj->_con - TypeFunc::Parms;
+ const Type *t = _domain->field_at(proj->_con);
+ if (t->base() == Type::Half) // 2nd half of Longs and Doubles
+ return new (match->C, 1) ConNode(Type::TOP);
+ uint ideal_reg = Matcher::base2reg[t->base()];
+ RegMask &rm = match->_calling_convention_mask[parm_num];
+ return new (match->C, 1) MachProjNode(this,proj->_con,rm,ideal_reg);
+ }
+ }
+ return NULL;
+}
+
+//------------------------------StartOSRNode----------------------------------
+// The method start node for an on stack replacement adapter
+
+//------------------------------osr_domain-----------------------------
+const TypeTuple *StartOSRNode::osr_domain() {
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // address of osr buffer
+
+ return TypeTuple::make(TypeFunc::Parms+1, fields);
+}
+
+//=============================================================================
+const char * const ParmNode::names[TypeFunc::Parms+1] = {
+ "Control", "I_O", "Memory", "FramePtr", "ReturnAdr", "Parms"
+};
+
+#ifndef PRODUCT
+void ParmNode::dump_spec(outputStream *st) const {
+ if( _con < TypeFunc::Parms ) {
+ st->print(names[_con]);
+ } else {
+ st->print("Parm%d: ",_con-TypeFunc::Parms);
+ // Verbose and WizardMode dump bottom_type for all nodes
+ if( !Verbose && !WizardMode ) bottom_type()->dump_on(st);
+ }
+}
+#endif
+
+uint ParmNode::ideal_reg() const {
+ switch( _con ) {
+ case TypeFunc::Control : // fall through
+ case TypeFunc::I_O : // fall through
+ case TypeFunc::Memory : return 0;
+ case TypeFunc::FramePtr : // fall through
+ case TypeFunc::ReturnAdr: return Op_RegP;
+ default : assert( _con > TypeFunc::Parms, "" );
+ // fall through
+ case TypeFunc::Parms : {
+ // Type of argument being passed
+ const Type *t = in(0)->as_Start()->_domain->field_at(_con);
+ return Matcher::base2reg[t->base()];
+ }
+ }
+ ShouldNotReachHere();
+ return 0;
+}
+
+//=============================================================================
+ReturnNode::ReturnNode(uint edges, Node *cntrl, Node *i_o, Node *memory, Node *frameptr, Node *retadr ) : Node(edges) {
+ init_req(TypeFunc::Control,cntrl);
+ init_req(TypeFunc::I_O,i_o);
+ init_req(TypeFunc::Memory,memory);
+ init_req(TypeFunc::FramePtr,frameptr);
+ init_req(TypeFunc::ReturnAdr,retadr);
+}
+
+Node *ReturnNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+const Type *ReturnNode::Value( PhaseTransform *phase ) const {
+ return ( phase->type(in(TypeFunc::Control)) == Type::TOP)
+ ? Type::TOP
+ : Type::BOTTOM;
+}
+
+// Do we Match on this edge index or not? No edges on return nodes
+uint ReturnNode::match_edge(uint idx) const {
+ return 0;
+}
+
+
+#ifndef PRODUCT
+void ReturnNode::dump_req() const {
+ // Dump the required inputs, enclosed in '(' and ')'
+ uint i; // Exit value of loop
+ for( i=0; i<req(); i++ ) { // For all required inputs
+ if( i == TypeFunc::Parms ) tty->print("returns");
+ if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+ else tty->print("_ ");
+ }
+}
+#endif
+
+//=============================================================================
+RethrowNode::RethrowNode(
+ Node* cntrl,
+ Node* i_o,
+ Node* memory,
+ Node* frameptr,
+ Node* ret_adr,
+ Node* exception
+) : Node(TypeFunc::Parms + 1) {
+ init_req(TypeFunc::Control , cntrl );
+ init_req(TypeFunc::I_O , i_o );
+ init_req(TypeFunc::Memory , memory );
+ init_req(TypeFunc::FramePtr , frameptr );
+ init_req(TypeFunc::ReturnAdr, ret_adr);
+ init_req(TypeFunc::Parms , exception);
+}
+
+Node *RethrowNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+const Type *RethrowNode::Value( PhaseTransform *phase ) const {
+ return (phase->type(in(TypeFunc::Control)) == Type::TOP)
+ ? Type::TOP
+ : Type::BOTTOM;
+}
+
+uint RethrowNode::match_edge(uint idx) const {
+ return 0;
+}
+
+#ifndef PRODUCT
+void RethrowNode::dump_req() const {
+ // Dump the required inputs, enclosed in '(' and ')'
+ uint i; // Exit value of loop
+ for( i=0; i<req(); i++ ) { // For all required inputs
+ if( i == TypeFunc::Parms ) tty->print("exception");
+ if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+ else tty->print("_ ");
+ }
+}
+#endif
+
+//=============================================================================
+// Do we Match on this edge index or not? Match only target address & method
+uint TailCallNode::match_edge(uint idx) const {
+ return TypeFunc::Parms <= idx && idx <= TypeFunc::Parms+1;
+}
+
+//=============================================================================
+// Do we Match on this edge index or not? Match only target address & oop
+uint TailJumpNode::match_edge(uint idx) const {
+ return TypeFunc::Parms <= idx && idx <= TypeFunc::Parms+1;
+}
+
+//=============================================================================
+JVMState::JVMState(ciMethod* method, JVMState* caller) {
+ assert(method != NULL, "must be valid call site");
+ _method = method;
+ debug_only(_bci = -99); // random garbage value
+ debug_only(_map = (SafePointNode*)-1);
+ _caller = caller;
+ _depth = 1 + (caller == NULL ? 0 : caller->depth());
+ _locoff = TypeFunc::Parms;
+ _stkoff = _locoff + _method->max_locals();
+ _monoff = _stkoff + _method->max_stack();
+ _endoff = _monoff;
+ _sp = 0;
+}
+JVMState::JVMState(int stack_size) {
+ _method = NULL;
+ _bci = InvocationEntryBci;
+ debug_only(_map = (SafePointNode*)-1);
+ _caller = NULL;
+ _depth = 1;
+ _locoff = TypeFunc::Parms;
+ _stkoff = _locoff;
+ _monoff = _stkoff + stack_size;
+ _endoff = _monoff;
+ _sp = 0;
+}
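+
+// For reference, the debug-info layout implied by the offsets set up above,
+// for a single frame (caller frames repeat the same pattern at higher offsets):
+//   [locoff, stkoff)  -- locals
+//   [stkoff, monoff)  -- expression stack
+//   [monoff, endoff)  -- monitors (grown later via push_monitor;
+//                        two edges, box and obj, per monitor)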
+
+//--------------------------------of_depth-------------------------------------
+JVMState* JVMState::of_depth(int d) const {
+ const JVMState* jvmp = this;
+ assert(0 < d && (uint)d <= depth(), "oob");
+ for (int skip = depth() - d; skip > 0; skip--) {
+ jvmp = jvmp->caller();
+ }
+ assert(jvmp->depth() == (uint)d, "found the right one");
+ return (JVMState*)jvmp;
+}
+
+//-----------------------------same_calls_as-----------------------------------
+bool JVMState::same_calls_as(const JVMState* that) const {
+ if (this == that) return true;
+ if (this->depth() != that->depth()) return false;
+ const JVMState* p = this;
+ const JVMState* q = that;
+ for (;;) {
+ if (p->_method != q->_method) return false;
+ if (p->_method == NULL) return true; // bci is irrelevant
+ if (p->_bci != q->_bci) return false;
+ p = p->caller();
+ q = q->caller();
+ if (p == q) return true;
+ assert(p != NULL && q != NULL, "depth check ensures we don't run off end");
+ }
+}
+
+//------------------------------debug_start------------------------------------
+uint JVMState::debug_start() const {
+ debug_only(JVMState* jvmroot = of_depth(1));
+ assert(jvmroot->locoff() <= this->locoff(), "youngest JVMState must be last");
+ return of_depth(1)->locoff();
+}
+
+//-------------------------------debug_end-------------------------------------
+uint JVMState::debug_end() const {
+ debug_only(JVMState* jvmroot = of_depth(1));
+ assert(jvmroot->endoff() <= this->endoff(), "youngest JVMState must be last");
+ return endoff();
+}
+
+//------------------------------debug_depth------------------------------------
+uint JVMState::debug_depth() const {
+ uint total = 0;
+ for (const JVMState* jvmp = this; jvmp != NULL; jvmp = jvmp->caller()) {
+ total += jvmp->debug_size();
+ }
+ return total;
+}
+
+//------------------------------format_helper----------------------------------
+// Given an allocation (a Chaitin object) and a Node decide if the Node carries
+// any defined value or not. If it does, print out the register or constant.
+#ifndef PRODUCT
+static void format_helper( PhaseRegAlloc *regalloc, outputStream* st, Node *n, const char *msg, uint i ) {
+ if (n == NULL) { st->print(" NULL"); return; }
+ if( OptoReg::is_valid(regalloc->get_reg_first(n))) { // Check for undefined
+ char buf[50];
+ regalloc->dump_register(n,buf);
+ st->print(" %s%d]=%s",msg,i,buf);
+ } else { // No register, but might be constant
+ const Type *t = n->bottom_type();
+ switch (t->base()) {
+ case Type::Int:
+ st->print(" %s%d]=#"INT32_FORMAT,msg,i,t->is_int()->get_con());
+ break;
+ case Type::AnyPtr:
+ assert( t == TypePtr::NULL_PTR, "" );
+ st->print(" %s%d]=#NULL",msg,i);
+ break;
+ case Type::AryPtr:
+ case Type::KlassPtr:
+ case Type::InstPtr:
+ st->print(" %s%d]=#Ptr" INTPTR_FORMAT,msg,i,t->isa_oopptr()->const_oop());
+ break;
+ case Type::RawPtr:
+ st->print(" %s%d]=#Raw" INTPTR_FORMAT,msg,i,t->is_rawptr());
+ break;
+ case Type::DoubleCon:
+ st->print(" %s%d]=#%fD",msg,i,t->is_double_constant()->_d);
+ break;
+ case Type::FloatCon:
+ st->print(" %s%d]=#%fF",msg,i,t->is_float_constant()->_f);
+ break;
+ case Type::Long:
+ st->print(" %s%d]=#"INT64_FORMAT,msg,i,t->is_long()->get_con());
+ break;
+ case Type::Half:
+ case Type::Top:
+ st->print(" %s%d]=_",msg,i);
+ break;
+ default: ShouldNotReachHere();
+ }
+ }
+}
+#endif
+
+//------------------------------format-----------------------------------------
+#ifndef PRODUCT
+void JVMState::format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const {
+ st->print(" #");
+ if( _method ) {
+ _method->print_short_name(st);
+ st->print(" @ bci:%d ",_bci);
+ } else {
+ st->print_cr(" runtime stub ");
+ return;
+ }
+ if (n->is_MachSafePoint()) {
+ MachSafePointNode *mcall = n->as_MachSafePoint();
+ uint i;
+ // Print locals
+ for( i = 0; i < (uint)loc_size(); i++ )
+ format_helper( regalloc, st, mcall->local(this, i), "L[", i );
+ // Print stack
+ for (i = 0; i < (uint)stk_size(); i++) {
+ if ((uint)(_stkoff + i) >= mcall->len())
+ st->print(" oob ");
+ else
+ format_helper( regalloc, st, mcall->stack(this, i), "STK[", i );
+ }
+ for (i = 0; (int)i < nof_monitors(); i++) {
+ Node *box = mcall->monitor_box(this, i);
+ Node *obj = mcall->monitor_obj(this, i);
+ if ( OptoReg::is_valid(regalloc->get_reg_first(box)) ) {
+ while( !box->is_BoxLock() ) box = box->in(1);
+ format_helper( regalloc, st, box, "MON-BOX[", i );
+ } else {
+ OptoReg::Name box_reg = BoxLockNode::stack_slot(box);
+ st->print(" MON-BOX%d=%s+%d",
+ i,
+ OptoReg::regname(OptoReg::c_frame_pointer),
+ regalloc->reg2offset(box_reg));
+ }
+ format_helper( regalloc, st, obj, "MON-OBJ[", i );
+ }
+ }
+ st->print_cr("");
+ if (caller() != NULL) caller()->format(regalloc, n, st);
+}
+#endif
+
+#ifndef PRODUCT
+void JVMState::dump_spec(outputStream *st) const {
+ if (_method != NULL) {
+ bool printed = false;
+ if (!Verbose) {
+ // The JVMS dumps make really, really long lines.
+ // Take out the most boring parts, which are the package prefixes.
+ char buf[500];
+ stringStream namest(buf, sizeof(buf));
+ _method->print_short_name(&namest);
+ if (namest.count() < sizeof(buf)) {
+ const char* name = namest.base();
+ if (name[0] == ' ') ++name;
+ const char* endcn = strchr(name, ':'); // end of class name
+ if (endcn == NULL) endcn = strchr(name, '(');
+ if (endcn == NULL) endcn = name + strlen(name);
+ while (endcn > name && endcn[-1] != '.' && endcn[-1] != '/')
+ --endcn;
+ st->print(" %s", endcn);
+ printed = true;
+ }
+ }
+ if (!printed)
+ _method->print_short_name(st);
+ st->print(" @ bci:%d",_bci);
+ } else {
+ st->print(" runtime stub");
+ }
+ if (caller() != NULL) caller()->dump_spec(st);
+}
+#endif
+
+#ifndef PRODUCT
+void JVMState::dump_on(outputStream* st) const {
+ if (_map && !((uintptr_t)_map & 1)) {
+ if (_map->len() > _map->req()) { // _map->has_exceptions()
+ Node* ex = _map->in(_map->req()); // _map->next_exception()
+ // skip the first one; it's already being printed
+ while (ex != NULL && ex->len() > ex->req()) {
+ ex = ex->in(ex->req()); // ex->next_exception()
+ ex->dump(1);
+ }
+ }
+ _map->dump(2);
+ }
+ st->print("JVMS depth=%d loc=%d stk=%d mon=%d end=%d mondepth=%d sp=%d bci=%d method=",
+ depth(), locoff(), stkoff(), monoff(), endoff(), monitor_depth(), sp(), bci());
+ if (_method == NULL) {
+ st->print_cr("(none)");
+ } else {
+ _method->print_name(st);
+ st->cr();
+ if (bci() >= 0 && bci() < _method->code_size()) {
+ st->print(" bc: ");
+ _method->print_codes_on(bci(), bci()+1, st);
+ }
+ }
+ if (caller() != NULL) {
+ caller()->dump_on(st);
+ }
+}
+
+// Extra way to dump a jvms from the debugger,
+// to avoid a bug with C++ member function calls.
+void dump_jvms(JVMState* jvms) {
+ jvms->dump();
+}
+#endif
+
+//--------------------------clone_shallow--------------------------------------
+JVMState* JVMState::clone_shallow(Compile* C) const {
+ JVMState* n = has_method() ? new (C) JVMState(_method, _caller) : new (C) JVMState(0);
+ n->set_bci(_bci);
+ n->set_locoff(_locoff);
+ n->set_stkoff(_stkoff);
+ n->set_monoff(_monoff);
+ n->set_endoff(_endoff);
+ n->set_sp(_sp);
+ n->set_map(_map);
+ return n;
+}
+
+//---------------------------clone_deep----------------------------------------
+JVMState* JVMState::clone_deep(Compile* C) const {
+ JVMState* n = clone_shallow(C);
+ for (JVMState* p = n; p->_caller != NULL; p = p->_caller) {
+ p->_caller = p->_caller->clone_shallow(C);
+ }
+ assert(n->depth() == depth(), "sanity");
+ assert(n->debug_depth() == debug_depth(), "sanity");
+ return n;
+}
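+
+// (Note: clone_shallow above copies only the youngest frame and still shares
+// the caller chain; clone_deep re-clones every caller frame so the whole chain
+// can be modified without disturbing the original JVMState.)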
+
+//=============================================================================
+uint CallNode::cmp( const Node &n ) const
+{ return _tf == ((CallNode&)n)._tf && _jvms == ((CallNode&)n)._jvms; }
+#ifndef PRODUCT
+void CallNode::dump_req() const {
+ // Dump the required inputs, enclosed in '(' and ')'
+ uint i; // Exit value of loop
+ for( i=0; i<req(); i++ ) { // For all required inputs
+ if( i == TypeFunc::Parms ) tty->print("(");
+ if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+ else tty->print("_ ");
+ }
+ tty->print(")");
+}
+
+void CallNode::dump_spec(outputStream *st) const {
+ st->print(" ");
+ tf()->dump_on(st);
+ if (_cnt != COUNT_UNKNOWN) st->print(" C=%f",_cnt);
+ if (jvms() != NULL) jvms()->dump_spec(st);
+}
+#endif
+
+const Type *CallNode::bottom_type() const { return tf()->range(); }
+const Type *CallNode::Value(PhaseTransform *phase) const {
+ if (phase->type(in(0)) == Type::TOP) return Type::TOP;
+ return tf()->range();
+}
+
+//------------------------------calling_convention-----------------------------
+void CallNode::calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const {
+ // Use the standard compiler calling convention
+ Matcher::calling_convention( sig_bt, parm_regs, argcnt, true );
+}
+
+
+//------------------------------match------------------------------------------
+// Construct projections for control, I/O, memory-fields, ..., and
+// return result(s) along with their RegMask info
+Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
+ switch (proj->_con) {
+ case TypeFunc::Control:
+ case TypeFunc::I_O:
+ case TypeFunc::Memory:
+ return new (match->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
+
+ case TypeFunc::Parms+1: // For LONG & DOUBLE returns
+ assert(tf()->_range->field_at(TypeFunc::Parms+1) == Type::HALF, "");
+ // 2nd half of doubles and longs
+ return new (match->C, 1) MachProjNode(this,proj->_con, RegMask::Empty, (uint)OptoReg::Bad);
+
+ case TypeFunc::Parms: { // Normal returns
+ uint ideal_reg = Matcher::base2reg[tf()->range()->field_at(TypeFunc::Parms)->base()];
+ OptoRegPair regs = is_CallRuntime()
+ ? match->c_return_value(ideal_reg,true) // Calls into C runtime
+ : match-> return_value(ideal_reg,true); // Calls into compiled Java code
+ RegMask rm = RegMask(regs.first());
+ if( OptoReg::is_valid(regs.second()) )
+ rm.Insert( regs.second() );
+ return new (match->C, 1) MachProjNode(this,proj->_con,rm,ideal_reg);
+ }
+
+ case TypeFunc::ReturnAdr:
+ case TypeFunc::FramePtr:
+ default:
+ ShouldNotReachHere();
+ }
+ return NULL;
+}
+
+// Do we Match on this edge index or not? Match no edges
+uint CallNode::match_edge(uint idx) const {
+ return 0;
+}
+
+//=============================================================================
+uint CallJavaNode::size_of() const { return sizeof(*this); }
+uint CallJavaNode::cmp( const Node &n ) const {
+ CallJavaNode &call = (CallJavaNode&)n;
+ return CallNode::cmp(call) && _method == call._method;
+}
+#ifndef PRODUCT
+void CallJavaNode::dump_spec(outputStream *st) const {
+ if( _method ) _method->print_short_name(st);
+ CallNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+uint CallStaticJavaNode::size_of() const { return sizeof(*this); }
+uint CallStaticJavaNode::cmp( const Node &n ) const {
+ CallStaticJavaNode &call = (CallStaticJavaNode&)n;
+ return CallJavaNode::cmp(call);
+}
+
+//----------------------------uncommon_trap_request----------------------------
+// If this is an uncommon trap, return the request code, else zero.
+int CallStaticJavaNode::uncommon_trap_request() const {
+ if (_name != NULL && !strcmp(_name, "uncommon_trap")) {
+ return extract_uncommon_trap_request(this);
+ }
+ return 0;
+}
+int CallStaticJavaNode::extract_uncommon_trap_request(const Node* call) {
+#ifndef PRODUCT
+ if (!(call->req() > TypeFunc::Parms &&
+ call->in(TypeFunc::Parms) != NULL &&
+ call->in(TypeFunc::Parms)->is_Con())) {
+ assert(_in_dump_cnt != 0, "OK if dumping");
+ tty->print("[bad uncommon trap]");
+ return 0;
+ }
+#endif
+ return call->in(TypeFunc::Parms)->bottom_type()->is_int()->get_con();
+}
+
+#ifndef PRODUCT
+void CallStaticJavaNode::dump_spec(outputStream *st) const {
+ st->print("# Static ");
+ if (_name != NULL) {
+ st->print("%s", _name);
+ int trap_req = uncommon_trap_request();
+ if (trap_req != 0) {
+ char buf[100];
+ st->print("(%s)",
+ Deoptimization::format_trap_request(buf, sizeof(buf),
+ trap_req));
+ }
+ st->print(" ");
+ }
+ CallJavaNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+uint CallDynamicJavaNode::size_of() const { return sizeof(*this); }
+uint CallDynamicJavaNode::cmp( const Node &n ) const {
+ CallDynamicJavaNode &call = (CallDynamicJavaNode&)n;
+ return CallJavaNode::cmp(call);
+}
+#ifndef PRODUCT
+void CallDynamicJavaNode::dump_spec(outputStream *st) const {
+ st->print("# Dynamic ");
+ CallJavaNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+uint CallRuntimeNode::size_of() const { return sizeof(*this); }
+uint CallRuntimeNode::cmp( const Node &n ) const {
+ CallRuntimeNode &call = (CallRuntimeNode&)n;
+ return CallNode::cmp(call) && !strcmp(_name,call._name);
+}
+#ifndef PRODUCT
+void CallRuntimeNode::dump_spec(outputStream *st) const {
+ st->print("# ");
+ st->print(_name);
+ CallNode::dump_spec(st);
+}
+#endif
+
+//------------------------------calling_convention-----------------------------
+void CallRuntimeNode::calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const {
+ Matcher::c_calling_convention( sig_bt, parm_regs, argcnt );
+}
+
+//=============================================================================
+//------------------------------calling_convention-----------------------------
+
+
+//=============================================================================
+#ifndef PRODUCT
+void CallLeafNode::dump_spec(outputStream *st) const {
+ st->print("# ");
+ st->print(_name);
+ CallNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+
+void SafePointNode::set_local(JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ int loc = jvms->locoff() + idx;
+ if (in(loc)->is_top() && idx > 0 && !c->is_top() ) {
+ // If current local idx is top then local idx - 1 could
+ // be a long/double that needs to be killed since top could
+ // represent the 2nd half of the long/double.
+ uint ideal = in(loc -1)->ideal_reg();
+ if (ideal == Op_RegD || ideal == Op_RegL) {
+ // set other (low index) half to top
+ set_req(loc - 1, in(loc));
+ }
+ }
+ set_req(loc, c);
+}
+
+uint SafePointNode::size_of() const { return sizeof(*this); }
+uint SafePointNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//-------------------------set_next_exception----------------------------------
+void SafePointNode::set_next_exception(SafePointNode* n) {
+ assert(n == NULL || n->Opcode() == Op_SafePoint, "correct value for next_exception");
+ if (len() == req()) {
+ if (n != NULL) add_prec(n);
+ } else {
+ set_prec(req(), n);
+ }
+}
+
+
+//----------------------------next_exception-----------------------------------
+SafePointNode* SafePointNode::next_exception() const {
+ if (len() == req()) {
+ return NULL;
+ } else {
+ Node* n = in(req());
+ assert(n == NULL || n->Opcode() == Op_SafePoint, "no other uses of prec edges");
+ return (SafePointNode*) n;
+ }
+}
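+
+// A minimal caller-side sketch (illustrative idiom, not defined in this file;
+// 'map' stands for some SafePointNode) of walking the exception states
+// recorded on a map:
+//
+//   for (SafePointNode* ex = map->next_exception(); ex != NULL;
+//        ex = ex->next_exception()) {
+//     // each 'ex' carries the JVM state of one exceptional exit
+//   }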
+
+
+//------------------------------Ideal------------------------------------------
+// Skip over any collapsed Regions
+Node *SafePointNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (remove_dead_region(phase, can_reshape)) return this;
+
+ return NULL;
+}
+
+//------------------------------Identity---------------------------------------
+// Remove obviously duplicate safepoints
+Node *SafePointNode::Identity( PhaseTransform *phase ) {
+
+ // If you have back to back safepoints, remove one
+ if( in(TypeFunc::Control)->is_SafePoint() )
+ return in(TypeFunc::Control);
+
+ if( in(0)->is_Proj() ) {
+ Node *n0 = in(0)->in(0);
+    // Check if it is a call projection (except Leaf Call)
+ if( n0->is_Catch() ) {
+ n0 = n0->in(0)->in(0);
+ assert( n0->is_Call(), "expect a call here" );
+ }
+ if( n0->is_Call() && n0->as_Call()->guaranteed_safepoint() ) {
+ // Useless Safepoint, so remove it
+ return in(TypeFunc::Control);
+ }
+ }
+
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *SafePointNode::Value( PhaseTransform *phase ) const {
+ if( phase->type(in(0)) == Type::TOP ) return Type::TOP;
+ if( phase->eqv( in(0), this ) ) return Type::TOP; // Dead infinite loop
+ return Type::CONTROL;
+}
+
+#ifndef PRODUCT
+void SafePointNode::dump_spec(outputStream *st) const {
+ st->print(" SafePoint ");
+}
+#endif
+
+const RegMask &SafePointNode::in_RegMask(uint idx) const {
+ if( idx < TypeFunc::Parms ) return RegMask::Empty;
+ // Values outside the domain represent debug info
+ return *(Compile::current()->matcher()->idealreg2debugmask[in(idx)->ideal_reg()]);
+}
+const RegMask &SafePointNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+
+void SafePointNode::grow_stack(JVMState* jvms, uint grow_by) {
+ assert((int)grow_by > 0, "sanity");
+ int monoff = jvms->monoff();
+ int endoff = jvms->endoff();
+ assert(endoff == (int)req(), "no other states or debug info after me");
+ Node* top = Compile::current()->top();
+ for (uint i = 0; i < grow_by; i++) {
+ ins_req(monoff, top);
+ }
+ jvms->set_monoff(monoff + grow_by);
+ jvms->set_endoff(endoff + grow_by);
+}
+
+void SafePointNode::push_monitor(const FastLockNode *lock) {
+ // Add a LockNode, which points to both the original BoxLockNode (the
+ // stack space for the monitor) and the Object being locked.
+ const int MonitorEdges = 2;
+ assert(JVMState::logMonitorEdges == exact_log2(MonitorEdges), "correct MonitorEdges");
+ assert(req() == jvms()->endoff(), "correct sizing");
+ if (GenerateSynchronizationCode) {
+ add_req(lock->box_node());
+ add_req(lock->obj_node());
+ } else {
+ add_req(NULL);
+ add_req(NULL);
+ }
+ jvms()->set_endoff(req());
+}
+
+void SafePointNode::pop_monitor() {
+ // Delete last monitor from debug info
+ debug_only(int num_before_pop = jvms()->nof_monitors());
+ const int MonitorEdges = (1<<JVMState::logMonitorEdges);
+ int endoff = jvms()->endoff();
+ int new_endoff = endoff - MonitorEdges;
+ jvms()->set_endoff(new_endoff);
+ while (endoff > new_endoff) del_req(--endoff);
+ assert(jvms()->nof_monitors() == num_before_pop-1, "");
+}
+
+Node *SafePointNode::peek_monitor_box() const {
+ int mon = jvms()->nof_monitors() - 1;
+  assert(mon >= 0, "must have a monitor");
+ return monitor_box(jvms(), mon);
+}
+
+Node *SafePointNode::peek_monitor_obj() const {
+ int mon = jvms()->nof_monitors() - 1;
+  assert(mon >= 0, "must have a monitor");
+ return monitor_obj(jvms(), mon);
+}
+
+// Do we Match on this edge index or not? Match no edges
+uint SafePointNode::match_edge(uint idx) const {
+ if( !needs_polling_address_input() )
+ return 0;
+
+ return (TypeFunc::Parms == idx);
+}
+
+//=============================================================================
+uint AllocateNode::size_of() const { return sizeof(*this); }
+
+AllocateNode::AllocateNode(Compile* C, const TypeFunc *atype,
+ Node *ctrl, Node *mem, Node *abio,
+ Node *size, Node *klass_node, Node *initial_test)
+ : CallNode(atype, NULL, TypeRawPtr::BOTTOM)
+{
+ init_class_id(Class_Allocate);
+ init_flags(Flag_is_macro);
+ Node *topnode = C->top();
+
+ init_req( TypeFunc::Control , ctrl );
+ init_req( TypeFunc::I_O , abio );
+ init_req( TypeFunc::Memory , mem );
+ init_req( TypeFunc::ReturnAdr, topnode );
+ init_req( TypeFunc::FramePtr , topnode );
+ init_req( AllocSize , size);
+ init_req( KlassNode , klass_node);
+ init_req( InitialTest , initial_test);
+ init_req( ALength , topnode);
+ C->add_macro_node(this);
+}
+
+//=============================================================================
+uint AllocateArrayNode::size_of() const { return sizeof(*this); }
+
+//=============================================================================
+uint LockNode::size_of() const { return sizeof(*this); }
+
+// Redundant lock elimination
+//
+// There are various patterns of locking where we release and
+// immediately reacquire a lock in a piece of code where no operations
+// occur in between that would be observable. In those cases we can
+// skip releasing and reacquiring the lock without violating any
+// fairness requirements. Doing this around a loop could cause a lock
+// to be held for a very long time so we concentrate on non-looping
+// control flow. We also require that the operations are fully
+// redundant, meaning that we don't introduce new lock operations on
+// some paths just to be able to eliminate them on others, a la PRE. This
+// would probably require some more extensive graph manipulation to
+// guarantee that the memory edges were all handled correctly.
+//
+// Assuming p is a simple predicate which can't trap in any way and s
+// is a synchronized method consider this code:
+//
+// s();
+// if (p)
+// s();
+// else
+// s();
+// s();
+//
+// 1. The unlocks of the first call to s can be eliminated if the
+// locks inside the then and else branches are eliminated.
+//
+// 2. The unlocks of the then and else branches can be eliminated if
+// the lock of the final call to s is eliminated.
+//
+// Either of these cases subsumes the simple case of sequential control flow
+//
+// Additionally we can eliminate versions without the else case:
+//
+// s();
+// if (p)
+// s();
+// s();
+//
+// 3. In this case we eliminate the unlock of the first s, the lock
+// and unlock in the then case and the lock in the final s.
+//
+// Note also that in all these cases the then/else pieces don't have
+// to be trivial as long as they begin and end with synchronization
+// operations.
+//
+// s();
+// if (p)
+// s();
+// f();
+// s();
+// s();
+//
+// The code will work properly for this case, leaving in the unlock
+// before the call to f and the relock after it.
+//
+// A potentially interesting case which isn't handled here is when the
+// locking is partially redundant.
+//
+// s();
+// if (p)
+// s();
+//
+// This could be eliminated by putting an unlock on the else case and
+// eliminating the first unlock and the lock in the then side.
+// Alternatively the unlock could be moved out of the then side so it
+// was after the merge and the first unlock and second lock
+// eliminated. This might require less manipulation of the memory
+// state to get correct.
+//
+// Additionally we might allow work between an unlock and lock before
+// giving up eliminating the locks. The current code disallows any
+// conditional control flow between these operations. A formulation
+// similar to partial redundancy elimination computing the
+// availability of unlocking and the anticipatability of locking at a
+// program point would allow detection of fully redundant locking with
+// some amount of work in between. I'm not sure how often I really
+// think that would occur though. Most of the cases I've seen
+// indicate it's likely non-trivial work would occur in between.
+// There may be other more complicated constructs where we could
+// eliminate locking but I haven't seen any others appear as hot or
+// interesting.
+//
+// Locking and unlocking have a canonical form in ideal that looks
+// roughly like this:
+//
+// <obj>
+// | \\------+
+// | \ \
+// | BoxLock \
+// | | | \
+// | | \ \
+// | | FastLock
+// | | /
+// | | /
+// | | |
+//
+// Lock
+// |
+// Proj #0
+// |
+// MembarAcquire
+// |
+// Proj #0
+//
+// MembarRelease
+// |
+// Proj #0
+// |
+// Unlock
+// |
+// Proj #0
+//
+//
+// This code proceeds by processing Lock nodes during PhaseIterGVN
+// and searching back through its control for the proper code
+// patterns. Once it finds a set of lock and unlock operations to
+// eliminate, they are marked as eliminatable, which causes the
+// expansion of the Lock and Unlock macro nodes to make the operation a NOP.
+//
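+// The simple sequential case subsumed by cases 1 and 2 above corresponds
+// roughly, at the Java level, to code of the shape (illustrative only; x,
+// a() and b() are placeholders):
+//
+//   synchronized (x) { a(); }
+//   synchronized (x) { b(); }
+//
+// where the unlock ending the first region and the lock starting the second
+// form the redundant pair that gets marked as eliminatable.
+//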
+//=============================================================================
+
+//
+// Utility function to skip over uninteresting control nodes. Nodes skipped are:
+// - copy regions. (These may not have been optimized away yet.)
+// - eliminated locking nodes
+//
+static Node *next_control(Node *ctrl) {
+ if (ctrl == NULL)
+ return NULL;
+ while (1) {
+ if (ctrl->is_Region()) {
+ RegionNode *r = ctrl->as_Region();
+ Node *n = r->is_copy();
+ if (n == NULL)
+ break; // hit a region, return it
+ else
+ ctrl = n;
+ } else if (ctrl->is_Proj()) {
+ Node *in0 = ctrl->in(0);
+ if (in0->is_AbstractLock() && in0->as_AbstractLock()->is_eliminated()) {
+ ctrl = in0->in(0);
+ } else {
+ break;
+ }
+ } else {
+ break; // found an interesting control
+ }
+ }
+ return ctrl;
+}
+//
+// Given a control, see if it's the control projection of an Unlock which
+// is operating on the same object as 'lock'.
+//
+bool AbstractLockNode::find_matching_unlock(const Node* ctrl, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops) {
+ ProjNode *ctrl_proj = (ctrl->is_Proj()) ? ctrl->as_Proj() : NULL;
+ if (ctrl_proj != NULL && ctrl_proj->_con == TypeFunc::Control) {
+ Node *n = ctrl_proj->in(0);
+ if (n != NULL && n->is_Unlock()) {
+ UnlockNode *unlock = n->as_Unlock();
+ if ((lock->obj_node() == unlock->obj_node()) &&
+ (lock->box_node() == unlock->box_node()) && !unlock->is_eliminated()) {
+ lock_ops.append(unlock);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+//
+// Find the lock matching an unlock. Returns null if a safepoint
+// or complicated control is encountered first.
+LockNode *AbstractLockNode::find_matching_lock(UnlockNode* unlock) {
+ LockNode *lock_result = NULL;
+ // find the matching lock, or an intervening safepoint
+ Node *ctrl = next_control(unlock->in(0));
+ while (1) {
+ assert(ctrl != NULL, "invalid control graph");
+ assert(!ctrl->is_Start(), "missing lock for unlock");
+ if (ctrl->is_top()) break; // dead control path
+ if (ctrl->is_Proj()) ctrl = ctrl->in(0);
+ if (ctrl->is_SafePoint()) {
+ break; // found a safepoint (may be the lock we are searching for)
+ } else if (ctrl->is_Region()) {
+ // Check for a simple diamond pattern. Punt on anything more complicated
+ if (ctrl->req() == 3 && ctrl->in(1) != NULL && ctrl->in(2) != NULL) {
+ Node *in1 = next_control(ctrl->in(1));
+ Node *in2 = next_control(ctrl->in(2));
+ if (((in1->is_IfTrue() && in2->is_IfFalse()) ||
+ (in2->is_IfTrue() && in1->is_IfFalse())) && (in1->in(0) == in2->in(0))) {
+ ctrl = next_control(in1->in(0)->in(0));
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ } else {
+ ctrl = next_control(ctrl->in(0)); // keep searching
+ }
+ }
+ if (ctrl->is_Lock()) {
+ LockNode *lock = ctrl->as_Lock();
+ if ((lock->obj_node() == unlock->obj_node()) &&
+ (lock->box_node() == unlock->box_node())) {
+ lock_result = lock;
+ }
+ }
+ return lock_result;
+}
+
+// This code corresponds to case 3 above.
+
+bool AbstractLockNode::find_lock_and_unlock_through_if(Node* node, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops) {
+ Node* if_node = node->in(0);
+ bool if_true = node->is_IfTrue();
+
+ if (if_node->is_If() && if_node->outcnt() == 2 && (if_true || node->is_IfFalse())) {
+ Node *lock_ctrl = next_control(if_node->in(0));
+ if (find_matching_unlock(lock_ctrl, lock, lock_ops)) {
+ Node* lock1_node = NULL;
+ ProjNode* proj = if_node->as_If()->proj_out(!if_true);
+ if (if_true) {
+ if (proj->is_IfFalse() && proj->outcnt() == 1) {
+ lock1_node = proj->unique_out();
+ }
+ } else {
+ if (proj->is_IfTrue() && proj->outcnt() == 1) {
+ lock1_node = proj->unique_out();
+ }
+ }
+ if (lock1_node != NULL && lock1_node->is_Lock()) {
+ LockNode *lock1 = lock1_node->as_Lock();
+ if ((lock->obj_node() == lock1->obj_node()) &&
+ (lock->box_node() == lock1->box_node()) && !lock1->is_eliminated()) {
+ lock_ops.append(lock1);
+ return true;
+ }
+ }
+ }
+ }
+
+ lock_ops.trunc_to(0);
+ return false;
+}
+
+bool AbstractLockNode::find_unlocks_for_region(const RegionNode* region, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops) {
+ // check each control merging at this point for a matching unlock.
+ // in(0) should be self edge so skip it.
+ for (int i = 1; i < (int)region->req(); i++) {
+ Node *in_node = next_control(region->in(i));
+ if (in_node != NULL) {
+ if (find_matching_unlock(in_node, lock, lock_ops)) {
+ // found a match so keep on checking.
+ continue;
+ } else if (find_lock_and_unlock_through_if(in_node, lock, lock_ops)) {
+ continue;
+ }
+
+ // If we fall through to here then it was some kind of node we
+ // don't understand or there wasn't a matching unlock, so give
+ // up trying to merge locks.
+ lock_ops.trunc_to(0);
+ return false;
+ }
+ }
+ return true;
+
+}
+
+#ifndef PRODUCT
+//
+// Create a counter which counts the number of times this lock is acquired
+//
+void AbstractLockNode::create_lock_counter(JVMState* state) {
+ _counter = OptoRuntime::new_named_counter(state, NamedCounter::LockCounter);
+}
+#endif
+
+void AbstractLockNode::set_eliminated() {
+ _eliminate = true;
+#ifndef PRODUCT
+ if (_counter) {
+ // Update the counter to indicate that this lock was eliminated.
+ // The counter update code will stay around even though the
+ // optimizer will eliminate the lock operation itself.
+ _counter->set_tag(NamedCounter::EliminatedLockCounter);
+ }
+#endif
+}
+
+//=============================================================================
+Node *LockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+
+ // perform any generic optimizations first
+ Node *result = SafePointNode::Ideal(phase, can_reshape);
+
+ // Now see if we can optimize away this lock. We don't actually
+ // remove the locking here, we simply set the _eliminate flag which
+ // prevents macro expansion from expanding the lock. Since we don't
+ // modify the graph, the value returned from this function is the
+ // one computed above.
+ if (EliminateLocks && !is_eliminated()) {
+ //
+ // Try lock coarsening
+ //
+ PhaseIterGVN* iter = phase->is_IterGVN();
+ if (iter != NULL) {
+
+ GrowableArray<AbstractLockNode*> lock_ops;
+
+ Node *ctrl = next_control(in(0));
+
+ // now search back for a matching Unlock
+ if (find_matching_unlock(ctrl, this, lock_ops)) {
+        // found an unlock directly preceding this lock. This is the
+        // case of a single lock directly control dependent on a
+        // single unlock, which is the trivial version of case 1 or 2.
+ } else if (ctrl->is_Region() ) {
+ if (find_unlocks_for_region(ctrl->as_Region(), this, lock_ops)) {
+ // found lock preceded by multiple unlocks along all paths
+ // joining at this point which is case 3 in description above.
+ }
+ } else {
+        // see if this lock comes from either half of an if whose
+        // predecessor performs a matching unlock and whose other
+        // half also performs a lock.
+ if (find_lock_and_unlock_through_if(ctrl, this, lock_ops)) {
+ // found unlock splitting to an if with locks on both branches.
+ }
+ }
+
+ if (lock_ops.length() > 0) {
+ // add ourselves to the list of locks to be eliminated.
+ lock_ops.append(this);
+
+ #ifndef PRODUCT
+ if (PrintEliminateLocks) {
+ int locks = 0;
+ int unlocks = 0;
+ for (int i = 0; i < lock_ops.length(); i++) {
+ AbstractLockNode* lock = lock_ops.at(i);
+ if (lock->Opcode() == Op_Lock) locks++;
+ else unlocks++;
+ if (Verbose) {
+ lock->dump(1);
+ }
+ }
+ tty->print_cr("***Eliminated %d unlocks and %d locks", unlocks, locks);
+ }
+ #endif
+
+ // for each of the identified locks, mark them
+ // as eliminatable
+ for (int i = 0; i < lock_ops.length(); i++) {
+ AbstractLockNode* lock = lock_ops.at(i);
+
+ // Mark it eliminated to update any counters
+ lock->set_eliminated();
+ }
+ } else if (result != NULL && ctrl->is_Region() &&
+ iter->_worklist.member(ctrl)) {
+ // We weren't able to find any opportunities but the region this
+ // lock is control dependent on hasn't been processed yet so put
+ // this lock back on the worklist so we can check again once any
+ // region simplification has occurred.
+ iter->_worklist.push(this);
+ }
+ }
+ }
+
+ return result;
+}
+
+//=============================================================================
+uint UnlockNode::size_of() const { return sizeof(*this); }
+
+//=============================================================================
+Node *UnlockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+
+ // perform any generic optimizations first
+ Node * result = SafePointNode::Ideal(phase, can_reshape);
+
+ // Now see if we can optimize away this unlock. We don't actually
+ // remove the unlocking here, we simply set the _eliminate flag which
+ // prevents macro expansion from expanding the unlock. Since we don't
+ // modify the graph, the value returned from this function is the
+ // one computed above.
+ if (EliminateLocks && !is_eliminated()) {
+ //
+    // If we are unlocking an unescaped object, the lock/unlock is unnecessary.
+ // We can eliminate them if there are no safepoints in the locked region.
+ //
+ ConnectionGraph *cgr = Compile::current()->congraph();
+ if (cgr != NULL && cgr->escape_state(obj_node(), phase) == PointsToNode::NoEscape) {
+ GrowableArray<AbstractLockNode*> lock_ops;
+ LockNode *lock = find_matching_lock(this);
+ if (lock != NULL) {
+ lock_ops.append(this);
+ lock_ops.append(lock);
+ // find other unlocks which pair with the lock we found and add them
+ // to the list
+ Node * box = box_node();
+
+ for (DUIterator_Fast imax, i = box->fast_outs(imax); i < imax; i++) {
+ Node *use = box->fast_out(i);
+ if (use->is_Unlock() && use != this) {
+ UnlockNode *unlock1 = use->as_Unlock();
+ if (!unlock1->is_eliminated()) {
+ LockNode *lock1 = find_matching_lock(unlock1);
+ if (lock == lock1)
+ lock_ops.append(unlock1);
+ else if (lock1 == NULL) {
+ // we can't find a matching lock, we must assume the worst
+ lock_ops.trunc_to(0);
+ break;
+ }
+ }
+ }
+ }
+ if (lock_ops.length() > 0) {
+
+ #ifndef PRODUCT
+ if (PrintEliminateLocks) {
+ int locks = 0;
+ int unlocks = 0;
+ for (int i = 0; i < lock_ops.length(); i++) {
+ AbstractLockNode* lock = lock_ops.at(i);
+ if (lock->Opcode() == Op_Lock) locks++;
+ else unlocks++;
+ if (Verbose) {
+ lock->dump(1);
+ }
+ }
+ tty->print_cr("***Eliminated %d unescaped unlocks and %d unescaped locks", unlocks, locks);
+ }
+ #endif
+
+ // for each of the identified locks, mark them
+ // as eliminatable
+ for (int i = 0; i < lock_ops.length(); i++) {
+ AbstractLockNode* lock = lock_ops.at(i);
+
+ // Mark it eliminated to update any counters
+ lock->set_eliminated();
+ }
+ }
+ }
+ }
+ }
+ return result;
+}
diff --git a/src/share/vm/opto/callnode.hpp b/src/share/vm/opto/callnode.hpp
new file mode 100644
index 000000000..e1e6116b6
--- /dev/null
+++ b/src/share/vm/opto/callnode.hpp
@@ -0,0 +1,814 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+class Chaitin;
+class NamedCounter;
+class MultiNode;
+class SafePointNode;
+class CallNode;
+class CallJavaNode;
+class CallStaticJavaNode;
+class CallDynamicJavaNode;
+class CallRuntimeNode;
+class CallLeafNode;
+class CallLeafNoFPNode;
+class AllocateNode;
+class AllocateArrayNode;
+class LockNode;
+class UnlockNode;
+class JVMState;
+class OopMap;
+class State;
+class StartNode;
+class MachCallNode;
+class FastLockNode;
+
+//------------------------------StartNode--------------------------------------
+// The method start node
+class StartNode : public MultiNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ const TypeTuple *_domain;
+ StartNode( Node *root, const TypeTuple *domain ) : MultiNode(2), _domain(domain) {
+ init_class_id(Class_Start);
+ init_flags(Flag_is_block_start);
+ init_req(0,this);
+ init_req(1,root);
+ }
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; };
+ virtual const Type *bottom_type() const;
+ virtual const TypePtr *adr_type() const { return TypePtr::BOTTOM; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual void calling_convention( BasicType* sig_bt, VMRegPair *parm_reg, uint length ) const;
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+ virtual uint ideal_reg() const { return 0; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------StartOSRNode-----------------------------------
+// The method start node for on stack replacement code
+class StartOSRNode : public StartNode {
+public:
+ StartOSRNode( Node *root, const TypeTuple *domain ) : StartNode(root, domain) {}
+ virtual int Opcode() const;
+ static const TypeTuple *osr_domain();
+};
+
+
+//------------------------------ParmNode---------------------------------------
+// Incoming parameters
+class ParmNode : public ProjNode {
+ static const char * const names[TypeFunc::Parms+1];
+public:
+ ParmNode( StartNode *src, uint con ) : ProjNode(src,con) {}
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return (_con == TypeFunc::Control); }
+ virtual uint ideal_reg() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+
+//------------------------------ReturnNode-------------------------------------
+// Return from subroutine node
+class ReturnNode : public Node {
+public:
+ ReturnNode( uint edges, Node *cntrl, Node *i_o, Node *memory, Node *retadr, Node *frameptr );
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const;
+#ifndef PRODUCT
+ virtual void dump_req() const;
+#endif
+};
+
+
+//------------------------------RethrowNode------------------------------------
+// Rethrow of exception at call site. Ends a procedure before rethrowing;
+// ends the current basic block like a ReturnNode. Restores registers and
+// unwinds stack. Rethrow happens in the caller's method.
+class RethrowNode : public Node {
+ public:
+ RethrowNode( Node *cntrl, Node *i_o, Node *memory, Node *frameptr, Node *ret_adr, Node *exception );
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint match_edge(uint idx) const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+#ifndef PRODUCT
+ virtual void dump_req() const;
+#endif
+};
+
+
+//------------------------------TailCallNode-----------------------------------
+// Pop stack frame and jump indirect
+class TailCallNode : public ReturnNode {
+public:
+ TailCallNode( Node *cntrl, Node *i_o, Node *memory, Node *frameptr, Node *retadr, Node *target, Node *moop )
+ : ReturnNode( TypeFunc::Parms+2, cntrl, i_o, memory, frameptr, retadr ) {
+ init_req(TypeFunc::Parms, target);
+ init_req(TypeFunc::Parms+1, moop);
+ }
+
+ virtual int Opcode() const;
+ virtual uint match_edge(uint idx) const;
+};
+
+//------------------------------TailJumpNode-----------------------------------
+// Pop stack frame and jump indirect
+class TailJumpNode : public ReturnNode {
+public:
+ TailJumpNode( Node *cntrl, Node *i_o, Node *memory, Node *frameptr, Node *target, Node *ex_oop)
+ : ReturnNode(TypeFunc::Parms+2, cntrl, i_o, memory, frameptr, Compile::current()->top()) {
+ init_req(TypeFunc::Parms, target);
+ init_req(TypeFunc::Parms+1, ex_oop);
+ }
+
+ virtual int Opcode() const;
+ virtual uint match_edge(uint idx) const;
+};
+
+//-------------------------------JVMState-------------------------------------
+// A linked list of JVMState nodes captures the whole interpreter state,
+// plus GC roots, for all active calls at some call site in this compilation
+// unit. (If there is no inlining, then the list has exactly one link.)
+// This provides a way to map the optimized program back into the interpreter,
+// or to let the GC mark the stack.
+class JVMState : public ResourceObj {
+private:
+ JVMState* _caller; // List pointer for forming scope chains
+  uint              _depth;       // One more than caller depth, or one.
+ uint _locoff; // Offset to locals in input edge mapping
+ uint _stkoff; // Offset to stack in input edge mapping
+ uint _monoff; // Offset to monitors in input edge mapping
+ uint _endoff; // Offset to end of input edge mapping
+  uint              _sp;          // Java Expression Stack Pointer for this state
+ int _bci; // Byte Code Index of this JVM point
+ ciMethod* _method; // Method Pointer
+ SafePointNode* _map; // Map node associated with this scope
+public:
+ friend class Compile;
+
+ // Because JVMState objects live over the entire lifetime of the
+ // Compile object, they are allocated into the comp_arena, which
+ // does not get resource marked or reset during the compile process
+ void *operator new( size_t x, Compile* C ) { return C->comp_arena()->Amalloc(x); }
+ void operator delete( void * ) { } // fast deallocation
+
+ // Create a new JVMState, ready for abstract interpretation.
+ JVMState(ciMethod* method, JVMState* caller);
+ JVMState(int stack_size); // root state; has a null method
+
+ // Access functions for the JVM
+ uint locoff() const { return _locoff; }
+ uint stkoff() const { return _stkoff; }
+ uint argoff() const { return _stkoff + _sp; }
+ uint monoff() const { return _monoff; }
+ uint endoff() const { return _endoff; }
+ uint oopoff() const { return debug_end(); }
+
+ int loc_size() const { return _stkoff - _locoff; }
+ int stk_size() const { return _monoff - _stkoff; }
+ int mon_size() const { return _endoff - _monoff; }
+
+ bool is_loc(uint i) const { return i >= _locoff && i < _stkoff; }
+ bool is_stk(uint i) const { return i >= _stkoff && i < _monoff; }
+ bool is_mon(uint i) const { return i >= _monoff && i < _endoff; }
+
+ uint sp() const { return _sp; }
+ int bci() const { return _bci; }
+ bool has_method() const { return _method != NULL; }
+ ciMethod* method() const { assert(has_method(), ""); return _method; }
+ JVMState* caller() const { return _caller; }
+ SafePointNode* map() const { return _map; }
+ uint depth() const { return _depth; }
+ uint debug_start() const; // returns locoff of root caller
+ uint debug_end() const; // returns endoff of self
+ uint debug_size() const { return loc_size() + sp() + mon_size(); }
+ uint debug_depth() const; // returns sum of debug_size values at all depths
+
+ // Returns the JVM state at the desired depth (1 == root).
+ JVMState* of_depth(int d) const;
+
+ // Tells if two JVM states have the same call chain (depth, methods, & bcis).
+ bool same_calls_as(const JVMState* that) const;
+
+  // Monitors (monitors are stored as (boxNode, objNode) pairs)
+ enum { logMonitorEdges = 1 };
+ int nof_monitors() const { return mon_size() >> logMonitorEdges; }
+ int monitor_depth() const { return nof_monitors() + (caller() ? caller()->monitor_depth() : 0); }
+ int monitor_box_offset(int idx) const { return monoff() + (idx << logMonitorEdges) + 0; }
+ int monitor_obj_offset(int idx) const { return monoff() + (idx << logMonitorEdges) + 1; }
+ bool is_monitor_box(uint off) const {
+ assert(is_mon(off), "should be called only for monitor edge");
+ return (0 == bitfield(off - monoff(), 0, logMonitorEdges));
+ }
+ bool is_monitor_use(uint off) const { return (is_mon(off)
+ && is_monitor_box(off))
+ || (caller() && caller()->is_monitor_use(off)); }
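+
+  // Monitor layout sketch (follows directly from the accessors above): with
+  // logMonitorEdges == 1 each monitor occupies two consecutive input edges,
+  //
+  //   box of monitor i -> monoff() + 2*i      (monitor_box_offset(i))
+  //   obj of monitor i -> monoff() + 2*i + 1  (monitor_obj_offset(i))
+  //
+  // so nof_monitors() == mon_size() >> 1.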
+
+ // Initialization functions for the JVM
+ void set_locoff(uint off) { _locoff = off; }
+ void set_stkoff(uint off) { _stkoff = off; }
+ void set_monoff(uint off) { _monoff = off; }
+ void set_endoff(uint off) { _endoff = off; }
+ void set_offsets(uint off) { _locoff = _stkoff = _monoff = _endoff = off; }
+ void set_map(SafePointNode *map) { _map = map; }
+ void set_sp(uint sp) { _sp = sp; }
+ void set_bci(int bci) { _bci = bci; }
+
+ // Miscellaneous utility functions
+ JVMState* clone_deep(Compile* C) const; // recursively clones caller chain
+ JVMState* clone_shallow(Compile* C) const; // retains uncloned caller
+
+#ifndef PRODUCT
+ void format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const;
+ void dump_spec(outputStream *st) const;
+ void dump_on(outputStream* st) const;
+ void dump() const {
+ dump_on(tty);
+ }
+#endif
+};
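+
+// A minimal sketch (illustrative idiom; 'youngest' stands for the innermost
+// JVMState of interest) of walking an inline scope chain:
+//
+//   for (JVMState* jvms = youngest; jvms != NULL; jvms = jvms->caller()) {
+//     // jvms->bci(), and jvms->method() when has_method(), identify one
+//     // frame; depth() == 1 at the root of the chain
+//   }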
+
+//------------------------------SafePointNode----------------------------------
+// A SafePointNode is a subclass of a MultiNode for convenience (and
+// potential code sharing) only - conceptually it is independent of
+// the Node semantics.
+class SafePointNode : public MultiNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+
+public:
+ SafePointNode(uint edges, JVMState* jvms,
+ // A plain safepoint advertises no memory effects (NULL):
+ const TypePtr* adr_type = NULL)
+ : MultiNode( edges ),
+ _jvms(jvms),
+ _oop_map(NULL),
+ _adr_type(adr_type)
+ {
+ init_class_id(Class_SafePoint);
+ }
+
+ OopMap* _oop_map; // Array of OopMap info (8-bit char) for GC
+ JVMState* const _jvms; // Pointer to list of JVM State objects
+ const TypePtr* _adr_type; // What type of memory does this node produce?
+
+ // Many calls take *all* of memory as input,
+ // but some produce a limited subset of that memory as output.
+ // The adr_type reports the call's behavior as a store, not a load.
+
+ virtual JVMState* jvms() const { return _jvms; }
+ void set_jvms(JVMState* s) {
+ *(JVMState**)&_jvms = s; // override const attribute in the accessor
+ }
+ OopMap *oop_map() const { return _oop_map; }
+ void set_oop_map(OopMap *om) { _oop_map = om; }
+
+ // Functionality from old debug nodes which has changed
+ Node *local(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->locoff() + idx);
+ }
+ Node *stack(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->stkoff() + idx);
+ }
+ Node *argument(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->argoff() + idx);
+ }
+ Node *monitor_box(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->monitor_box_offset(idx));
+ }
+ Node *monitor_obj(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->monitor_obj_offset(idx));
+ }
+
+ void set_local(JVMState* jvms, uint idx, Node *c);
+
+ void set_stack(JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(jvms->stkoff() + idx, c);
+ }
+ void set_argument(JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(jvms->argoff() + idx, c);
+ }
+ void ensure_stack(JVMState* jvms, uint stk_size) {
+ assert(verify_jvms(jvms), "jvms must match");
+ int grow_by = (int)stk_size - (int)jvms->stk_size();
+ if (grow_by > 0) grow_stack(jvms, grow_by);
+ }
+ void grow_stack(JVMState* jvms, uint grow_by);
+ // Handle monitor stack
+ void push_monitor( const FastLockNode *lock );
+ void pop_monitor ();
+ Node *peek_monitor_box() const;
+ Node *peek_monitor_obj() const;
+
+ // Access functions for the JVM
+ Node *control () const { return in(TypeFunc::Control ); }
+ Node *i_o () const { return in(TypeFunc::I_O ); }
+ Node *memory () const { return in(TypeFunc::Memory ); }
+ Node *returnadr() const { return in(TypeFunc::ReturnAdr); }
+ Node *frameptr () const { return in(TypeFunc::FramePtr ); }
+
+ void set_control ( Node *c ) { set_req(TypeFunc::Control,c); }
+ void set_i_o ( Node *c ) { set_req(TypeFunc::I_O ,c); }
+ void set_memory ( Node *c ) { set_req(TypeFunc::Memory ,c); }
+
+ MergeMemNode* merged_memory() const {
+ return in(TypeFunc::Memory)->as_MergeMem();
+ }
+
+ // The parser marks useless maps as dead when it's done with them:
+ bool is_killed() { return in(TypeFunc::Control) == NULL; }
+
+ // Exception states bubbling out of subgraphs such as inlined calls
+ // are recorded here. (There might be more than one, hence the "next".)
+ // This feature is used only for safepoints which serve as "maps"
+ // for JVM states during parsing, intrinsic expansion, etc.
+ SafePointNode* next_exception() const;
+ void set_next_exception(SafePointNode* n);
+ bool has_exceptions() const { return next_exception() != NULL; }
+
+ // Standard Node stuff
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::CONTROL; }
+ virtual const TypePtr *adr_type() const { return _adr_type; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return 0; }
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual const RegMask &out_RegMask() const;
+ virtual uint match_edge(uint idx) const;
+
+ static bool needs_polling_address_input();
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
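+
+// A minimal sketch (illustrative; 'map' stands for some SafePointNode) of
+// reading the interpreter-visible locals recorded on a map through its
+// JVMState offsets:
+//
+//   JVMState* jvms = map->jvms();
+//   for (uint i = 0; i < (uint)jvms->loc_size(); i++) {
+//     Node* l = map->local(jvms, i);  // top marks a dead slot or the second
+//                                     // half of a long/double
+//   }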
+
+//------------------------------CallNode---------------------------------------
+// Call nodes now subsume the function of debug nodes at callsites, so they
+// contain the functionality of a full scope chain of debug nodes.
+class CallNode : public SafePointNode {
+public:
+ const TypeFunc *_tf; // Function type
+ address _entry_point; // Address of method being called
+ float _cnt; // Estimate of number of times called
+ PointsToNode::EscapeState _escape_state;
+
+ CallNode(const TypeFunc* tf, address addr, const TypePtr* adr_type)
+ : SafePointNode(tf->domain()->cnt(), NULL, adr_type),
+ _tf(tf),
+ _entry_point(addr),
+ _cnt(COUNT_UNKNOWN)
+ {
+ init_class_id(Class_Call);
+ init_flags(Flag_is_Call);
+ _escape_state = PointsToNode::UnknownEscape;
+ }
+
+ const TypeFunc* tf() const { return _tf; }
+ const address entry_point() const { return _entry_point; }
+ const float cnt() const { return _cnt; }
+
+ void set_tf(const TypeFunc* tf) { _tf = tf; }
+ void set_entry_point(address p) { _entry_point = p; }
+ void set_cnt(float c) { _cnt = c; }
+
+ virtual const Type *bottom_type() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const = 0;
+ virtual void calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const;
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ // Are we guaranteed that this node is a safepoint? Not true for leaf calls and
+ // for some macro nodes whose expansion does not have a safepoint on the fast path.
+ virtual bool guaranteed_safepoint() { return true; }
+ // For macro nodes, the JVMState gets modified during expansion, so when cloning
+ // the node the JVMState must be cloned.
+ virtual void clone_jvms() { } // default is not to clone
+
+ virtual uint match_edge(uint idx) const;
+
+#ifndef PRODUCT
+ virtual void dump_req() const;
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallJavaNode-----------------------------------
+// Make a static or dynamic subroutine call node using Java calling
+// convention. (The "Java" calling convention is the compiler's calling
+// convention, as opposed to the interpreter's or that of native C.)
+class CallJavaNode : public CallNode {
+protected:
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+
+ bool _optimized_virtual;
+ ciMethod* _method; // Method being direct called
+public:
+ const int _bci; // Byte Code Index of call byte code
+ CallJavaNode(const TypeFunc* tf , address addr, ciMethod* method, int bci)
+ : CallNode(tf, addr, TypePtr::BOTTOM),
+ _method(method), _bci(bci), _optimized_virtual(false)
+ {
+ init_class_id(Class_CallJava);
+ }
+
+ virtual int Opcode() const;
+ ciMethod* method() const { return _method; }
+ void set_method(ciMethod *m) { _method = m; }
+ void set_optimized_virtual(bool f) { _optimized_virtual = f; }
+ bool is_optimized_virtual() const { return _optimized_virtual; }
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallStaticJavaNode-----------------------------
+// Make a direct subroutine call using Java calling convention (for static
+// calls and optimized virtual calls, plus calls to wrappers for run-time
+// routines); generates static stub.
+class CallStaticJavaNode : public CallJavaNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ CallStaticJavaNode(const TypeFunc* tf, address addr, ciMethod* method, int bci)
+ : CallJavaNode(tf, addr, method, bci), _name(NULL) {
+ init_class_id(Class_CallStaticJava);
+ }
+ CallStaticJavaNode(const TypeFunc* tf, address addr, const char* name, int bci,
+ const TypePtr* adr_type)
+ : CallJavaNode(tf, addr, NULL, bci), _name(name) {
+ init_class_id(Class_CallStaticJava);
+ // This node calls a runtime stub, which often has narrow memory effects.
+ _adr_type = adr_type;
+ }
+ const char *_name; // Runtime wrapper name
+
+ // If this is an uncommon trap, return the request code, else zero.
+ int uncommon_trap_request() const;
+ static int extract_uncommon_trap_request(const Node* call);
+
+ virtual int Opcode() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallDynamicJavaNode----------------------------
+// Make a dispatched call using Java calling convention.
+class CallDynamicJavaNode : public CallJavaNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ CallDynamicJavaNode( const TypeFunc *tf , address addr, ciMethod* method, int vtable_index, int bci ) : CallJavaNode(tf,addr,method,bci), _vtable_index(vtable_index) {
+ init_class_id(Class_CallDynamicJava);
+ }
+
+ int _vtable_index;
+ virtual int Opcode() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallRuntimeNode--------------------------------
+// Make a direct subroutine call node into compiled C++ code.
+class CallRuntimeNode : public CallNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ CallRuntimeNode(const TypeFunc* tf, address addr, const char* name,
+ const TypePtr* adr_type)
+ : CallNode(tf, addr, adr_type),
+ _name(name)
+ {
+ init_class_id(Class_CallRuntime);
+ }
+
+ const char *_name; // Printable name, if _method is NULL
+ virtual int Opcode() const;
+ virtual void calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const;
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallLeafNode-----------------------------------
+// Make a direct subroutine call node into compiled C++ code, without
+// safepoints
+class CallLeafNode : public CallRuntimeNode {
+public:
+ CallLeafNode(const TypeFunc* tf, address addr, const char* name,
+ const TypePtr* adr_type)
+ : CallRuntimeNode(tf, addr, name, adr_type)
+ {
+ init_class_id(Class_CallLeaf);
+ }
+ virtual int Opcode() const;
+ virtual bool guaranteed_safepoint() { return false; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallLeafNoFPNode-------------------------------
+// CallLeafNode, not using floating point or using it in the same manner as
+// the generated code
+class CallLeafNoFPNode : public CallLeafNode {
+public:
+ CallLeafNoFPNode(const TypeFunc* tf, address addr, const char* name,
+ const TypePtr* adr_type)
+ : CallLeafNode(tf, addr, name, adr_type)
+ {
+ }
+ virtual int Opcode() const;
+};
+
+
+//------------------------------Allocate---------------------------------------
+// High-level memory allocation
+//
+// AllocateNode and AllocateArrayNode are subclasses of CallNode because they will
+// get expanded into a code sequence containing a call. Unlike other CallNodes,
+// they have 2 memory projections and 2 i_o projections (which are distinguished by
+// the _is_io_use flag in the projection.) This is needed when expanding the node in
+// order to differentiate the uses of the projection on the normal control path from
+// those on the exception return path.
+//
+class AllocateNode : public CallNode {
+public:
+ enum {
+ // Output:
+ RawAddress = TypeFunc::Parms, // the newly-allocated raw address
+ // Inputs:
+ AllocSize = TypeFunc::Parms, // size (in bytes) of the new object
+ KlassNode, // type (maybe dynamic) of the obj.
+ InitialTest, // slow-path test (may be constant)
+ ALength, // array length (or TOP if none)
+ ParmLimit
+ };
+
+ static const TypeFunc* alloc_type() {
+ const Type** fields = TypeTuple::fields(ParmLimit - TypeFunc::Parms);
+ fields[AllocSize] = TypeInt::POS;
+ fields[KlassNode] = TypeInstPtr::NOTNULL;
+ fields[InitialTest] = TypeInt::BOOL;
+ fields[ALength] = TypeInt::INT; // length (can be a bad length)
+
+ const TypeTuple *domain = TypeTuple::make(ParmLimit, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+ }
+
+ virtual uint size_of() const; // Size is bigger
+ AllocateNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio,
+ Node *size, Node *klass_node, Node *initial_test);
+ // Expansion modifies the JVMState, so we need to clone it
+ virtual void clone_jvms() {
+ set_jvms(jvms()->clone_deep(Compile::current()));
+ }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ virtual bool guaranteed_safepoint() { return false; }
+
+ // Pattern-match a possible usage of AllocateNode.
+ // Return null if no allocation is recognized.
+ // The operand is the pointer produced by the (possible) allocation.
+ // It must be a projection of the Allocate or its subsequent CastPP.
+ // (Note: This function is defined in file graphKit.cpp, near
+ // GraphKit::new_instance/new_array, whose output it recognizes.)
+ // The 'ptr' may not have an offset unless the 'offset' argument is given.
+ static AllocateNode* Ideal_allocation(Node* ptr, PhaseTransform* phase);
+
+ // Fancy version which uses AddPNode::Ideal_base_and_offset to strip
+ // an offset, which is reported back to the caller.
+ // (Note: AllocateNode::Ideal_allocation is defined in graphKit.cpp.)
+ static AllocateNode* Ideal_allocation(Node* ptr, PhaseTransform* phase,
+ intptr_t& offset);
+
+ // Dig the klass operand out of a (possible) allocation site.
+ static Node* Ideal_klass(Node* ptr, PhaseTransform* phase) {
+ AllocateNode* allo = Ideal_allocation(ptr, phase);
+ return (allo == NULL) ? NULL : allo->in(KlassNode);
+ }
+
+ // Conservatively small estimate of offset of first non-header byte.
+ int minimum_header_size() {
+ return is_AllocateArray() ? sizeof(arrayOopDesc) : sizeof(oopDesc);
+ }
+
+ // Return the corresponding initialization barrier (or null if none).
+ // Walks out edges to find it...
+ // (Note: Both InitializeNode::allocation and AllocateNode::initialization
+ // are defined in graphKit.cpp, which sets up the bidirectional relation.)
+ InitializeNode* initialization();
+
+ // Convenience for initialization->maybe_set_complete(phase)
+ bool maybe_set_complete(PhaseGVN* phase);
+};
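+
+// A minimal pattern-match sketch (illustrative; 'ptr' and 'phase' are
+// placeholders) of recovering an allocation site from a pointer during a
+// transform, using the accessors above:
+//
+//   AllocateNode* alloc = AllocateNode::Ideal_allocation(ptr, phase);
+//   if (alloc != NULL) {
+//     Node* klass  = alloc->in(AllocateNode::KlassNode);  // == Ideal_klass(ptr, phase)
+//     Node* length = alloc->in(AllocateNode::ALength);    // TOP unless an array
+//   }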
+
+//------------------------------AllocateArray---------------------------------
+//
+// High-level array allocation
+//
+class AllocateArrayNode : public AllocateNode {
+public:
+ AllocateArrayNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio,
+ Node* size, Node* klass_node, Node* initial_test,
+ Node* count_val
+ )
+ : AllocateNode(C, atype, ctrl, mem, abio, size, klass_node,
+ initial_test)
+ {
+ init_class_id(Class_AllocateArray);
+ set_req(AllocateNode::ALength, count_val);
+ }
+ virtual int Opcode() const;
+ virtual uint size_of() const; // Size is bigger
+
+ // Pattern-match a possible usage of AllocateArrayNode.
+ // Return null if no allocation is recognized.
+ static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) {
+ AllocateNode* allo = Ideal_allocation(ptr, phase);
+ return (allo == NULL || !allo->is_AllocateArray())
+ ? NULL : allo->as_AllocateArray();
+ }
+
+ // Dig the length operand out of a (possible) array allocation site.
+ static Node* Ideal_length(Node* ptr, PhaseTransform* phase) {
+ AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase);
+ return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength);
+ }
+};
+
+//------------------------------AbstractLockNode-----------------------------------
+class AbstractLockNode: public CallNode {
+private:
+ bool _eliminate; // indicates this lock can be safely eliminated
+#ifndef PRODUCT
+ NamedCounter* _counter;
+#endif
+
+protected:
+ // helper functions for lock elimination
+ //
+
+ bool find_matching_unlock(const Node* ctrl, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops);
+ bool find_lock_and_unlock_through_if(Node* node, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops);
+ bool find_unlocks_for_region(const RegionNode* region, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops);
+ LockNode *find_matching_lock(UnlockNode* unlock);
+
+
+public:
+ AbstractLockNode(const TypeFunc *tf)
+ : CallNode(tf, NULL, TypeRawPtr::BOTTOM),
+ _eliminate(false)
+ {
+#ifndef PRODUCT
+ _counter = NULL;
+#endif
+ }
+ virtual int Opcode() const = 0;
+ Node * obj_node() const {return in(TypeFunc::Parms + 0); }
+ Node * box_node() const {return in(TypeFunc::Parms + 1); }
+ Node * fastlock_node() const {return in(TypeFunc::Parms + 2); }
+ const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
+
+ virtual uint size_of() const { return sizeof(*this); }
+
+ bool is_eliminated() {return _eliminate; }
+ // mark node as eliminated and update the counter if there is one
+ void set_eliminated();
+
+#ifndef PRODUCT
+ void create_lock_counter(JVMState* s);
+ NamedCounter* counter() const { return _counter; }
+#endif
+};
+
+//------------------------------Lock---------------------------------------
+// High-level lock operation
+//
+// This is a subclass of CallNode because it is a macro node which gets expanded
+// into a code sequence containing a call. This node takes 3 "parameters":
+// 0 - object to lock
+// 1 - a BoxLockNode
+// 2 - a FastLockNode
+//
+class LockNode : public AbstractLockNode {
+public:
+
+ static const TypeFunc *lock_type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(3);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
+ fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
+ fields[TypeFunc::Parms+2] = TypeInt::BOOL; // FastLock
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+ }
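+
+  // For reference, the three domain fields above line up with the
+  // AbstractLockNode accessors:
+  //
+  //   in(TypeFunc::Parms + 0) == obj_node()       // object being locked
+  //   in(TypeFunc::Parms + 1) == box_node()       // BoxLock stack slot
+  //   in(TypeFunc::Parms + 2) == fastlock_node()  // FastLock result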
+
+ virtual int Opcode() const;
+ virtual uint size_of() const; // Size is bigger
+ LockNode(Compile* C, const TypeFunc *tf) : AbstractLockNode( tf ) {
+ init_class_id(Class_Lock);
+ init_flags(Flag_is_macro);
+ C->add_macro_node(this);
+ }
+ virtual bool guaranteed_safepoint() { return false; }
+
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ // Expansion modifies the JVMState, so we need to clone it
+ virtual void clone_jvms() {
+ set_jvms(jvms()->clone_deep(Compile::current()));
+ }
+};
+
+//------------------------------Unlock---------------------------------------
+// High-level unlock operation
+class UnlockNode : public AbstractLockNode {
+public:
+ virtual int Opcode() const;
+ virtual uint size_of() const; // Size is bigger
+ UnlockNode(Compile* C, const TypeFunc *tf) : AbstractLockNode( tf ) {
+ init_class_id(Class_Unlock);
+ init_flags(Flag_is_macro);
+ C->add_macro_node(this);
+ }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ // unlock is never a safepoint
+ virtual bool guaranteed_safepoint() { return false; }
+};
diff --git a/src/share/vm/opto/cfgnode.cpp b/src/share/vm/opto/cfgnode.cpp
new file mode 100644
index 000000000..1c91c6be8
--- /dev/null
+++ b/src/share/vm/opto/cfgnode.cpp
@@ -0,0 +1,1954 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_cfgnode.cpp.incl"
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute the type of the RegionNode.
+const Type *RegionNode::Value( PhaseTransform *phase ) const {
+ for( uint i=1; i<req(); ++i ) { // For all paths in
+ Node *n = in(i); // Get Control source
+ if( !n ) continue; // Missing inputs are TOP
+ if( phase->type(n) == Type::CONTROL )
+ return Type::CONTROL;
+ }
+ return Type::TOP; // All paths dead? Then so are we
+}
+
+//------------------------------Identity---------------------------------------
+// Check for Region being Identity.
+Node *RegionNode::Identity( PhaseTransform *phase ) {
+ // Cannot have Region be an identity, even if it has only 1 input.
+ // Phi users cannot have their Region input folded away for them,
+ // since they need to select the proper data input
+ return this;
+}
+
+//------------------------------merge_region-----------------------------------
+// If a Region flows into a Region, merge into one big happy merge. This is
+// hard to do if there is stuff that has to happen
+static Node *merge_region(RegionNode *region, PhaseGVN *phase) {
+ if( region->Opcode() != Op_Region ) // Do not do to LoopNodes
+ return NULL;
+ Node *progress = NULL; // Progress flag
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+
+ uint rreq = region->req();
+ for( uint i = 1; i < rreq; i++ ) {
+ Node *r = region->in(i);
+ if( r && r->Opcode() == Op_Region && // Found a region?
+ r->in(0) == r && // Not already collapsed?
+ r != region && // Avoid stupid situations
+ r->outcnt() == 2 ) { // Self user and 'region' user only?
+ assert(!r->as_Region()->has_phi(), "no phi users");
+ if( !progress ) { // No progress
+ if (region->has_phi()) {
+ return NULL; // Only flatten if no Phi users
+ // igvn->hash_delete( phi );
+ }
+ igvn->hash_delete( region );
+ progress = region; // Making progress
+ }
+ igvn->hash_delete( r );
+
+ // Append inputs to 'r' onto 'region'
+ for( uint j = 1; j < r->req(); j++ ) {
+ // Move an input from 'r' to 'region'
+ region->add_req(r->in(j));
+ r->set_req(j, phase->C->top());
+ // Update phis of 'region'
+ //for( uint k = 0; k < max; k++ ) {
+ // Node *phi = region->out(k);
+ // if( phi->is_Phi() ) {
+ // phi->add_req(phi->in(i));
+ // }
+ //}
+
+ rreq++; // One more input to Region
+ } // Found a region to merge into Region
+ // Clobber pointer to the now dead 'r'
+ region->set_req(i, phase->C->top());
+ }
+ }
+
+ return progress;
+}
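+
+// Net effect sketch (illustrative; Region1, Region2, a, b, c are
+// placeholders): given
+//
+//   Region2 = Region(Region1(a, b), c)
+//
+// where Region1's only uses are its self edge and Region2, and no Phis hang
+// off either region, the inputs a and b are appended to Region2 and the
+// Region1 slot is clobbered to top, yielding Region2 = Region(top, c, a, b).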
+
+
+
+//--------------------------------has_phi--------------------------------------
+// Helper function: Return any PhiNode that uses this region or NULL
+PhiNode* RegionNode::has_phi() const {
+ for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+ Node* phi = fast_out(i);
+ if (phi->is_Phi()) { // Check for Phi users
+ assert(phi->in(0) == (Node*)this, "phi uses region only via in(0)");
+ return phi->as_Phi(); // this one is good enough
+ }
+ }
+
+ return NULL;
+}
+
+
+//-----------------------------has_unique_phi----------------------------------
+// Helper function: Return the only PhiNode that uses this region or NULL
+PhiNode* RegionNode::has_unique_phi() const {
+ // Check that only one use is a Phi
+ PhiNode* only_phi = NULL;
+ for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+ Node* phi = fast_out(i);
+ if (phi->is_Phi()) { // Check for Phi users
+ assert(phi->in(0) == (Node*)this, "phi uses region only via in(0)");
+ if (only_phi == NULL) {
+ only_phi = phi->as_Phi();
+ } else {
+ return NULL; // multiple phis
+ }
+ }
+ }
+
+ return only_phi;
+}
+
+
+//------------------------------check_phi_clipping-----------------------------
+// Helper function for RegionNode's identification of FP clipping
+// Check inputs to the Phi
+static bool check_phi_clipping( PhiNode *phi, ConNode * &min, uint &min_idx, ConNode * &max, uint &max_idx, Node * &val, uint &val_idx ) {
+ min = NULL;
+ max = NULL;
+ val = NULL;
+ min_idx = 0;
+ max_idx = 0;
+ val_idx = 0;
+ uint phi_max = phi->req();
+ if( phi_max == 4 ) {
+ for( uint j = 1; j < phi_max; ++j ) {
+ Node *n = phi->in(j);
+ int opcode = n->Opcode();
+ switch( opcode ) {
+ case Op_ConI:
+ {
+ if( min == NULL ) {
+ min = n->Opcode() == Op_ConI ? (ConNode*)n : NULL;
+ min_idx = j;
+ } else {
+ max = n->Opcode() == Op_ConI ? (ConNode*)n : NULL;
+ max_idx = j;
+ if( min->get_int() > max->get_int() ) {
+ // Swap min and max
+ ConNode *temp;
+ uint temp_idx;
+ temp = min; min = max; max = temp;
+ temp_idx = min_idx; min_idx = max_idx; max_idx = temp_idx;
+ }
+ }
+ }
+ break;
+ default:
+ {
+ val = n;
+ val_idx = j;
+ }
+ break;
+ }
+ }
+ }
+ return ( min && max && val && (min->get_int() <= 0) && (max->get_int() >=0) );
+}
+
+
+//------------------------------check_if_clipping------------------------------
+// Helper function for RegionNode's identification of FP clipping
+// Check that inputs to Region come from two IfNodes,
+//
+// If
+// False True
+// If |
+// False True |
+// | | |
+// RegionNode_inputs
+//
+static bool check_if_clipping( const RegionNode *region, IfNode * &bot_if, IfNode * &top_if ) {
+ top_if = NULL;
+ bot_if = NULL;
+
+ // Check control structure above RegionNode for (if ( if ) )
+ Node *in1 = region->in(1);
+ Node *in2 = region->in(2);
+ Node *in3 = region->in(3);
+ // Check that all inputs are projections
+ if( in1->is_Proj() && in2->is_Proj() && in3->is_Proj() ) {
+ Node *in10 = in1->in(0);
+ Node *in20 = in2->in(0);
+ Node *in30 = in3->in(0);
+ // Check that #1 and #2 are ifTrue and ifFalse from same If
+ if( in10 != NULL && in10->is_If() &&
+ in20 != NULL && in20->is_If() &&
+ in30 != NULL && in30->is_If() && in10 == in20 &&
+ (in1->Opcode() != in2->Opcode()) ) {
+ Node *in100 = in10->in(0);
+ Node *in1000 = (in100 != NULL && in100->is_Proj()) ? in100->in(0) : NULL;
+ // Check that control for in10 comes from other branch of IF from in3
+ if( in1000 != NULL && in1000->is_If() &&
+ in30 == in1000 && (in3->Opcode() != in100->Opcode()) ) {
+ // Control pattern checks
+ top_if = (IfNode*)in1000;
+ bot_if = (IfNode*)in10;
+ }
+ }
+ }
+
+ return (top_if != NULL);
+}
+
+
+//------------------------------check_convf2i_clipping-------------------------
+// Helper function for RegionNode's identification of FP clipping
+// Verify that the value input to the phi comes from "ConvF2I; LShift; RShift"
+static bool check_convf2i_clipping( PhiNode *phi, uint idx, ConvF2INode * &convf2i, Node *min, Node *max) {
+ convf2i = NULL;
+
+ // Check for the RShiftNode
+ Node *rshift = phi->in(idx);
+ assert( rshift, "Previous checks ensure phi input is present");
+ if( rshift->Opcode() != Op_RShiftI ) { return false; }
+
+ // Check for the LShiftNode
+ Node *lshift = rshift->in(1);
+ assert( lshift, "Previous checks ensure phi input is present");
+ if( lshift->Opcode() != Op_LShiftI ) { return false; }
+
+ // Check for the ConvF2INode
+ Node *conv = lshift->in(1);
+ if( conv->Opcode() != Op_ConvF2I ) { return false; }
+
+ // Check that shift amounts are only to get sign bits set after F2I
+ jint max_cutoff = max->get_int();
+ jint min_cutoff = min->get_int();
+ jint left_shift = lshift->in(2)->get_int();
+ jint right_shift = rshift->in(2)->get_int();
+ jint max_post_shift = nth_bit(BitsPerJavaInteger - left_shift - 1);
+ if( left_shift != right_shift ||
+ 0 > left_shift || left_shift >= BitsPerJavaInteger ||
+ max_post_shift < max_cutoff ||
+ max_post_shift < -min_cutoff ) {
+ // Shifts are necessary but current transformation eliminates them
+ return false;
+ }
+
+ // OK to return the result of ConvF2I without shifting
+ convf2i = (ConvF2INode*)conv;
+ return true;
+}
+
+
+//------------------------------check_compare_clipping-------------------------
+// Helper function for RegionNode's identification of FP clipping
+static bool check_compare_clipping( bool less_than, IfNode *iff, ConNode *limit, Node * & input ) {
+ Node *i1 = iff->in(1);
+ if ( !i1->is_Bool() ) { return false; }
+ BoolNode *bool1 = i1->as_Bool();
+ if( less_than && bool1->_test._test != BoolTest::le ) { return false; }
+ else if( !less_than && bool1->_test._test != BoolTest::lt ) { return false; }
+ const Node *cmpF = bool1->in(1);
+ if( cmpF->Opcode() != Op_CmpF ) { return false; }
+ // Test that the float value being compared against
+ // is equivalent to the int value used as a limit
+ Node *nodef = cmpF->in(2);
+ if( nodef->Opcode() != Op_ConF ) { return false; }
+ jfloat conf = nodef->getf();
+ jint coni = limit->get_int();
+ if( ((int)conf) != coni ) { return false; }
+ input = cmpF->in(1);
+ return true;
+}
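+
+// A rough sketch (names and bounds are purely illustrative) of the float-clipping
+// idiom that the helpers above recognize together: a value converted with
+// ConvF2I, sign-truncated with a matching LShiftI/RShiftI pair, and clamped by
+// two compares against constants, all merging in a 3-input Region/Phi.
+// Something like:
+//
+//   int clip_to_byte(float f) {
+//     int i = ((int)f << 24) >> 24;   // ConvF2I; LShiftI; RShiftI
+//     if (f > 127.0f)   i = 127;      // max constant into the Phi
+//     if (f <= -128.0f) i = -128;     // min constant into the Phi
+//     return i;                       // Phi(min, max, shifted value)
+//   }
+//
+// RegionNode::Ideal (below) rewrites this to integer compares against the
+// ConvF2I result and drops the now-redundant shift pair.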
+
+//------------------------------is_unreachable_region--------------------------
+// Find if the Region node is reachable from the root.
+bool RegionNode::is_unreachable_region(PhaseGVN *phase) const {
+ assert(req() == 2, "");
+
+ // First, cut the simple case of fallthrough region when NONE of
+ // region's phis references itself directly or through a data node.
+ uint max = outcnt();
+ uint i;
+ for (i = 0; i < max; i++) {
+ Node* phi = raw_out(i);
+ if (phi != NULL && phi->is_Phi()) {
+ assert(phase->eqv(phi->in(0), this) && phi->req() == 2, "");
+ if (phi->outcnt() == 0)
+ continue; // Safe case - no loops
+ if (phi->outcnt() == 1) {
+ Node* u = phi->raw_out(0);
+ // Skip if the only use is another Phi, Call, or Uncommon trap.
+ // It is safe to consider this case as fallthrough.
+ if (u != NULL && (u->is_Phi() || u->is_CFG()))
+ continue;
+ }
+ // Check whether the phi references itself directly or through another node.
+ if (phi->as_Phi()->simple_data_loop_check(phi->in(1)) >= PhiNode::Unsafe)
+ break; // Found possible unsafe data loop.
+ }
+ }
+ if (i >= max)
+ return false; // An unsafe case was NOT found - don't need graph walk.
+
+ // Unsafe case - check if the Region node is reachable from root.
+ ResourceMark rm;
+
+ Arena *a = Thread::current()->resource_area();
+ Node_List nstack(a);
+ VectorSet visited(a);
+
+ // Mark all control nodes reachable from root outputs
+ Node *n = (Node*)phase->C->root();
+ nstack.push(n);
+ visited.set(n->_idx);
+ while (nstack.size() != 0) {
+ n = nstack.pop();
+ uint max = n->outcnt();
+ for (uint i = 0; i < max; i++) {
+ Node* m = n->raw_out(i);
+ if (m != NULL && m->is_CFG()) {
+ if (phase->eqv(m, this)) {
+ return false; // We reached the Region node - it is not dead.
+ }
+ if (!visited.test_set(m->_idx))
+ nstack.push(m);
+ }
+ }
+ }
+
+ return true; // The Region node is unreachable - it is dead.
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Must preserve
+// the CFG, but we can still strip out dead paths.
+Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( !can_reshape && !in(0) ) return NULL; // Already degraded to a Copy
+ assert(!in(0) || !in(0)->is_Root(), "not a specially hidden merge");
+
+ // Check for RegionNode with no Phi users and both inputs come from either
+ // arm of the same IF. If found, then the control-flow split is useless.
+ bool has_phis = false;
+ if (can_reshape) { // Need DU info to check for Phi users
+ has_phis = (has_phi() != NULL); // Cache result
+ if (!has_phis) { // No Phi users? Nothing merging?
+ for (uint i = 1; i < req()-1; i++) {
+ Node *if1 = in(i);
+ if( !if1 ) continue;
+ Node *iff = if1->in(0);
+ if( !iff || !iff->is_If() ) continue;
+ for( uint j=i+1; j<req(); j++ ) {
+ if( in(j) && in(j)->in(0) == iff &&
+ if1->Opcode() != in(j)->Opcode() ) {
+ // Add the IF Projections to the worklist. They (and the IF itself)
+ // will be eliminated if dead.
+ phase->is_IterGVN()->add_users_to_worklist(iff);
+ set_req(i, iff->in(0));// Skip around the useless IF diamond
+ set_req(j, NULL);
+ return this; // Record progress
+ }
+ }
+ }
+ }
+ }
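+
+ // A rough sketch of the shape collapsed above (the source form is only
+ // illustrative): an empty diamond where both Region inputs are opposite
+ // projections of one If and nothing is merged, e.g. "if (cond) { } else { }":
+ //
+ //            If
+ //           /  \
+ //      IfTrue  IfFalse
+ //           \  /
+ //          Region        (no Phi users)
+ //
+ // Input i is rewired to If->in(0), input j is cleared, and the useless If
+ // and its projections are left to die on the IGVN worklist.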
+
+ // Remove TOP or NULL input paths. If only 1 input path remains, this Region
+ // degrades to a copy.
+ bool add_to_worklist = false;
+ int cnt = 0; // Count of values merging
+ DEBUG_ONLY( int cnt_orig = req(); ) // Save original inputs count
+ int del_it = 0; // The last input path we delete
+ // For all inputs...
+ for( uint i=1; i<req(); ++i ){// For all paths in
+ Node *n = in(i); // Get the input
+ if( n != NULL ) {
+ // Remove useless control copy inputs
+ if( n->is_Region() && n->as_Region()->is_copy() ) {
+ set_req(i, n->nonnull_req());
+ i--;
+ continue;
+ }
+ if( n->is_Proj() ) { // Remove useless rethrows
+ Node *call = n->in(0);
+ if (call->is_Call() && call->as_Call()->entry_point() == OptoRuntime::rethrow_stub()) {
+ set_req(i, call->in(0));
+ i--;
+ continue;
+ }
+ }
+ if( phase->type(n) == Type::TOP ) {
+ set_req(i, NULL); // Ignore TOP inputs
+ i--;
+ continue;
+ }
+ cnt++; // One more value merging
+
+ } else if (can_reshape) { // Else found dead path with DU info
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ del_req(i); // Yank path from self
+ del_it = i;
+ uint max = outcnt();
+ DUIterator j;
+ bool progress = true;
+ while(progress) { // Need to establish property over all users
+ progress = false;
+ for (j = outs(); has_out(j); j++) {
+ Node *n = out(j);
+ if( n->req() != req() && n->is_Phi() ) {
+ assert( n->in(0) == this, "" );
+ igvn->hash_delete(n); // Yank from hash before hacking edges
+ n->set_req_X(i,NULL,igvn);// Correct DU info
+ n->del_req(i); // Yank path from Phis
+ if( max != outcnt() ) {
+ progress = true;
+ j = refresh_out_pos(j);
+ max = outcnt();
+ }
+ }
+ }
+ }
+ add_to_worklist = true;
+ i--;
+ }
+ }
+
+ if (can_reshape && cnt == 1) {
+ // Is it a dead loop?
+ // If it is a LoopNode it had 2 inputs (+1 for itself) and
+ // one of them was cut. The loop is dead if the cut one was EntryControl.
+ assert(!this->is_Loop() || cnt_orig == 3, "Loop node should have 3 inputs");
+ if ((this->is_Loop() && del_it == LoopNode::EntryControl) ||
+ (!this->is_Loop() && has_phis && is_unreachable_region(phase))) {
+ // Yes, the region will be removed during the next step below.
+ // Cut the backedge input and remove phis since no data paths left.
+ // We don't cut outputs to other nodes here since we need to put them
+ // on the worklist.
+ del_req(1);
+ cnt = 0;
+ assert( req() == 1, "no more inputs expected" );
+ uint max = outcnt();
+ bool progress = true;
+ Node *top = phase->C->top();
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ DUIterator j;
+ while(progress) {
+ progress = false;
+ for (j = outs(); has_out(j); j++) {
+ Node *n = out(j);
+ if( n->is_Phi() ) {
+ assert( igvn->eqv(n->in(0), this), "" );
+ assert( n->req() == 2 && n->in(1) != NULL, "Only one data input expected" );
+ // Break dead loop data path.
+ // Eagerly replace phis with top to avoid generating phi copies.
+ igvn->add_users_to_worklist(n);
+ igvn->hash_delete(n); // Yank from hash before hacking edges
+ igvn->subsume_node(n, top);
+ if( max != outcnt() ) {
+ progress = true;
+ j = refresh_out_pos(j);
+ max = outcnt();
+ }
+ }
+ }
+ }
+ add_to_worklist = true;
+ }
+ }
+ if (add_to_worklist) {
+ phase->is_IterGVN()->add_users_to_worklist(this); // Revisit collapsed Phis
+ }
+
+ if( cnt <= 1 ) { // Only 1 path in?
+ set_req(0, NULL); // Null control input for region copy
+ if( cnt == 0 && !can_reshape) { // Parse phase - leave the node as it is.
+ // No inputs or all inputs are NULL.
+ return NULL;
+ } else if (can_reshape) { // Optimization phase - remove the node
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ Node *parent_ctrl;
+ if( cnt == 0 ) {
+ assert( req() == 1, "no inputs expected" );
+ // During the IGVN phase such a region will be subsumed by the TOP node,
+ // so the region's phis would get TOP as their control node.
+ // Kill the phis here to avoid that; PhiNode::is_copy() will always be false.
+ // Also set other user's input to top.
+ parent_ctrl = phase->C->top();
+ } else {
+ // The fallthrough case since we already checked dead loops above.
+ parent_ctrl = in(1);
+ assert(parent_ctrl != NULL, "Region is a copy of some non-null control");
+ assert(!igvn->eqv(parent_ctrl, this), "Close dead loop");
+ }
+ if (!add_to_worklist)
+ igvn->add_users_to_worklist(this); // Check for further allowed opts
+ for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
+ Node* n = last_out(i);
+ igvn->hash_delete(n); // Remove from worklist before modifying edges
+ if( n->is_Phi() ) { // Collapse all Phis
+ // Eagerly replace phis to avoid generating copies.
+ igvn->add_users_to_worklist(n);
+ igvn->hash_delete(n); // Yank from hash before hacking edges
+ if( cnt == 0 ) {
+ assert( n->req() == 1, "No data inputs expected" );
+ igvn->subsume_node(n, parent_ctrl); // replaced by top
+ } else {
+ assert( n->req() == 2 && n->in(1) != NULL, "Only one data input expected" );
+ Node* in1 = n->in(1); // replaced by unique input
+ if( n->as_Phi()->is_unsafe_data_reference(in1) )
+ in1 = phase->C->top(); // replaced by top
+ igvn->subsume_node(n, in1);
+ }
+ }
+ else if( n->is_Region() ) { // Update all incoming edges
+ assert( !igvn->eqv(n, this), "Must be removed from DefUse edges");
+ uint uses_found = 0;
+ for( uint k=1; k < n->req(); k++ ) {
+ if( n->in(k) == this ) {
+ n->set_req(k, parent_ctrl);
+ uses_found++;
+ }
+ }
+ if( uses_found > 1 ) { // (--i) done at the end of the loop.
+ i -= (uses_found - 1);
+ }
+ }
+ else {
+ assert( igvn->eqv(n->in(0), this), "Expect RegionNode to be control parent");
+ n->set_req(0, parent_ctrl);
+ }
+#ifdef ASSERT
+ for( uint k=0; k < n->req(); k++ ) {
+ assert( !igvn->eqv(n->in(k), this), "All uses of RegionNode should be gone");
+ }
+#endif
+ }
+ // Remove the RegionNode itself from DefUse info
+ igvn->remove_dead_node(this);
+ return NULL;
+ }
+ return this; // Record progress
+ }
+
+
+ // If a Region flows into a Region, merge into one big happy merge.
+ if (can_reshape) {
+ Node *m = merge_region(this, phase);
+ if (m != NULL) return m;
+ }
+
+ // Check if this region is the root of a clipping idiom on floats
+ if( ConvertFloat2IntClipping && can_reshape && req() == 4 ) {
+ // Check that only one use is a Phi and that its inputs simplify to two constants plus a value
+ PhiNode* phi = has_unique_phi();
+ if (phi != NULL) { // One Phi user
+ // Check inputs to the Phi
+ ConNode *min;
+ ConNode *max;
+ Node *val;
+ uint min_idx;
+ uint max_idx;
+ uint val_idx;
+ if( check_phi_clipping( phi, min, min_idx, max, max_idx, val, val_idx ) ) {
+ IfNode *top_if;
+ IfNode *bot_if;
+ if( check_if_clipping( this, bot_if, top_if ) ) {
+ // Control pattern checks, now verify compares
+ Node *top_in = NULL; // value being compared against
+ Node *bot_in = NULL;
+ if( check_compare_clipping( true, bot_if, min, bot_in ) &&
+ check_compare_clipping( false, top_if, max, top_in ) ) {
+ if( bot_in == top_in ) {
+ PhaseIterGVN *gvn = phase->is_IterGVN();
+ assert( gvn != NULL, "Only had DefUse info in IterGVN");
+ // Only remaining check is that bot_in == top_in == (Phi's val + mods)
+
+ // Check for the ConvF2INode
+ ConvF2INode *convf2i;
+ if( check_convf2i_clipping( phi, val_idx, convf2i, min, max ) &&
+ convf2i->in(1) == bot_in ) {
+ // Matched pattern, including LShiftI; RShiftI, replace with integer compares
+ // max test
+ Node *cmp = gvn->register_new_node_with_optimizer(new (phase->C, 3) CmpINode( convf2i, min ));
+ Node *boo = gvn->register_new_node_with_optimizer(new (phase->C, 2) BoolNode( cmp, BoolTest::lt ));
+ IfNode *iff = (IfNode*)gvn->register_new_node_with_optimizer(new (phase->C, 2) IfNode( top_if->in(0), boo, PROB_UNLIKELY_MAG(5), top_if->_fcnt ));
+ Node *if_min= gvn->register_new_node_with_optimizer(new (phase->C, 1) IfTrueNode (iff));
+ Node *ifF = gvn->register_new_node_with_optimizer(new (phase->C, 1) IfFalseNode(iff));
+ // min test
+ cmp = gvn->register_new_node_with_optimizer(new (phase->C, 3) CmpINode( convf2i, max ));
+ boo = gvn->register_new_node_with_optimizer(new (phase->C, 2) BoolNode( cmp, BoolTest::gt ));
+ iff = (IfNode*)gvn->register_new_node_with_optimizer(new (phase->C, 2) IfNode( ifF, boo, PROB_UNLIKELY_MAG(5), bot_if->_fcnt ));
+ Node *if_max= gvn->register_new_node_with_optimizer(new (phase->C, 1) IfTrueNode (iff));
+ ifF = gvn->register_new_node_with_optimizer(new (phase->C, 1) IfFalseNode(iff));
+ // update input edges to region node
+ set_req_X( min_idx, if_min, gvn );
+ set_req_X( max_idx, if_max, gvn );
+ set_req_X( val_idx, ifF, gvn );
+ // remove unnecessary 'LShiftI; RShiftI' idiom
+ gvn->hash_delete(phi);
+ phi->set_req_X( val_idx, convf2i, gvn );
+ gvn->hash_find_insert(phi);
+ // Return transformed region node
+ return this;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return NULL;
+}
+
+
+
+const RegMask &RegionNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+// Find the one non-null required input. RegionNode only
+Node *Node::nonnull_req() const {
+ assert( is_Region(), "" );
+ for( uint i = 1; i < _cnt; i++ )
+ if( in(i) )
+ return in(i);
+ ShouldNotReachHere();
+ return NULL;
+}
+
+
+//=============================================================================
+// note that these functions assume that the _adr_type field is flattened
+uint PhiNode::hash() const {
+ const Type* at = _adr_type;
+ return TypeNode::hash() + (at ? at->hash() : 0);
+}
+uint PhiNode::cmp( const Node &n ) const {
+ return TypeNode::cmp(n) && _adr_type == ((PhiNode&)n)._adr_type;
+}
+static inline
+const TypePtr* flatten_phi_adr_type(const TypePtr* at) {
+ if (at == NULL || at == TypePtr::BOTTOM) return at;
+ return Compile::current()->alias_type(at)->adr_type();
+}
+
+//----------------------------make---------------------------------------------
+// create a new phi with edges matching r and set (initially) to x
+PhiNode* PhiNode::make(Node* r, Node* x, const Type *t, const TypePtr* at) {
+ uint preds = r->req(); // Number of predecessor paths
+ assert(t != Type::MEMORY || at == flatten_phi_adr_type(at), "flatten at");
+ PhiNode* p = new (Compile::current(), preds) PhiNode(r, t, at);
+ for (uint j = 1; j < preds; j++) {
+ // Fill in all inputs, except those which the region does not yet have
+ if (r->in(j) != NULL)
+ p->init_req(j, x);
+ }
+ return p;
+}
+PhiNode* PhiNode::make(Node* r, Node* x) {
+ const Type* t = x->bottom_type();
+ const TypePtr* at = NULL;
+ if (t == Type::MEMORY) at = flatten_phi_adr_type(x->adr_type());
+ return make(r, x, t, at);
+}
+PhiNode* PhiNode::make_blank(Node* r, Node* x) {
+ const Type* t = x->bottom_type();
+ const TypePtr* at = NULL;
+ if (t == Type::MEMORY) at = flatten_phi_adr_type(x->adr_type());
+ return new (Compile::current(), r->req()) PhiNode(r, t, at);
+}
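+
+// A rough usage sketch (names are illustrative, not from this file): building
+// a phi that merges two values arriving at a 2-input region, assuming 'gvn'
+// is the current PhaseGVN and 'region', 'val_a', 'val_b' already exist:
+//
+//   PhiNode* phi = PhiNode::make(region, val_a);  // every live path starts as val_a
+//   phi->set_req(2, val_b);                       // path 2 merges val_b instead
+//   Node* merged = gvn->transform(phi);
+//
+// make_blank() is the variant that leaves the data inputs unset for callers
+// that fill them in one path at a time.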
+
+
+//------------------------slice_memory-----------------------------------------
+// create a new phi with narrowed memory type
+PhiNode* PhiNode::slice_memory(const TypePtr* adr_type) const {
+ PhiNode* mem = (PhiNode*) clone();
+ *(const TypePtr**)&mem->_adr_type = adr_type;
+ // convert self-loops, or else we get a bad graph
+ for (uint i = 1; i < req(); i++) {
+ if ((const Node*)in(i) == this) mem->set_req(i, mem);
+ }
+ mem->verify_adr_type();
+ return mem;
+}
+
+//------------------------verify_adr_type--------------------------------------
+#ifdef ASSERT
+void PhiNode::verify_adr_type(VectorSet& visited, const TypePtr* at) const {
+ if (visited.test_set(_idx)) return; //already visited
+
+ // recheck constructor invariants:
+ verify_adr_type(false);
+
+ // recheck local phi/phi consistency:
+ assert(_adr_type == at || _adr_type == TypePtr::BOTTOM,
+ "adr_type must be consistent across phi nest");
+
+ // walk around
+ for (uint i = 1; i < req(); i++) {
+ Node* n = in(i);
+ if (n == NULL) continue;
+ const Node* np = in(i);
+ if (np->is_Phi()) {
+ np->as_Phi()->verify_adr_type(visited, at);
+ } else if (n->bottom_type() == Type::TOP
+ || (n->is_Mem() && n->in(MemNode::Address)->bottom_type() == Type::TOP)) {
+ // ignore top inputs
+ } else {
+ const TypePtr* nat = flatten_phi_adr_type(n->adr_type());
+ // recheck phi/non-phi consistency at leaves:
+ assert((nat != NULL) == (at != NULL), "");
+ assert(nat == at || nat == TypePtr::BOTTOM,
+ "adr_type must be consistent at leaves of phi nest");
+ }
+ }
+}
+
+// Verify a whole nest of phis rooted at this one.
+void PhiNode::verify_adr_type(bool recursive) const {
+ if (is_error_reported()) return; // muzzle asserts when debugging an error
+ if (Node::in_dump()) return; // muzzle asserts when printing
+
+ assert((_type == Type::MEMORY) == (_adr_type != NULL), "adr_type for memory phis only");
+
+ if (!VerifyAliases) return; // verify thoroughly only if requested
+
+ assert(_adr_type == flatten_phi_adr_type(_adr_type),
+ "Phi::adr_type must be pre-normalized");
+
+ if (recursive) {
+ VectorSet visited(Thread::current()->resource_area());
+ verify_adr_type(visited, _adr_type);
+ }
+}
+#endif
+
+
+//------------------------------Value------------------------------------------
+// Compute the type of the PhiNode
+const Type *PhiNode::Value( PhaseTransform *phase ) const {
+ Node *r = in(0); // RegionNode
+ if( !r ) // Copy or dead
+ return in(1) ? phase->type(in(1)) : Type::TOP;
+
+ // Note: During parsing, phis are often transformed before their regions.
+ // This means we have to use type_or_null to defend against untyped regions.
+ if( phase->type_or_null(r) == Type::TOP ) // Dead code?
+ return Type::TOP;
+
+ // Check for trip-counted loop. If so, be smarter.
+ CountedLoopNode *l = r->is_CountedLoop() ? r->as_CountedLoop() : NULL;
+ if( l && l->can_be_counted_loop(phase) &&
+ ((const Node*)l->phi() == this) ) { // Trip counted loop!
+ // protect against init_trip() or limit() returning NULL
+ const Node *init = l->init_trip();
+ const Node *limit = l->limit();
+ if( init != NULL && limit != NULL && l->stride_is_con() ) {
+ const TypeInt *lo = init ->bottom_type()->isa_int();
+ const TypeInt *hi = limit->bottom_type()->isa_int();
+ if( lo && hi ) { // Dying loops might have TOP here
+ int stride = l->stride_con();
+ if( stride < 0 ) { // Down-counter loop
+ const TypeInt *tmp = lo; lo = hi; hi = tmp;
+ stride = -stride;
+ }
+ if( lo->_hi < hi->_lo ) // Reversed endpoints are well defined :-(
+ return TypeInt::make(lo->_lo,hi->_hi,3);
+ }
+ }
+ }
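+
+  // For example (values illustrative): a counted loop with init 0, limit 100
+  // and stride +1 gives its trip-count phi the type int:[0..100] here rather
+  // than the full int range, which can help later range-check elimination.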
+
+ // Until we have harmony between classes and interfaces in the type
+ // lattice, we must tread carefully around phis which implicitly
+ // convert the one to the other.
+ const TypeInstPtr* ttip = _type->isa_instptr();
+ bool is_intf = false;
+ if (ttip != NULL) {
+ ciKlass* k = ttip->klass();
+ if (k->is_loaded() && k->is_interface())
+ is_intf = true;
+ }
+
+ // Default case: merge all inputs
+ const Type *t = Type::TOP; // Merged type starting value
+ for (uint i = 1; i < req(); ++i) {// For all paths in
+ // Reachable control path?
+ if (r->in(i) && phase->type(r->in(i)) == Type::CONTROL) {
+ const Type* ti = phase->type(in(i));
+ // We assume that each input of an interface-valued Phi is a true
+ // subtype of that interface. This might not be true of the meet
+ // of all the input types. The lattice is not distributive in
+ // such cases. Ward off asserts in type.cpp by refusing to do
+ // meets between interfaces and proper classes.
+ const TypeInstPtr* tiip = ti->isa_instptr();
+ if (tiip) {
+ bool ti_is_intf = false;
+ ciKlass* k = tiip->klass();
+ if (k->is_loaded() && k->is_interface())
+ ti_is_intf = true;
+ if (is_intf != ti_is_intf)
+ { t = _type; break; }
+ }
+ t = t->meet(ti);
+ }
+ }
+
+ // The worst-case type (from ciTypeFlow) should be consistent with "t".
+ // That is, we expect that "t->higher_equal(_type)" holds true.
+ // There are various exceptions:
+ // - Inputs which are phis might in fact be widened unnecessarily.
+ // For example, an input might be a widened int while the phi is a short.
+ // - Inputs might be BotPtrs but this phi is dependent on a null check,
+ // and postCCP has removed the cast which encodes the result of the check.
+ // - The type of this phi is an interface, and the inputs are classes.
+ // - Value calls on inputs might produce fuzzy results.
+ // (Occurrences of this case suggest improvements to Value methods.)
+ //
+ // It is not possible to see Type::BOTTOM values as phi inputs,
+ // because the ciTypeFlow pre-pass produces verifier-quality types.
+ const Type* ft = t->filter(_type); // Worst case type
+
+#ifdef ASSERT
+ // The following logic has been moved into TypeOopPtr::filter.
+ const Type* jt = t->join(_type);
+ if( jt->empty() ) { // Emptied out???
+
+ // Check for evil case of 't' being a class and '_type' expecting an
+ // interface. This can happen because the bytecodes do not contain
+ // enough type info to distinguish a Java-level interface variable
+ // from a Java-level object variable. If we meet 2 classes which
+ // both implement interface I, but their meet is at 'j/l/O' which
+ // doesn't implement I, we have no way to tell if the result should
+ // be 'I' or 'j/l/O'. Thus we'll pick 'j/l/O'. If this then flows
+ // into a Phi which "knows" it's an Interface type we'll have to
+ // uplift the type.
+ if( !t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface() )
+ { assert(ft == _type, ""); } // Uplift to interface
+ // Otherwise it's something stupid like non-overlapping int ranges
+ // found on dying counted loops.
+ else
+ { assert(ft == Type::TOP, ""); } // Canonical empty value
+ }
+
+ else {
+
+ // If we have an interface-typed Phi and we narrow to a class type, the join
+ // should report back the class. However, if we have a J/L/Object
+ // class-typed Phi and an interface flows in, the meet & join can report an
+ // interface back out. That should not be possible, but it happens because
+ // the type system does not interact well with interfaces.
+ const TypeInstPtr *jtip = jt->isa_instptr();
+ if( jtip && ttip ) {
+ if( jtip->is_loaded() && jtip->klass()->is_interface() &&
+ ttip->is_loaded() && !ttip->klass()->is_interface() )
+ // Happens in a CTW of rt.jar, 320-341, no extra flags
+ { assert(ft == ttip->cast_to_ptr_type(jtip->ptr()), ""); jt = ft; }
+ }
+ if (jt != ft && jt->base() == ft->base()) {
+ if (jt->isa_int() &&
+ jt->is_int()->_lo == ft->is_int()->_lo &&
+ jt->is_int()->_hi == ft->is_int()->_hi)
+ jt = ft;
+ if (jt->isa_long() &&
+ jt->is_long()->_lo == ft->is_long()->_lo &&
+ jt->is_long()->_hi == ft->is_long()->_hi)
+ jt = ft;
+ }
+ if (jt != ft) {
+ tty->print("merge type: "); t->dump(); tty->cr();
+ tty->print("kill type: "); _type->dump(); tty->cr();
+ tty->print("join type: "); jt->dump(); tty->cr();
+ tty->print("filter type: "); ft->dump(); tty->cr();
+ }
+ assert(jt == ft, "");
+ }
+#endif //ASSERT
+
+ // Deal with conversion problems found in data loops.
+ ft = phase->saturate(ft, phase->type_or_null(this), _type);
+
+ return ft;
+}
+
+
+//------------------------------is_diamond_phi---------------------------------
+// Does this Phi represent a simple well-shaped diamond merge? Return the
+// index of the true path or 0 otherwise.
+int PhiNode::is_diamond_phi() const {
+ // Check for a 2-path merge
+ Node *region = in(0);
+ if( !region ) return 0;
+ if( region->req() != 3 ) return 0;
+ if( req() != 3 ) return 0;
+ // Check that both paths come from the same If
+ Node *ifp1 = region->in(1);
+ Node *ifp2 = region->in(2);
+ if( !ifp1 || !ifp2 ) return 0;
+ Node *iff = ifp1->in(0);
+ if( !iff || !iff->is_If() ) return 0;
+ if( iff != ifp2->in(0) ) return 0;
+ // Check for a proper bool/cmp
+ const Node *b = iff->in(1);
+ if( !b->is_Bool() ) return 0;
+ const Node *cmp = b->in(1);
+ if( !cmp->is_Cmp() ) return 0;
+
+ // Check for branching opposite expected
+ if( ifp2->Opcode() == Op_IfTrue ) {
+ assert( ifp1->Opcode() == Op_IfFalse, "" );
+ return 2;
+ } else {
+ assert( ifp1->Opcode() == Op_IfTrue, "" );
+ return 1;
+ }
+}
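+
+// A rough sketch of the diamond recognized above (names illustrative):
+// source like "x = (p < q) ? a : b;" parses to
+//
+//        If (Bool (CmpI p q))
+//          /        \
+//      IfTrue     IfFalse
+//          \        /
+//            Region
+//              |
+//       Phi(Region, a, b)
+//
+// and the returned value is the phi input index (1 or 2) fed by the IfTrue
+// path, or 0 when the shape does not match.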
+
+//----------------------------check_cmove_id-----------------------------------
+// Check for CMove'ing a constant after comparing against the constant.
+// Happens all the time now, since if we compare equality vs a constant in
+// the parser, we "know" the variable is constant on one path and we force
+// it. Thus code like "if( x==0 ) {/*EMPTY*/}" ends up inserting a
+// conditional move: "x = (x==0)?0:x;". Yucko. This fix is slightly more
+// general in that we don't need constants. Since CMove's are only inserted
+// in very special circumstances, we do it here on generic Phi's.
+Node* PhiNode::is_cmove_id(PhaseTransform* phase, int true_path) {
+ assert(true_path !=0, "only diamond shape graph expected");
+
+ // is_diamond_phi() has guaranteed the correctness of the nodes sequence:
+ // phi->region->if_proj->ifnode->bool->cmp
+ Node* region = in(0);
+ Node* iff = region->in(1)->in(0);
+ BoolNode* b = iff->in(1)->as_Bool();
+ Node* cmp = b->in(1);
+ Node* tval = in(true_path);
+ Node* fval = in(3-true_path);
+ Node* id = CMoveNode::is_cmove_id(phase, cmp, tval, fval, b);
+ if (id == NULL)
+ return NULL;
+
+ // Either value might be a cast that depends on a branch of 'iff'.
+ // Since the 'id' value will float free of the diamond, either
+ // decast or return failure.
+ Node* ctl = id->in(0);
+ if (ctl != NULL && ctl->in(0) == iff) {
+ if (id->is_ConstraintCast()) {
+ return id->in(1);
+ } else {
+ // Don't know how to disentangle this value.
+ return NULL;
+ }
+ }
+
+ return id;
+}
+
+//------------------------------Identity---------------------------------------
+// Check for Region being Identity.
+Node *PhiNode::Identity( PhaseTransform *phase ) {
+ // Check for no merging going on
+ // (There used to be special-case code here when this->region->is_Loop.
+ // It would check for a tributary phi on the backedge that trivially
+ // duplicated the main phi, perhaps with a single cast. The unique_input method
+ // does all this and more, by reducing such tributaries to 'this'.)
+ Node* uin = unique_input(phase);
+ if (uin != NULL) {
+ return uin;
+ }
+
+ int true_path = is_diamond_phi();
+ if (true_path != 0) {
+ Node* id = is_cmove_id(phase, true_path);
+ if (id != NULL) return id;
+ }
+
+ return this; // No identity
+}
+
+//-----------------------------unique_input------------------------------------
+// Find the unique value, discounting top, self-loops, and casts.
+// Return top if there are no inputs, and self if there are multiple.
+Node* PhiNode::unique_input(PhaseTransform* phase) {
+ // 1) One unique direct input, or
+ // 2) some of the inputs have an intervening ConstraintCast and
+ // the type of input is the same or sharper (more specific)
+ // than the phi's type.
+ // 3) an input is a self loop
+ //
+ //  1) input   or   2) input     or   3) input __
+ //     /   \           /   \               \  /  \
+ //     \   /          |    cast             phi  cast
+ //      phi            \   /               /  \  /
+ //                      phi               /    --
+
+ Node* r = in(0); // RegionNode
+ if (r == NULL) return in(1); // Already degraded to a Copy
+ Node* uncasted_input = NULL; // The unique uncasted input (ConstraintCasts removed)
+ Node* direct_input = NULL; // The unique direct input
+
+ for (uint i = 1, cnt = req(); i < cnt; ++i) {
+ Node* rc = r->in(i);
+ if (rc == NULL || phase->type(rc) == Type::TOP)
+ continue; // ignore unreachable control path
+ Node* n = in(i);
+ Node* un = n->uncast();
+ if (un == NULL || un == this || phase->type(un) == Type::TOP) {
+ continue; // ignore if top, or in(i) and "this" are in a data cycle
+ }
+ // Check for a unique uncasted input
+ if (uncasted_input == NULL) {
+ uncasted_input = un;
+ } else if (uncasted_input != un) {
+ uncasted_input = NodeSentinel; // no unique uncasted input
+ }
+ // Check for a unique direct input
+ if (direct_input == NULL) {
+ direct_input = n;
+ } else if (direct_input != n) {
+ direct_input = NodeSentinel; // no unique direct input
+ }
+ }
+ if (direct_input == NULL) {
+ return phase->C->top(); // no inputs
+ }
+ assert(uncasted_input != NULL,"");
+
+ if (direct_input != NodeSentinel) {
+ return direct_input; // one unique direct input
+ }
+ if (uncasted_input != NodeSentinel &&
+ phase->type(uncasted_input)->higher_equal(type())) {
+ return uncasted_input; // one unique uncasted input
+ }
+
+ // Nothing.
+ return NULL;
+}
+
+//------------------------------is_x2logic-------------------------------------
+// Check for simple convert-to-boolean pattern
+// If:(C Bool) Region:(IfF IfT) Phi:(Region 0 1)
+ // Convert the Phi to a Conv2B (int-to-boolean conversion).
+static Node *is_x2logic( PhaseGVN *phase, PhiNode *phi, int true_path ) {
+ assert(true_path !=0, "only diamond shape graph expected");
+ // Convert the true/false index into an expected 0/1 return.
+ // Map 2->0 and 1->1.
+ int flipped = 2-true_path;
+
+ // is_diamond_phi() has guaranteed the correctness of the nodes sequence:
+ // phi->region->if_proj->ifnode->bool->cmp
+ Node *region = phi->in(0);
+ Node *iff = region->in(1)->in(0);
+ BoolNode *b = (BoolNode*)iff->in(1);
+ const CmpNode *cmp = (CmpNode*)b->in(1);
+
+ Node *zero = phi->in(1);
+ Node *one = phi->in(2);
+ const Type *tzero = phase->type( zero );
+ const Type *tone = phase->type( one );
+
+ // Check for compare vs 0
+ const Type *tcmp = phase->type(cmp->in(2));
+ if( tcmp != TypeInt::ZERO && tcmp != TypePtr::NULL_PTR ) {
+ // Allow cmp-vs-1 if the other input is bounded by 0-1
+ if( !(tcmp == TypeInt::ONE && phase->type(cmp->in(1)) == TypeInt::BOOL) )
+ return NULL;
+ flipped = 1-flipped; // Test is vs 1 instead of 0!
+ }
+
+ // Check for setting zero/one opposite expected
+ if( tzero == TypeInt::ZERO ) {
+ if( tone == TypeInt::ONE ) {
+ } else return NULL;
+ } else if( tzero == TypeInt::ONE ) {
+ if( tone == TypeInt::ZERO ) {
+ flipped = 1-flipped;
+ } else return NULL;
+ } else return NULL;
+
+ // Check for boolean test backwards
+ if( b->_test._test == BoolTest::ne ) {
+ } else if( b->_test._test == BoolTest::eq ) {
+ flipped = 1-flipped;
+ } else return NULL;
+
+ // Build int->bool conversion
+ Node *n = new (phase->C, 2) Conv2BNode( cmp->in(1) );
+ if( flipped )
+ n = new (phase->C, 3) XorINode( phase->transform(n), phase->intcon(1) );
+
+ return n;
+}
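+
+// A rough sketch of the pattern handled above (source form illustrative):
+// "flag = (x != 0) ? 1 : 0;" merges the constants 0 and 1 in a diamond Phi,
+// which is replaced here by Conv2B(x); if the sense is inverted, as in
+// "(x == 0) ? 1 : 0", an extra XorI with 1 flips the result.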
+
+//------------------------------is_cond_add------------------------------------
+// Check for simple conditional add pattern: "(P < Q) ? X+Y : X;"
+// To be profitable the control flow has to disappear; there can be no other
+// values merging here. We replace the test-and-branch with:
+// "(sgn(P-Q))&Y) + X". Basically, convert "(P < Q)" into 0 or -1 by
+// moving the carry bit from (P-Q) into a register with 'sbb EAX,EAX'.
+// Then convert Y to 0-or-Y and finally add.
+// This is a key transform for SpecJava _201_compress.
+static Node* is_cond_add(PhaseGVN *phase, PhiNode *phi, int true_path) {
+ assert(true_path !=0, "only diamond shape graph expected");
+
+ // is_diamond_phi() has guaranteed the correctness of the nodes sequence:
+ // phi->region->if_proj->ifnode->bool->cmp
+ RegionNode *region = (RegionNode*)phi->in(0);
+ Node *iff = region->in(1)->in(0);
+ BoolNode* b = iff->in(1)->as_Bool();
+ const CmpNode *cmp = (CmpNode*)b->in(1);
+
+ // Make sure only merging this one phi here
+ if (region->has_unique_phi() != phi) return NULL;
+
+ // Make sure each arm of the diamond has exactly one output, which we assume
+ // is the region. Otherwise, the control flow won't disappear.
+ if (region->in(1)->outcnt() != 1) return NULL;
+ if (region->in(2)->outcnt() != 1) return NULL;
+
+ // Check for "(P < Q)" of type signed int
+ if (b->_test._test != BoolTest::lt) return NULL;
+ if (cmp->Opcode() != Op_CmpI) return NULL;
+
+ Node *p = cmp->in(1);
+ Node *q = cmp->in(2);
+ Node *n1 = phi->in( true_path);
+ Node *n2 = phi->in(3-true_path);
+
+ int op = n1->Opcode();
+ if( op != Op_AddI // Need zero as additive identity
+ /*&&op != Op_SubI &&
+ op != Op_AddP &&
+ op != Op_XorI &&
+ op != Op_OrI*/ )
+ return NULL;
+
+ Node *x = n2;
+ Node *y = n1->in(1);
+ if( n2 == n1->in(1) ) {
+ y = n1->in(2);
+ } else if( n2 == n1->in(2) ) { // y stays n1->in(1)
+ } else return NULL;
+
+ // Not so profitable if compare and add are constants
+ if( q->is_Con() && phase->type(q) != TypeInt::ZERO && y->is_Con() )
+ return NULL;
+
+ Node *cmplt = phase->transform( new (phase->C, 3) CmpLTMaskNode(p,q) );
+ Node *j_and = phase->transform( new (phase->C, 3) AndINode(cmplt,y) );
+ return new (phase->C, 3) AddINode(j_and,x);
+}
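+
+// A rough sketch of the rewrite above (names illustrative): for source like
+// "r = (p < q) ? x + y : x;" the diamond disappears and becomes
+//
+//   mask = CmpLTMask(p, q);        // -1 if p < q, else 0
+//   r    = AddI(AndI(mask, y), x);
+//
+// so y contributes only on paths where the mask is all ones.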
+
+//------------------------------is_absolute------------------------------------
+// Check for absolute value.
+static Node* is_absolute( PhaseGVN *phase, PhiNode *phi_root, int true_path) {
+ assert(true_path !=0, "only diamond shape graph expected");
+
+ int cmp_zero_idx = 0; // Index of compare input where to look for zero
+ int phi_x_idx = 0; // Index of phi input where to find naked x
+
+ // ABS ends with the merge of 2 control flow paths.
+ // Find the false path from the true path. With only 2 inputs, 3 - x works nicely.
+ int false_path = 3 - true_path;
+
+ // is_diamond_phi() has guaranteed the correctness of the nodes sequence:
+ // phi->region->if_proj->ifnode->bool->cmp
+ BoolNode *bol = phi_root->in(0)->in(1)->in(0)->in(1)->as_Bool();
+
+ // Check bool sense
+ switch( bol->_test._test ) {
+ case BoolTest::lt: cmp_zero_idx = 1; phi_x_idx = true_path; break;
+ case BoolTest::le: cmp_zero_idx = 2; phi_x_idx = false_path; break;
+ case BoolTest::gt: cmp_zero_idx = 2; phi_x_idx = true_path; break;
+ case BoolTest::ge: cmp_zero_idx = 1; phi_x_idx = false_path; break;
+ default: return NULL; break;
+ }
+
+ // Test is next
+ Node *cmp = bol->in(1);
+ const Type *tzero = NULL;
+ switch( cmp->Opcode() ) {
+ case Op_CmpF: tzero = TypeF::ZERO; break; // Float ABS
+ case Op_CmpD: tzero = TypeD::ZERO; break; // Double ABS
+ default: return NULL;
+ }
+
+ // Find zero input of compare; the other input is being abs'd
+ Node *x = NULL;
+ bool flip = false;
+ if( phase->type(cmp->in(cmp_zero_idx)) == tzero ) {
+ x = cmp->in(3 - cmp_zero_idx);
+ } else if( phase->type(cmp->in(3 - cmp_zero_idx)) == tzero ) {
+ // The test is inverted, we should invert the result...
+ x = cmp->in(cmp_zero_idx);
+ flip = true;
+ } else {
+ return NULL;
+ }
+
+ // Next get the 2 pieces being selected, one is the original value
+ // and the other is the negated value.
+ if( phi_root->in(phi_x_idx) != x ) return NULL;
+
+ // Check other phi input for subtract node
+ Node *sub = phi_root->in(3 - phi_x_idx);
+
+ // Allow only Sub(0,X) and fail out for all others; Neg is not OK
+ if( tzero == TypeF::ZERO ) {
+ if( sub->Opcode() != Op_SubF ||
+ sub->in(2) != x ||
+ phase->type(sub->in(1)) != tzero ) return NULL;
+ x = new (phase->C, 2) AbsFNode(x);
+ if (flip) {
+ x = new (phase->C, 3) SubFNode(sub->in(1), phase->transform(x));
+ }
+ } else {
+ if( sub->Opcode() != Op_SubD ||
+ sub->in(2) != x ||
+ phase->type(sub->in(1)) != tzero ) return NULL;
+ x = new (phase->C, 2) AbsDNode(x);
+ if (flip) {
+ x = new (phase->C, 3) SubDNode(sub->in(1), phase->transform(x));
+ }
+ }
+
+ return x;
+}
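+
+// A rough sketch of the pattern handled above (source form illustrative):
+// "d = (d < 0.0) ? 0.0 - d : d;" forms a diamond Phi of d and SubD(0,d),
+// which collapses to AbsD(d). The float case maps to AbsF, and when the zero
+// sits on the other compare input the result is negated again by subtracting
+// the absolute value from zero.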
+
+//------------------------------split_once-------------------------------------
+// Helper for split_flow_path
+static void split_once(PhaseIterGVN *igvn, Node *phi, Node *val, Node *n, Node *newn) {
+ igvn->hash_delete(n); // Remove from hash before hacking edges
+
+ uint j = 1;
+ for( uint i = phi->req()-1; i > 0; i-- ) {
+ if( phi->in(i) == val ) { // Found a path with val?
+ // Add to NEW Region/Phi, no DU info
+ newn->set_req( j++, n->in(i) );
+ // Remove from OLD Region/Phi
+ n->del_req(i);
+ }
+ }
+
+ // Register the new node but do not transform it. Cannot transform until the
+ // entire Region/Phi conglomerate has been hacked as a single huge transform.
+ igvn->register_new_node_with_optimizer( newn );
+ // Now I can point to the new node.
+ n->add_req(newn);
+ igvn->_worklist.push(n);
+}
+
+//------------------------------split_flow_path--------------------------------
+// Check for merging identical values and split flow paths
+static Node* split_flow_path(PhaseGVN *phase, PhiNode *phi) {
+ BasicType bt = phi->type()->basic_type();
+ if( bt == T_ILLEGAL || type2size[bt] <= 0 )
+ return NULL; // Bail out on funny non-value stuff
+ if( phi->req() <= 3 ) // Need at least 2 matched inputs and a
+ return NULL; // third unequal input to be worth doing
+
+ // Scan for a constant
+ uint i;
+ for( i = 1; i < phi->req()-1; i++ ) {
+ Node *n = phi->in(i);
+ if( !n ) return NULL;
+ if( phase->type(n) == Type::TOP ) return NULL;
+ if( n->Opcode() == Op_ConP )
+ break;
+ }
+ if( i >= phi->req() ) // Only split for constants
+ return NULL;
+
+ Node *val = phi->in(i); // Constant to split for
+ uint hit = 0; // Number of times it occurs
+
+ for( ; i < phi->req(); i++ ){ // Count occurrences of the constant
+ Node *n = phi->in(i);
+ if( !n ) return NULL;
+ if( phase->type(n) == Type::TOP ) return NULL;
+ if( phi->in(i) == val )
+ hit++;
+ }
+
+ if( hit <= 1 || // Make sure we find 2 or more
+ hit == phi->req()-1 ) // and not ALL the same value
+ return NULL;
+
+ // Now start splitting out the flow paths that merge the same value.
+ // Split first the RegionNode.
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ Node *r = phi->region();
+ RegionNode *newr = new (phase->C, hit+1) RegionNode(hit+1);
+ split_once(igvn, phi, val, r, newr);
+
+ // Now split all other Phis than this one
+ for (DUIterator_Fast kmax, k = r->fast_outs(kmax); k < kmax; k++) {
+ Node* phi2 = r->fast_out(k);
+ if( phi2->is_Phi() && phi2->as_Phi() != phi ) {
+ PhiNode *newphi = PhiNode::make_blank(newr, phi2);
+ split_once(igvn, phi, val, phi2, newphi);
+ }
+ }
+
+ // Clean up this guy
+ igvn->hash_delete(phi);
+ for( i = phi->req()-1; i > 0; i-- ) {
+ if( phi->in(i) == val ) {
+ phi->del_req(i);
+ }
+ }
+ phi->add_req(val);
+
+ return phi;
+}
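+
+// A rough sketch of the effect above (shape illustrative): given
+//
+//   Region R(c1, c2, c3, c4)    Phi(R, conP, v1, conP, v2)
+//
+// the two paths that merge the same constant are split onto a new Region, and
+// the Phi keeps a single conP input arriving through it:
+//
+//   NewRegion NR(c1, c3)    Region R(c2, c4, NR)    Phi(R, v1, v2, conP)
+//
+// Other phis on R get a fresh blank phi on NR so every user still sees one
+// value per surviving path.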
+
+//=============================================================================
+//------------------------------simple_data_loop_check-------------------------
+ // Try to determine if the phi node is in a simple safe/unsafe data loop.
+// Returns:
+// enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop };
+ // Safe       - safe case when the phi and its inputs reference only safe data
+ //              nodes;
+ // Unsafe     - the phi and its inputs reference unsafe data nodes but there
+ //              is no reference back to the phi - need a graph walk
+ //              to determine if it is in a loop;
+ // UnsafeLoop - unsafe case when the phi references itself directly or through
+ //              an unsafe data node.
+ // Note: a safe data node is a node which can never reference itself during
+ // GVN transformations. For now it is Con, Proj, Phi, CastPP, CheckCastPP.
+ // I mark Phi nodes as safe not only because they can reference themselves
+ // but also to prevent mistaking the fallthrough case inside an outer loop
+ // for a dead loop when the phi references itself through another phi.
+PhiNode::LoopSafety PhiNode::simple_data_loop_check(Node *in) const {
+ // It is unsafe loop if the phi node references itself directly.
+ if (in == (Node*)this)
+ return UnsafeLoop; // Unsafe loop
+ // Unsafe loop if the phi node references itself through an unsafe data node.
+ // Exclude cases with null inputs or data nodes which could reference
+ // itself (safe for dead loops).
+ if (in != NULL && !in->is_dead_loop_safe()) {
+ // Check inputs of phi's inputs also.
+ // It is much less expensive than a full graph walk.
+ uint cnt = in->req();
+ for (uint i = 1; i < cnt; ++i) {
+ Node* m = in->in(i);
+ if (m == (Node*)this)
+ return UnsafeLoop; // Unsafe loop
+ if (m != NULL && !m->is_dead_loop_safe()) {
+ // Check the most common case (about 30% of all cases):
+ // phi->Load/Store->AddP->(ConP ConP Con)/(Parm Parm Con).
+ Node *m1 = (m->is_AddP() && m->req() > 3) ? m->in(1) : NULL;
+ if (m1 == (Node*)this)
+ return UnsafeLoop; // Unsafe loop
+ if (m1 != NULL && m1 == m->in(2) &&
+ m1->is_dead_loop_safe() && m->in(3)->is_Con()) {
+ continue; // Safe case
+ }
+ // The phi references an unsafe node - need full analysis.
+ return Unsafe;
+ }
+ }
+ }
+ return Safe; // Safe case - we can optimize the phi node.
+}
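+
+// Rough examples of the three answers above (shapes illustrative):
+//   the input is the phi itself, or an AddI that uses the phi   -> UnsafeLoop
+//   the input is a Con, Proj, or another Phi                    -> Safe
+//   the input is some other data node with unrecognized inputs  -> Unsafe
+//                                                   (full graph walk needed)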
+
+//------------------------------is_unsafe_data_reference-----------------------
+// If phi can be reached through the data input - it is data loop.
+bool PhiNode::is_unsafe_data_reference(Node *in) const {
+ assert(req() > 1, "");
+ // First, check simple cases when phi references itself directly or
+ // through another node.
+ LoopSafety safety = simple_data_loop_check(in);
+ if (safety == UnsafeLoop)
+ return true; // phi references itself - unsafe loop
+ else if (safety == Safe)
+ return false; // Safe case - phi could be replaced with the unique input.
+
+ // Unsafe case when we should go through data graph to determine
+ // if the phi references itself.
+
+ ResourceMark rm;
+
+ Arena *a = Thread::current()->resource_area();
+ Node_List nstack(a);
+ VectorSet visited(a);
+
+ nstack.push(in); // Start with unique input.
+ visited.set(in->_idx);
+ while (nstack.size() != 0) {
+ Node* n = nstack.pop();
+ uint cnt = n->req();
+ for (uint i = 1; i < cnt; i++) { // Only data paths
+ Node* m = n->in(i);
+ if (m == (Node*)this) {
+ return true; // Data loop
+ }
+ if (m != NULL && !m->is_dead_loop_safe()) { // Only look for unsafe cases.
+ if (!visited.test_set(m->_idx))
+ nstack.push(m);
+ }
+ }
+ }
+ return false; // The phi is not reachable from its inputs
+}
+
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Must preserve
+// the CFG, but we can still strip out dead paths.
+Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // The following should never happen after the 6297035 fix.
+ if( is_copy() ) // Already degraded to a Copy ?
+ return NULL; // No change
+
+ Node *r = in(0); // RegionNode
+ assert(r->in(0) == NULL || !r->in(0)->is_Root(), "not a specially hidden merge");
+
+ // Note: During parsing, phis are often transformed before their regions.
+ // This means we have to use type_or_null to defend against untyped regions.
+ if( phase->type_or_null(r) == Type::TOP ) // Dead code?
+ return NULL; // No change
+
+ Node *top = phase->C->top();
+
+ // There are 2 situations when only one valid phi input is left
+ // (in addition to the Region input).
+ // One: the region is not a loop - replace the phi with this input.
+ // Two: the region is a loop - replace the phi with top since this data path is dead
+ // and we need to break the dead data loop.
+ Node* progress = NULL; // Record if any progress made
+ for( uint j = 1; j < req(); ++j ){ // For all paths in
+ // Check unreachable control paths
+ Node* rc = r->in(j);
+ Node* n = in(j); // Get the input
+ if (rc == NULL || phase->type(rc) == Type::TOP) {
+ if (n != top) { // Not already top?
+ set_req(j, top); // Nuke it down
+ progress = this; // Record progress
+ }
+ }
+ }
+
+ Node* uin = unique_input(phase);
+ if (uin == top) { // Simplest case: no alive inputs.
+ if (can_reshape) // IGVN transformation
+ return top;
+ else
+ return NULL; // Identity will return TOP
+ } else if (uin != NULL) {
+ // Only one not-NULL unique input path is left.
+ // Determine if this input is backedge of a loop.
+ // (Skip new phis which have no uses, and dead regions.)
+ if( outcnt() > 0 && r->in(0) != NULL ) {
+ // First, take the short cut when we know it is a loop and
+ // the EntryControl data path is dead.
+ assert(!r->is_Loop() || r->req() == 3, "Loop node should have 3 inputs");
+ // Then, check if there is a data loop when phi references itself directly
+ // or through other data nodes.
+ if( (r->is_Loop() && !phase->eqv_uncast(uin, in(LoopNode::EntryControl))) ||
+ (!r->is_Loop() && is_unsafe_data_reference(uin)) ) {
+ // Break this data loop to avoid creation of a dead loop.
+ if (can_reshape) {
+ return top;
+ } else {
+ // We can't return top if we are in the Parse phase - cut inputs only and
+ // let Identity handle the case.
+ replace_edge(uin, top);
+ return NULL;
+ }
+ }
+ }
+
+ // One unique input.
+ debug_only(Node* ident = Identity(phase));
+ // The unique input must eventually be detected by the Identity call.
+#ifdef ASSERT
+ if (ident != uin && !ident->is_top()) {
+ // print this output before failing assert
+ r->dump(3);
+ this->dump(3);
+ ident->dump();
+ uin->dump();
+ }
+#endif
+ assert(ident == uin || ident->is_top(), "Identity must clean this up");
+ return NULL;
+ }
+
+
+ Node* opt = NULL;
+ int true_path = is_diamond_phi();
+ if( true_path != 0 ) {
+ // Check for CMove'ing identity. If it would be unsafe,
+ // handle it here. In the safe case, let Identity handle it.
+ Node* unsafe_id = is_cmove_id(phase, true_path);
+ if( unsafe_id != NULL && is_unsafe_data_reference(unsafe_id) )
+ opt = unsafe_id;
+
+ // Check for simple convert-to-boolean pattern
+ if( opt == NULL )
+ opt = is_x2logic(phase, this, true_path);
+
+ // Check for absolute value
+ if( opt == NULL )
+ opt = is_absolute(phase, this, true_path);
+
+ // Check for conditional add
+ if( opt == NULL && can_reshape )
+ opt = is_cond_add(phase, this, true_path);
+
+ // These 4 optimizations could subsume the phi:
+ // have to check for a dead data loop creation.
+ if( opt != NULL ) {
+ if( opt == unsafe_id || is_unsafe_data_reference(opt) ) {
+ // Found dead loop.
+ if( can_reshape )
+ return top;
+ // We can't return top if we are in Parse phase - cut inputs only
+ // to stop further optimizations for this phi. Identity will return TOP.
+ assert(req() == 3, "only diamond merge phi here");
+ set_req(1, top);
+ set_req(2, top);
+ return NULL;
+ } else {
+ return opt;
+ }
+ }
+ }
+
+ // Check for merging identical values and split flow paths
+ if (can_reshape) {
+ opt = split_flow_path(phase, this);
+ // This optimization only modifies phi - don't need to check for dead loop.
+ assert(opt == NULL || phase->eqv(opt, this), "do not elide phi");
+ if (opt != NULL) return opt;
+ }
+
+ if (in(1) != NULL && in(1)->Opcode() == Op_AddP && can_reshape) {
+ // Try to undo Phi of AddP:
+ // (Phi (AddP base base y) (AddP base2 base2 y))
+ // becomes:
+ // newbase := (Phi base base2)
+ // (AddP newbase newbase y)
+ //
+ // This occurs as a result of unsuccessful split_thru_phi and
+ // interferes with taking advantage of addressing modes. See the
+ // clone_shift_expressions code in matcher.cpp
+ Node* addp = in(1);
+ const Type* type = addp->in(AddPNode::Base)->bottom_type();
+ Node* y = addp->in(AddPNode::Offset);
+ if (y != NULL && addp->in(AddPNode::Base) == addp->in(AddPNode::Address)) {
+ // make sure that all the inputs are similar to the first one,
+ // i.e. AddP with base == address and same offset as first AddP
+ bool doit = true;
+ for (uint i = 2; i < req(); i++) {
+ if (in(i) == NULL ||
+ in(i)->Opcode() != Op_AddP ||
+ in(i)->in(AddPNode::Base) != in(i)->in(AddPNode::Address) ||
+ in(i)->in(AddPNode::Offset) != y) {
+ doit = false;
+ break;
+ }
+ // Accumulate type for resulting Phi
+ type = type->meet(in(i)->in(AddPNode::Base)->bottom_type());
+ }
+ Node* base = NULL;
+ if (doit) {
+ // Check for neighboring AddP nodes in a tree.
+ // If they have a base, use it.
+ for (DUIterator_Fast kmax, k = this->fast_outs(kmax); k < kmax; k++) {
+ Node* u = this->fast_out(k);
+ if (u->is_AddP()) {
+ Node* base2 = u->in(AddPNode::Base);
+ if (base2 != NULL && !base2->is_top()) {
+ if (base == NULL)
+ base = base2;
+ else if (base != base2)
+ { doit = false; break; }
+ }
+ }
+ }
+ }
+ if (doit) {
+ if (base == NULL) {
+ base = new (phase->C, in(0)->req()) PhiNode(in(0), type, NULL);
+ for (uint i = 1; i < req(); i++) {
+ base->init_req(i, in(i)->in(AddPNode::Base));
+ }
+ phase->is_IterGVN()->register_new_node_with_optimizer(base);
+ }
+ return new (phase->C, 4) AddPNode(base, base, y);
+ }
+ }
+ }
+
+ // Split phis through memory merges, so that the memory merges will go away.
+ // Piggy-back this transformation on the search for a unique input....
+ // It will be as if the merged memory is the unique value of the phi.
+ // (Do not attempt this optimization unless parsing is complete.
+ // It would make the parser's memory-merge logic sick.)
+ // (MergeMemNode is not dead_loop_safe - need to check for dead loop.)
+ if (progress == NULL && can_reshape && type() == Type::MEMORY) {
+ // see if this phi should be sliced
+ uint merge_width = 0;
+ bool saw_self = false;
+ for( uint i=1; i<req(); ++i ) {// For all paths in
+ Node *ii = in(i);
+ if (ii->is_MergeMem()) {
+ MergeMemNode* n = ii->as_MergeMem();
+ merge_width = MAX2(merge_width, n->req());
+ saw_self = saw_self || phase->eqv(n->base_memory(), this);
+ }
+ }
+
+ // This restriction is temporarily necessary to ensure termination:
+ if (!saw_self && adr_type() == TypePtr::BOTTOM) merge_width = 0;
+
+ if (merge_width > Compile::AliasIdxRaw) {
+ // found at least one non-empty MergeMem
+ const TypePtr* at = adr_type();
+ if (at != TypePtr::BOTTOM) {
+ // Patch the existing phi to select an input from the merge:
+ // Phi:AT1(...MergeMem(m0, m1, m2)...) into
+ // Phi:AT1(...m1...)
+ int alias_idx = phase->C->get_alias_index(at);
+ for (uint i=1; i<req(); ++i) {
+ Node *ii = in(i);
+ if (ii->is_MergeMem()) {
+ MergeMemNode* n = ii->as_MergeMem();
+ // compress paths and change unreachable cycles to TOP
+ // Otherwise we could keep updating the input forever along a MergeMem cycle
+ // Equivalent code is in MemNode::Ideal_common
+ Node *m = phase->transform(n);
+ // If transformed to a MergeMem, get the desired slice
+ // Otherwise the returned node represents memory for every slice
+ Node *new_mem = (m->is_MergeMem()) ?
+ m->as_MergeMem()->memory_at(alias_idx) : m;
+ // Update input if it is progress over what we have now
+ if (new_mem != ii) {
+ set_req(i, new_mem);
+ progress = this;
+ }
+ }
+ }
+ } else {
+ // We know that at least one MergeMem->base_memory() == this
+ // (saw_self == true). If all other inputs also reference this phi
+ // (directly or through data nodes) - it is a dead loop.
+ bool saw_safe_input = false;
+ for (uint j = 1; j < req(); ++j) {
+ Node *n = in(j);
+ if (n->is_MergeMem() && n->as_MergeMem()->base_memory() == this)
+ continue; // skip known cases
+ if (!is_unsafe_data_reference(n)) {
+ saw_safe_input = true; // found safe input
+ break;
+ }
+ }
+ if (!saw_safe_input)
+ return top; // all inputs reference back to this phi - dead loop
+
+ // Phi(...MergeMem(m0, m1:AT1, m2:AT2)...) into
+ // MergeMem(Phi(...m0...), Phi:AT1(...m1...), Phi:AT2(...m2...))
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ Node* hook = new (phase->C, 1) Node(1);
+ PhiNode* new_base = (PhiNode*) clone();
+ // Must eagerly register phis, since they participate in loops.
+ if (igvn) {
+ igvn->register_new_node_with_optimizer(new_base);
+ hook->add_req(new_base);
+ }
+ MergeMemNode* result = MergeMemNode::make(phase->C, new_base);
+ for (uint i = 1; i < req(); ++i) {
+ Node *ii = in(i);
+ if (ii->is_MergeMem()) {
+ MergeMemNode* n = ii->as_MergeMem();
+ for (MergeMemStream mms(result, n); mms.next_non_empty2(); ) {
+ // If we have not seen this slice yet, make a phi for it.
+ bool made_new_phi = false;
+ if (mms.is_empty()) {
+ Node* new_phi = new_base->slice_memory(mms.adr_type(phase->C));
+ made_new_phi = true;
+ if (igvn) {
+ igvn->register_new_node_with_optimizer(new_phi);
+ hook->add_req(new_phi);
+ }
+ mms.set_memory(new_phi);
+ }
+ Node* phi = mms.memory();
+ assert(made_new_phi || phi->in(i) == n, "replace the i-th merge by a slice");
+ phi->set_req(i, mms.memory2());
+ }
+ }
+ }
+ // Distribute all self-loops.
+ { // (Extra braces to hide mms.)
+ for (MergeMemStream mms(result); mms.next_non_empty(); ) {
+ Node* phi = mms.memory();
+ for (uint i = 1; i < req(); ++i) {
+ if (phi->in(i) == this) phi->set_req(i, phi);
+ }
+ }
+ }
+ // now transform the new nodes, and return the mergemem
+ for (MergeMemStream mms(result); mms.next_non_empty(); ) {
+ Node* phi = mms.memory();
+ mms.set_memory(phase->transform(phi));
+ }
+ if (igvn) { // Unhook.
+ igvn->hash_delete(hook);
+ for (uint i = 1; i < hook->req(); i++) {
+ hook->set_req(i, NULL);
+ }
+ }
+ // Replace self with the result.
+ return result;
+ }
+ }
+ }
+
+ return progress; // Return any progress
+}
+
+//------------------------------out_RegMask------------------------------------
+const RegMask &PhiNode::in_RegMask(uint i) const {
+ return i ? out_RegMask() : RegMask::Empty;
+}
+
+const RegMask &PhiNode::out_RegMask() const {
+ uint ideal_reg = Matcher::base2reg[_type->base()];
+ assert( ideal_reg != Node::NotAMachineReg, "invalid type at Phi" );
+ if( ideal_reg == 0 ) return RegMask::Empty;
+ return *(Compile::current()->matcher()->idealreg2spillmask[ideal_reg]);
+}
+
+#ifndef PRODUCT
+void PhiNode::dump_spec(outputStream *st) const {
+ TypeNode::dump_spec(st);
+ if (in(0) != NULL &&
+ in(0)->is_CountedLoop() &&
+ in(0)->as_CountedLoop()->phi() == this) {
+ st->print(" #tripcount");
+ }
+}
+#endif
+
+
+//=============================================================================
+const Type *GotoNode::Value( PhaseTransform *phase ) const {
+ // If the input is reachable, then we are executed.
+ // If the input is not reachable, then we are not executed.
+ return phase->type(in(0));
+}
+
+Node *GotoNode::Identity( PhaseTransform *phase ) {
+ return in(0); // Simple copy of incoming control
+}
+
+const RegMask &GotoNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//=============================================================================
+const RegMask &JumpNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//=============================================================================
+const RegMask &JProjNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//=============================================================================
+const RegMask &CProjNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+
+
+//=============================================================================
+
+uint PCTableNode::hash() const { return Node::hash() + _size; }
+uint PCTableNode::cmp( const Node &n ) const
+{ return _size == ((PCTableNode&)n)._size; }
+
+const Type *PCTableNode::bottom_type() const {
+ const Type** f = TypeTuple::fields(_size);
+ for( uint i = 0; i < _size; i++ ) f[i] = Type::CONTROL;
+ return TypeTuple::make(_size, f);
+}
+
+//------------------------------Value------------------------------------------
+// Compute the type of the PCTableNode. If reachable it is a tuple of
+// Control, otherwise the table targets are not reachable
+const Type *PCTableNode::Value( PhaseTransform *phase ) const {
+ if( phase->type(in(0)) == Type::CONTROL )
+ return bottom_type();
+ return Type::TOP; // All paths dead? Then so are we
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *PCTableNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+//=============================================================================
+uint JumpProjNode::hash() const {
+ return Node::hash() + _dest_bci;
+}
+
+uint JumpProjNode::cmp( const Node &n ) const {
+ return ProjNode::cmp(n) &&
+ _dest_bci == ((JumpProjNode&)n)._dest_bci;
+}
+
+#ifndef PRODUCT
+void JumpProjNode::dump_spec(outputStream *st) const {
+ ProjNode::dump_spec(st);
+ st->print("@bci %d ",_dest_bci);
+}
+#endif
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Check for being unreachable, or for coming from a Rethrow. Rethrows cannot
+// have the default "fall_through_index" path.
+const Type *CatchNode::Value( PhaseTransform *phase ) const {
+ // Unreachable? Then so are all paths from here.
+ if( phase->type(in(0)) == Type::TOP ) return Type::TOP;
+ // First assume all paths are reachable
+ const Type** f = TypeTuple::fields(_size);
+ for( uint i = 0; i < _size; i++ ) f[i] = Type::CONTROL;
+ // Identify cases that will always throw an exception
+ // () rethrow call
+ // () virtual or interface call with NULL receiver
+ // () call is a check cast with incompatible arguments
+ if( in(1)->is_Proj() ) {
+ Node *i10 = in(1)->in(0);
+ if( i10->is_Call() ) {
+ CallNode *call = i10->as_Call();
+ // Rethrows always throw exceptions, never return
+ if (call->entry_point() == OptoRuntime::rethrow_stub()) {
+ f[CatchProjNode::fall_through_index] = Type::TOP;
+ } else if( call->req() > TypeFunc::Parms ) {
+ const Type *arg0 = phase->type( call->in(TypeFunc::Parms) );
+ // Check for null receiver to virtual or interface calls
+ if( call->is_CallDynamicJava() &&
+ arg0->higher_equal(TypePtr::NULL_PTR) ) {
+ f[CatchProjNode::fall_through_index] = Type::TOP;
+ }
+ } // End of if not a runtime stub
+ } // End of if have call above me
+ } // End of if slot 1 is a projection
+ return TypeTuple::make(_size, f);
+}
+
+//=============================================================================
+uint CatchProjNode::hash() const {
+ return Node::hash() + _handler_bci;
+}
+
+
+uint CatchProjNode::cmp( const Node &n ) const {
+ return ProjNode::cmp(n) &&
+ _handler_bci == ((CatchProjNode&)n)._handler_bci;
+}
+
+
+//------------------------------Identity---------------------------------------
+// If only 1 target is possible, choose it if it is the main control
+Node *CatchProjNode::Identity( PhaseTransform *phase ) {
+ // If my value is control and no other value is, then treat as ID
+ const TypeTuple *t = phase->type(in(0))->is_tuple();
+ if (t->field_at(_con) != Type::CONTROL) return this;
+ // If we remove the last CatchProj and elide the Catch/CatchProj, then we
+ // also remove any exception table entry. Thus we must know the call
+ // feeding the Catch will not really throw an exception. This is ok for
+ // the main fall-thru control (happens when we know a call can never throw
+ // an exception) or for "rethrow", because a further optimization will
+ // yank the rethrow (happens when we inline a function that can throw an
+ // exception and the caller has no handler). Not legal, e.g., for passing
+ // a NULL receiver to a v-call, or passing bad types to a slow-check-cast.
+ // These cases MUST throw an exception via the runtime system, so the VM
+ // will be looking for a table entry.
+ Node *proj = in(0)->in(1); // Expect a proj feeding CatchNode
+ CallNode *call;
+ if (_con != TypeFunc::Control && // Bail out if not the main control.
+ !(proj->is_Proj() && // AND NOT a rethrow
+ proj->in(0)->is_Call() &&
+ (call = proj->in(0)->as_Call()) &&
+ call->entry_point() == OptoRuntime::rethrow_stub()))
+ return this;
+
+ // Search for any other path being control
+ for (uint i = 0; i < t->cnt(); i++) {
+ if (i != _con && t->field_at(i) == Type::CONTROL)
+ return this;
+ }
+ // Only my path is possible; I am identity on control to the jump
+ return in(0)->in(0);
+}
+
+
+#ifndef PRODUCT
+void CatchProjNode::dump_spec(outputStream *st) const {
+ ProjNode::dump_spec(st);
+ st->print("@bci %d ",_handler_bci);
+}
+#endif
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// Check for CreateEx being Identity.
+Node *CreateExNode::Identity( PhaseTransform *phase ) {
+ if( phase->type(in(1)) == Type::TOP ) return in(1);
+ if( phase->type(in(0)) == Type::TOP ) return in(0);
+ // We only come from CatchProj, unless the CatchProj goes away.
+ // If the CatchProj is optimized away, then we just carry the
+ // exception oop through.
+ CallNode *call = in(1)->in(0)->as_Call();
+
+ return ( in(0)->is_CatchProj() && in(0)->in(0)->in(1) == in(1) )
+ ? this
+ : call->in(TypeFunc::Parms);
+}
+
+//=============================================================================
+#ifndef PRODUCT
+void NeverBranchNode::format( PhaseRegAlloc *ra_, outputStream *st) const {
+ st->print("%s", Name());
+}
+#endif
diff --git a/src/share/vm/opto/cfgnode.hpp b/src/share/vm/opto/cfgnode.hpp
new file mode 100644
index 000000000..e01e25258
--- /dev/null
+++ b/src/share/vm/opto/cfgnode.hpp
@@ -0,0 +1,481 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+class Matcher;
+class Node;
+class RegionNode;
+class TypeNode;
+class PhiNode;
+class GotoNode;
+class MultiNode;
+class MultiBranchNode;
+class IfNode;
+class PCTableNode;
+class JumpNode;
+class CatchNode;
+class NeverBranchNode;
+class ProjNode;
+class CProjNode;
+class IfTrueNode;
+class IfFalseNode;
+class CatchProjNode;
+class JProjNode;
+class JumpProjNode;
+class SCMemProjNode;
+class PhaseIdealLoop;
+
+//------------------------------RegionNode-------------------------------------
+// The class of RegionNodes, which can be mapped to basic blocks in the
+// program. Their inputs point to Control sources. PhiNodes (described
+// below) have an input that points to a RegionNode. Merged data inputs to PhiNodes
+// correspond 1-to-1 with RegionNode inputs. The zero input of a PhiNode is
+// the RegionNode, and the zero input of the RegionNode is itself.
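+//
+// Illustrative two-way merge (a sketch of the shape described above):
+//
+//        ctrl1    ctrl2          val1     val2
+//           \      /                \      /
+//      Region(self,c1,c2)      Phi(Region,v1,v2)
+//
+// Region->in(1) pairs with Phi->in(1), Region->in(2) with Phi->in(2);
+// Region->in(0) is the Region itself and Phi->in(0) is the Region.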
+class RegionNode : public Node {
+public:
+ // Node layout (parallels PhiNode):
+ enum { Region, // Generally points to self.
+ Control // Control arcs are [1..len)
+ };
+
+ RegionNode( uint required ) : Node(required) {
+ init_class_id(Class_Region);
+ init_req(0,this);
+ }
+
+ Node* is_copy() const {
+ const Node* r = _in[Region];
+ if (r == NULL)
+ return nonnull_req();
+ return NULL; // not a copy!
+ }
+ PhiNode* has_phi() const; // returns an arbitrary phi user, or NULL
+ PhiNode* has_unique_phi() const; // returns the unique phi user, or NULL
+ // Is this region node unreachable from root?
+ bool is_unreachable_region(PhaseGVN *phase) const;
+ virtual int Opcode() const;
+ virtual bool pinned() const { return (const Node *)in(0) == this; }
+ virtual bool is_CFG () const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type *bottom_type() const { return Type::CONTROL; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const RegMask &out_RegMask() const;
+};
+
+//------------------------------JProjNode--------------------------------------
+// jump projection for node that produces multiple control-flow paths
+class JProjNode : public ProjNode {
+ public:
+ JProjNode( Node* ctrl, uint idx ) : ProjNode(ctrl,idx) {}
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual const Node* is_block_proj() const { return in(0); }
+ virtual const RegMask& out_RegMask() const;
+ virtual uint ideal_reg() const { return 0; }
+};
+
+//------------------------------PhiNode----------------------------------------
+// PhiNodes merge values from different Control paths. Slot 0 points to the
+// controlling RegionNode. Other slots map 1-for-1 with incoming control flow
+// paths to the RegionNode. For speed reasons (to avoid another pass) we
+// can turn PhiNodes into copies in-place by NULL'ing out their RegionNode
+// input in slot 0.
+class PhiNode : public TypeNode {
+ const TypePtr* const _adr_type; // non-null only for Type::MEMORY nodes.
+ // Size is bigger to hold the _adr_type field.
+ virtual uint hash() const; // Check the type
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const { return sizeof(*this); }
+
+ // Determine a unique non-trivial input, if any.
+ // Ignore casts if it helps. Return NULL on failure.
+ Node* unique_input(PhaseTransform *phase);
+ // Determine if CMoveNode::is_cmove_id can be used at this join point.
+ Node* is_cmove_id(PhaseTransform* phase, int true_path);
+
+public:
+ // Node layout (parallels RegionNode):
+ enum { Region, // Control input is the Phi's region.
+ Input // Input values are [1..len)
+ };
+
+ PhiNode( Node *r, const Type *t, const TypePtr* at = NULL )
+ : TypeNode(t,r->req()), _adr_type(at) {
+ init_class_id(Class_Phi);
+ init_req(0, r);
+ verify_adr_type();
+ }
+ // create a new phi with in edges matching r and set (initially) to x
+ static PhiNode* make( Node* r, Node* x );
+ // extra type arguments override the new phi's bottom_type and adr_type
+ static PhiNode* make( Node* r, Node* x, const Type *t, const TypePtr* at = NULL );
+ // create a new phi with narrowed memory type
+ PhiNode* slice_memory(const TypePtr* adr_type) const;
+ // like make(r, x), but does not initialize the in edges to x
+ static PhiNode* make_blank( Node* r, Node* x );
+
+ // Accessors
+ RegionNode* region() const { Node* r = in(Region); assert(!r || r->is_Region(), ""); return (RegionNode*)r; }
+
+ Node* is_copy() const {
+ // The node is a real phi if _in[0] is a Region node.
+ DEBUG_ONLY(const Node* r = _in[Region];)
+ assert(r != NULL && r->is_Region(), "Not valid control");
+ return NULL; // not a copy!
+ }
+
+ // Check for a simple dead loop.
+ enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop };
+ LoopSafety simple_data_loop_check(Node *in) const;
+ // Is it an unsafe data loop? It becomes a dead loop if this phi node is removed.
+ bool is_unsafe_data_reference(Node *in) const;
+ int is_diamond_phi() const;
+ virtual int Opcode() const;
+ virtual bool pinned() const { return in(0) != 0; }
+ virtual const TypePtr *adr_type() const { verify_adr_type(true); return _adr_type; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const RegMask &out_RegMask() const;
+ virtual const RegMask &in_RegMask(uint) const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+#ifdef ASSERT
+ void verify_adr_type(VectorSet& visited, const TypePtr* at) const;
+ void verify_adr_type(bool recursive = false) const;
+#else //ASSERT
+ void verify_adr_type(bool recursive = false) const {}
+#endif //ASSERT
+};
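+
+// Illustrative use of the factory methods above (a sketch only, per the
+// documented semantics of make()): given a RegionNode 'r' with two control
+// inputs, a merged value is typically built as
+//   PhiNode* phi = PhiNode::make(r, default_val); // all data inputs = default_val
+//   phi->set_req(1, val_from_path_1);
+//   phi->set_req(2, val_from_path_2);
+// and then run through GVN; slot 0 remains the Region, per the layout above.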
+
+//------------------------------GotoNode---------------------------------------
+// GotoNodes perform direct branches.
+class GotoNode : public Node {
+public:
+ GotoNode( Node *control ) : Node(control) {
+ init_flags(Flag_is_Goto);
+ }
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; }
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual const Node *is_block_proj() const { return this; }
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type *bottom_type() const { return Type::CONTROL; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const RegMask &out_RegMask() const;
+};
+
+//------------------------------CProjNode--------------------------------------
+// control projection for node that produces multiple control-flow paths
+class CProjNode : public ProjNode {
+public:
+ CProjNode( Node *ctrl, uint idx ) : ProjNode(ctrl,idx) {}
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual const Node *is_block_proj() const { return in(0); }
+ virtual const RegMask &out_RegMask() const;
+ virtual uint ideal_reg() const { return 0; }
+};
+
+//---------------------------MultiBranchNode-----------------------------------
+// This class defines a MultiBranchNode, a MultiNode which yields multiple
+// control values. These are distinguished from other types of MultiNodes,
+// which yield multiple data values and for which control is always and only
+// projection #0.
+class MultiBranchNode : public MultiNode {
+public:
+ MultiBranchNode( uint required ) : MultiNode(required) {
+ init_class_id(Class_MultiBranch);
+ }
+};
+
+//------------------------------IfNode-----------------------------------------
+// Output selected Control, based on a boolean test
+class IfNode : public MultiBranchNode {
+ // Size is bigger to hold the probability field. However, _prob does not
+ // change the semantics so it does not appear in the hash & cmp functions.
+ virtual uint size_of() const { return sizeof(*this); }
+public:
+
+ // Degrees of branch prediction probability by order of magnitude:
+ // PROB_UNLIKELY_MAG(N) is a 1 in 1eN chance.
+ // PROB_LIKELY_MAG(N) is 1 - PROB_UNLIKELY_MAG(N).
+#define PROB_UNLIKELY_MAG(N) (1e- ## N ## f)
+#define PROB_LIKELY_MAG(N) (1.0f-PROB_UNLIKELY_MAG(N))
+
+ // Maximum and minimum branch prediction probabilities
+ // 1 in 1,000,000 (magnitude 6)
+ //
+ // Although PROB_NEVER == PROB_MIN and PROB_ALWAYS == PROB_MAX
+ // they are used to distinguish different situations:
+ //
+ // The name PROB_MAX (PROB_MIN) is for probabilities which correspond to
+ // very likely (unlikely) but with a concrete possibility of a rare
+ // contrary case. These constants would be used for pinning
+ // measurements, and as measures for assertions that have high
+ // confidence, but some evidence of occasional failure.
+ //
+ // The name PROB_ALWAYS (PROB_NEVER) is to stand for situations for which
+ // there is no evidence at all that the contrary case has ever occurred.
+
+#define PROB_NEVER PROB_UNLIKELY_MAG(6)
+#define PROB_ALWAYS PROB_LIKELY_MAG(6)
+
+#define PROB_MIN PROB_UNLIKELY_MAG(6)
+#define PROB_MAX PROB_LIKELY_MAG(6)
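+
+ // For example, PROB_UNLIKELY_MAG(6) token-pastes to the literal (1e-6f), so
+ // PROB_NEVER/PROB_MIN are a probability of 1e-6 (1 in 1,000,000) and
+ // PROB_ALWAYS/PROB_MAX are (1.0f - 1e-6f).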
+
+ // Static branch prediction probabilities
+ // 1 in 10 (magnitude 1)
+#define PROB_STATIC_INFREQUENT PROB_UNLIKELY_MAG(1)
+#define PROB_STATIC_FREQUENT PROB_LIKELY_MAG(1)
+
+ // Fair probability 50/50
+#define PROB_FAIR (0.5f)
+
+ // Unknown probability sentinel
+#define PROB_UNKNOWN (-1.0f)
+
+ // Probability "constructors", to distinguish as a probability any manifest
+ // constant without a name
+#define PROB_LIKELY(x) ((float) (x))
+#define PROB_UNLIKELY(x) (1.0f - (float)(x))
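+ // (So PROB_LIKELY(0.9f) is simply 0.9f, while PROB_UNLIKELY(0.9f) evaluates
+ // to about 0.1f; the macros exist to document intent at the use site.)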
+
+ // Other probabilities in use, but without a unique name, are documented
+ // here for lack of a better place:
+ //
+ // 1 in 1000 probabilities (magnitude 3):
+ // threshold for converting to conditional move
+ // likelihood of null check failure if a null HAS been seen before
+ // likelihood of slow path taken in library calls
+ //
+ // 1 in 10,000 probabilities (magnitude 4):
+ // threshold for making an uncommon trap probability more extreme
+ // threshold for making a null check implicit
+ // likelihood of needing a gc if eden top moves during an allocation
+ // likelihood of a predicted call failure
+ //
+ // 1 in 100,000 probabilities (magnitude 5):
+ // threshold for ignoring counts when estimating path frequency
+ // likelihood of FP clipping failure
+ // likelihood of catching an exception from a try block
+ // likelihood of null check failure if a null has NOT been seen before
+ //
+ // Magic manifest probabilities such as 0.83, 0.7, ... can be found in
+ // gen_subtype_check() and catch_inline_exceptions().
+
+ float _prob; // Probability of true path being taken.
+ float _fcnt; // Frequency counter
+ IfNode( Node *control, Node *b, float p, float fcnt )
+ : MultiBranchNode(2), _prob(p), _fcnt(fcnt) {
+ init_class_id(Class_If);
+ init_req(0,control);
+ init_req(1,b);
+ }
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; }
+ virtual const Type *bottom_type() const { return TypeTuple::IFBOTH; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const RegMask &out_RegMask() const;
+ void dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
+ int is_range_check(Node* &range, Node* &index, jint &offset);
+ static Node* up_one_dom(Node* curr, bool linear_only = false);
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+class IfTrueNode : public CProjNode {
+public:
+ IfTrueNode( IfNode *ifnode ) : CProjNode(ifnode,1) {
+ init_class_id(Class_IfTrue);
+ }
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+};
+
+class IfFalseNode : public CProjNode {
+public:
+ IfFalseNode( IfNode *ifnode ) : CProjNode(ifnode,0) {
+ init_class_id(Class_IfFalse);
+ }
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+};
+
+
+//------------------------------PCTableNode------------------------------------
+// Build an indirect branch table. Given a control and a table index,
+// control is passed to the Projection matching the table index. Used to
+// implement switch statements and exception-handling capabilities.
+// Undefined behavior if passed-in index is not inside the table.
+class PCTableNode : public MultiBranchNode {
+ virtual uint hash() const; // Target count; table size
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const { return sizeof(*this); }
+
+public:
+ const uint _size; // Number of targets
+
+ PCTableNode( Node *ctrl, Node *idx, uint size ) : MultiBranchNode(2), _size(size) {
+ init_class_id(Class_PCTable);
+ init_req(0, ctrl);
+ init_req(1, idx);
+ }
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *bottom_type() const;
+ virtual bool pinned() const { return true; }
+};
+
+//------------------------------JumpNode---------------------------------------
+// Indirect branch. Uses PCTable above to implement a switch statement.
+// It emits as a table load and local branch.
+class JumpNode : public PCTableNode {
+public:
+ JumpNode( Node* control, Node* switch_val, uint size) : PCTableNode(control, switch_val, size) {
+ init_class_id(Class_Jump);
+ }
+ virtual int Opcode() const;
+ virtual const RegMask& out_RegMask() const;
+ virtual const Node* is_block_proj() const { return this; }
+};
+
+class JumpProjNode : public JProjNode {
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const { return sizeof(*this); }
+
+ private:
+ const int _dest_bci;
+ const uint _proj_no;
+ const int _switch_val;
+ public:
+ JumpProjNode(Node* jumpnode, uint proj_no, int dest_bci, int switch_val)
+ : JProjNode(jumpnode, proj_no), _dest_bci(dest_bci), _proj_no(proj_no), _switch_val(switch_val) {
+ init_class_id(Class_JumpProj);
+ }
+
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return Type::CONTROL; }
+ int dest_bci() const { return _dest_bci; }
+ int switch_val() const { return _switch_val; }
+ uint proj_no() const { return _proj_no; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CatchNode--------------------------------------
+// Helper node to fork exceptions. "Catch" catches any exceptions thrown by
+// a just-prior call. Looks like a PCTableNode but emits no code - just the
+// table. The table lookup and branch is implemented by RethrowNode.
+class CatchNode : public PCTableNode {
+public:
+ CatchNode( Node *ctrl, Node *idx, uint size ) : PCTableNode(ctrl,idx,size){
+ init_class_id(Class_Catch);
+ }
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+// CatchProjNode controls which exception handler is targeted after a call.
+// It is passed in the bci of the target handler, or no_handler_bci in case
+// the projection doesn't lead to an exception handler.
+class CatchProjNode : public CProjNode {
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const { return sizeof(*this); }
+
+private:
+ const int _handler_bci;
+
+public:
+ enum {
+ fall_through_index = 0, // the fall through projection index
+ catch_all_index = 1, // the projection index for catch-alls
+ no_handler_bci = -1 // the bci for fall through or catch-all projs
+ };
+
+ CatchProjNode(Node* catchnode, uint proj_no, int handler_bci)
+ : CProjNode(catchnode, proj_no), _handler_bci(handler_bci) {
+ init_class_id(Class_CatchProj);
+ assert(proj_no != fall_through_index || handler_bci < 0, "fall through case must have bci < 0");
+ }
+
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *bottom_type() const { return Type::CONTROL; }
+ int handler_bci() const { return _handler_bci; }
+ bool is_handler_proj() const { return _handler_bci >= 0; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+
+//---------------------------------CreateExNode--------------------------------
+// Helper node to create the exception coming back from a call
+class CreateExNode : public TypeNode {
+public:
+ CreateExNode(const Type* t, Node* control, Node* i_o) : TypeNode(t, 2) {
+ init_req(0, control);
+ init_req(1, i_o);
+ }
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual bool pinned() const { return true; }
+ uint match_edge(uint idx) const { return 0; }
+ virtual uint ideal_reg() const { return Op_RegP; }
+};
+
+//------------------------------NeverBranchNode-------------------------------
+// The never-taken branch. Used to give the appearance of exiting infinite
+// loops to those algorithms that like all paths to be reachable. It encodes
+// as empty (emits no code).
+class NeverBranchNode : public MultiBranchNode {
+public:
+ NeverBranchNode( Node *ctrl ) : MultiBranchNode(1) { init_req(0,ctrl); }
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; };
+ virtual const Type *bottom_type() const { return TypeTuple::IFBOTH; }
+
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
+ virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
+#ifndef PRODUCT
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
diff --git a/src/share/vm/opto/chaitin.cpp b/src/share/vm/opto/chaitin.cpp
new file mode 100644
index 000000000..33ca24ba5
--- /dev/null
+++ b/src/share/vm/opto/chaitin.cpp
@@ -0,0 +1,2042 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_chaitin.cpp.incl"
+
+//=============================================================================
+
+#ifndef PRODUCT
+void LRG::dump( ) const {
+ ttyLocker ttyl;
+ tty->print("%d ",num_regs());
+ _mask.dump();
+ if( _msize_valid ) {
+ if( mask_size() == compute_mask_size() ) tty->print(", #%d ",_mask_size);
+ else tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size());
+ } else {
+ tty->print(", #?(%d) ",_mask.Size());
+ }
+
+ tty->print("EffDeg: ");
+ if( _degree_valid ) tty->print( "%d ", _eff_degree );
+ else tty->print("? ");
+
+ if( _def == NodeSentinel ) {
+ tty->print("MultiDef ");
+ if (_defs != NULL) {
+ tty->print("(");
+ for (int i = 0; i < _defs->length(); i++) {
+ tty->print("N%d ", _defs->at(i)->_idx);
+ }
+ tty->print(") ");
+ }
+ }
+ else if( _def == 0 ) tty->print("Dead ");
+ else tty->print("Def: N%d ",_def->_idx);
+
+ tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score());
+ // Flags
+ if( _is_oop ) tty->print("Oop ");
+ if( _is_float ) tty->print("Float ");
+ if( _was_spilled1 ) tty->print("Spilled ");
+ if( _was_spilled2 ) tty->print("Spilled2 ");
+ if( _direct_conflict ) tty->print("Direct_conflict ");
+ if( _fat_proj ) tty->print("Fat ");
+ if( _was_lo ) tty->print("Lo ");
+ if( _has_copy ) tty->print("Copy ");
+ if( _at_risk ) tty->print("Risk ");
+
+ if( _must_spill ) tty->print("Must_spill ");
+ if( _is_bound ) tty->print("Bound ");
+ if( _msize_valid ) {
+ if( _degree_valid && lo_degree() ) tty->print("Trivial ");
+ }
+
+ tty->cr();
+}
+#endif
+
+//------------------------------score------------------------------------------
+// Compute score from cost and area. Low score is best to spill.
+static double raw_score( double cost, double area ) {
+ return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;
+}
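+
+// (The constant 1.52588e-5 in raw_score() is 1/65536, so the area term is
+// effectively area * RegisterCostAreaRatio / 64K, matching the comment in
+// score() below.)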
+
+double LRG::score() const {
+ // Scale _area by RegisterCostAreaRatio/64K then subtract from cost.
+ // Bigger area lowers score, encourages spilling this live range.
+ // Bigger cost raises score, prevents spilling this live range.
+ // (Note: 1/65536 is the magic constant below; I don't trust the C optimizer
+ // to turn a divide by a constant into a multiply by the reciprocal).
+ double score = raw_score( _cost, _area);
+
+ // Account for area. Basically, LRGs covering large areas are better
+ // to spill because more other LRGs get freed up.
+ if( _area == 0.0 ) // No area? Then no progress to spill
+ return 1e35;
+
+ if( _was_spilled2 ) // If spilled once before, we are unlikely
+ return score + 1e30; // to make progress again.
+
+ if( _cost >= _area*3.0 ) // Tiny area relative to cost
+ return score + 1e17; // Probably no progress to spill
+
+ if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost
+ return score + 1e10; // Likely no progress to spill
+
+ return score;
+}
+
+//------------------------------LRG_List---------------------------------------
+LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) {
+ memset( _lidxs, 0, sizeof(uint)*max );
+}
+
+void LRG_List::extend( uint nidx, uint lidx ) {
+ _nesting.check();
+ if( nidx >= _max ) {
+ uint size = 16;
+ while( size <= nidx ) size <<=1;
+ _lidxs = REALLOC_RESOURCE_ARRAY( uint, _lidxs, _max, size );
+ _max = size;
+ }
+ while( _cnt <= nidx )
+ _lidxs[_cnt++] = 0;
+ _lidxs[nidx] = lidx;
+}
+
+#define NUMBUCKS 3
+
+//------------------------------Chaitin----------------------------------------
+PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
+ : PhaseRegAlloc(unique, cfg, matcher,
+#ifndef PRODUCT
+ print_chaitin_statistics
+#else
+ NULL
+#endif
+ ),
+ _names(unique), _uf_map(unique),
+ _maxlrg(0), _live(0),
+ _spilled_once(Thread::current()->resource_area()),
+ _spilled_twice(Thread::current()->resource_area()),
+ _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0),
+ _oldphi(unique)
+#ifndef PRODUCT
+ , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
+#endif
+{
+ NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
+ uint i,j;
+ // Build a list of basic blocks, sorted by frequency
+ _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
+ // Experiment with sorting strategies to speed compilation
+ double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
+ Block **buckets[NUMBUCKS]; // Array of buckets
+ uint buckcnt[NUMBUCKS]; // Array of bucket counters
+ double buckval[NUMBUCKS]; // Array of bucket value cutoffs
+ for( i = 0; i < NUMBUCKS; i++ ) {
+ buckets[i] = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
+ buckcnt[i] = 0;
+ // Bump by three orders of magnitude each time
+ cutoff *= 0.001;
+ buckval[i] = cutoff;
+ for( j = 0; j < _cfg._num_blocks; j++ ) {
+ buckets[i][j] = NULL;
+ }
+ }
+ // Sort blocks into buckets
+ for( i = 0; i < _cfg._num_blocks; i++ ) {
+ for( j = 0; j < NUMBUCKS; j++ ) {
+ if( (j == NUMBUCKS-1) || (_cfg._blocks[i]->_freq > buckval[j]) ) {
+ // Assign block to end of list for appropriate bucket
+ buckets[j][buckcnt[j]++] = _cfg._blocks[i];
+ break; // kick out of inner loop
+ }
+ }
+ }
+ // Dump buckets into final block array
+ uint blkcnt = 0;
+ for( i = 0; i < NUMBUCKS; i++ ) {
+ for( j = 0; j < buckcnt[i]; j++ ) {
+ _blks[blkcnt++] = buckets[i][j];
+ }
+ }
+
+ assert(blkcnt == _cfg._num_blocks, "Block array not totally filled");
+}
+
+void PhaseChaitin::Register_Allocate() {
+
+ // Above the OLD FP (and in registers) are the incoming arguments. Stack
+ // slots in this area are called "arg_slots". Above the NEW FP (and in
+ // registers) is the outgoing argument area; above that is the spill/temp
+ // area. These are all "frame_slots". Arg_slots start at the zero
+ // stack_slots and count up to the known arg_size. Frame_slots start at
+ // the stack_slot #arg_size and go up. After allocation I map stack
+ // slots to actual offsets. Stack-slots in the arg_slot area are biased
+ // by the frame_size; stack-slots in the frame_slot area are biased by 0.
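+ //
+ // Overall flow of this routine (a summary of the phases below):
+ //   de_ssa -> liveness + IFG -> aggressive coalesce -> physical IFG ->
+ //   split now if a spill is guaranteed -> conservative coalesce ->
+ //   Simplify/Select (split and retry while anything spills) ->
+ //   post-allocation copy removal -> frame sizing -> fixup_spills.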
+
+ _trip_cnt = 0;
+ _alternate = 0;
+ _matcher._allocation_started = true;
+
+ ResourceArea live_arena; // Arena for liveness & IFG info
+ ResourceMark rm(&live_arena);
+
+ // Need live-ness for the IFG; need the IFG for coalescing. If the
+ // liveness is JUST for coalescing, then I can get some mileage by renaming
+ // all copy-related live ranges low and then using the max copy-related
+ // live range as a cut-off for LIVE and the IFG. In other words, I can
+ // build a subset of LIVE and IFG just for copies.
+ PhaseLive live(_cfg,_names,&live_arena);
+
+ // Need IFG for coalescing and coloring
+ PhaseIFG ifg( &live_arena );
+ _ifg = &ifg;
+
+ if (C->unique() > _names.Size()) _names.extend(C->unique()-1, 0);
+
+ // Come out of SSA world to the Named world. Assign (virtual) registers to
+ // Nodes. Use the same register for all inputs and the output of PhiNodes
+ // - effectively ending SSA form. This requires either coalescing live
+ // ranges or inserting copies. For the moment, we insert "virtual copies"
+ // - we pretend there is a copy prior to each Phi in predecessor blocks.
+ // We will attempt to coalesce such "virtual copies" before we manifest
+ // them for real.
+ de_ssa();
+
+ {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+ _live = NULL; // Mark live as being not available
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg); // Empty IFG
+ gather_lrg_masks( false ); // Collect LRG masks
+ live.compute( _maxlrg ); // Compute liveness
+ _live = &live; // Mark LIVE as being available
+ }
+
+ // Base pointers are currently "used" by instructions which define new
+ // derived pointers. This makes base pointers live up to where the
+ // derived pointer is made, but not beyond. Really, they need to be live
+ // across any GC point where the derived value is live. So this code looks
+ // at all the GC points, and "stretches" the live range of any base pointer
+ // to the GC point.
+ if( stretch_base_pointer_live_ranges(&live_arena) ) {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler); )
+ // Since some live range stretched, I need to recompute live
+ _live = NULL;
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg);
+ gather_lrg_masks( false );
+ live.compute( _maxlrg );
+ _live = &live;
+ }
+ // Create the interference graph using virtual copies
+ build_ifg_virtual( ); // Include stack slots this time
+
+ // Aggressive (but pessimistic) copy coalescing.
+ // This pass works on virtual copies. Any virtual copies which are not
+ // coalesced get manifested as actual copies
+ {
+ // The IFG is/was triangular. I am 'squaring it up' so Union can run
+ // faster. Union requires a 'for all' operation which is slow on the
+ // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
+ // meaning I can visit all the Nodes neighbors less than a Node in time
+ // O(# of neighbors), but I have to visit all the Nodes greater than a
+ // given Node and search them for an instance, i.e., time O(#MaxLRG)).
+ _ifg->SquareUp();
+
+ PhaseAggressiveCoalesce coalesce( *this );
+ coalesce.coalesce_driver( );
+ // Insert un-coalesced copies. Visit all Phis. Where inputs to a Phi do
+ // not match the Phi itself, insert a copy.
+ coalesce.insert_copies(_matcher);
+ }
+
+ // After aggressive coalesce, attempt a first cut at coloring.
+ // To color, we need the IFG and for that we need LIVE.
+ {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+ _live = NULL;
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg);
+ gather_lrg_masks( true );
+ live.compute( _maxlrg );
+ _live = &live;
+ }
+
+ // Build physical interference graph
+ uint must_spill = 0;
+ must_spill = build_ifg_physical( &live_arena );
+ // If we have a guaranteed spill, might as well spill now
+ if( must_spill ) {
+ if( !_maxlrg ) return;
+ // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
+ C->check_node_count(10*must_spill, "out of nodes before split");
+ if (C->failing()) return;
+ _maxlrg = Split( _maxlrg ); // Split spilling LRG everywhere
+ // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
+ // or we failed to split
+ C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
+ if (C->failing()) return;
+
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ _cfg.verify();
+ verify_base_ptrs(&live_arena);
+ }
+#endif
+ NOT_PRODUCT( C->verify_graph_edges(); )
+
+ compact(); // Compact LRGs; return new lower max lrg
+
+ {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+ _live = NULL;
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg); // Build a new interference graph
+ gather_lrg_masks( true ); // Collect intersect mask
+ live.compute( _maxlrg ); // Compute LIVE
+ _live = &live;
+ }
+ build_ifg_physical( &live_arena );
+ _ifg->SquareUp();
+ _ifg->Compute_Effective_Degree();
+ // Only do conservative coalescing if requested
+ if( OptoCoalesce ) {
+ // Conservative (and pessimistic) copy coalescing of those spills
+ PhaseConservativeCoalesce coalesce( *this );
+ // If max live ranges greater than cutoff, don't color the stack.
+ // This cutoff can be larger than below since it is only done once.
+ coalesce.coalesce_driver( );
+ }
+ compress_uf_map_for_nodes();
+
+#ifdef ASSERT
+ if( VerifyOpto ) _ifg->verify(this);
+#endif
+ } else {
+ ifg.SquareUp();
+ ifg.Compute_Effective_Degree();
+#ifdef ASSERT
+ set_was_low();
+#endif
+ }
+
+ // Prepare for Simplify & Select
+ cache_lrg_info(); // Count degree of LRGs
+
+ // Simplify the InterFerence Graph by removing LRGs of low degree.
+ // LRGs of low degree are trivially colorable.
+ Simplify();
+
+ // Select colors by re-inserting LRGs back into the IFG in reverse order.
+ // Return whether or not something spills.
+ uint spills = Select( );
+
+ // If we spill, split and recycle the entire thing
+ while( spills ) {
+ if( _trip_cnt++ > 24 ) {
+ DEBUG_ONLY( dump_for_spill_split_recycle(); )
+ if( _trip_cnt > 27 ) {
+ C->record_method_not_compilable("failed spill-split-recycle sanity check");
+ return;
+ }
+ }
+
+ if( !_maxlrg ) return;
+ _maxlrg = Split( _maxlrg ); // Split spilling LRG everywhere
+ // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
+ C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split");
+ if (C->failing()) return;
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ _cfg.verify();
+ verify_base_ptrs(&live_arena);
+ }
+#endif
+
+ compact(); // Compact LRGs; return new lower max lrg
+
+ // Nuke the live-ness and interference graph and LiveRanGe info
+ {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+ _live = NULL;
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg);
+
+ // Create LiveRanGe array.
+ // Intersect register masks for all USEs and DEFs
+ gather_lrg_masks( true );
+ live.compute( _maxlrg );
+ _live = &live;
+ }
+ must_spill = build_ifg_physical( &live_arena );
+ _ifg->SquareUp();
+ _ifg->Compute_Effective_Degree();
+
+ // Only do conservative coalescing if requested
+ if( OptoCoalesce ) {
+ // Conservative (and pessimistic) copy coalescing
+ PhaseConservativeCoalesce coalesce( *this );
+ // Checking for few live ranges determines how aggressive the coalesce is.
+ coalesce.coalesce_driver( );
+ }
+ compress_uf_map_for_nodes();
+#ifdef ASSERT
+ if( VerifyOpto ) _ifg->verify(this);
+#endif
+ cache_lrg_info(); // Count degree of LRGs
+
+ // Simplify the InterFerence Graph by removing LRGs of low degree.
+ // LRGs of low degree are trivially colorable.
+ Simplify();
+
+ // Select colors by re-inserting LRGs back into the IFG in reverse order.
+ // Return whether or not something spills.
+ spills = Select( );
+ }
+
+ // Count number of Simplify-Select trips per coloring success.
+ _allocator_attempts += _trip_cnt + 1;
+ _allocator_successes += 1;
+
+ // Peephole remove copies
+ post_allocate_copy_removal();
+
+ // max_reg is past the largest *register* used.
+ // Convert that to a frame_slot number.
+ if( _max_reg <= _matcher._new_SP )
+ _framesize = C->out_preserve_stack_slots();
+ else _framesize = _max_reg -_matcher._new_SP;
+ assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");
+
+ // This frame must preserve the required fp alignment
+ const int stack_alignment_in_words = Matcher::stack_alignment_in_slots();
+ if (stack_alignment_in_words > 0)
+ _framesize = round_to(_framesize, Matcher::stack_alignment_in_bytes());
+ assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );
+#ifndef PRODUCT
+ _total_framesize += _framesize;
+ if( (int)_framesize > _max_framesize )
+ _max_framesize = _framesize;
+#endif
+
+ // Convert CISC spills
+ fixup_spills();
+
+ // Log regalloc results
+ CompileLog* log = Compile::current()->log();
+ if (log != NULL) {
+ log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());
+ }
+
+ if (C->failing()) return;
+
+ NOT_PRODUCT( C->verify_graph_edges(); )
+
+ // Move important info out of the live_arena to longer lasting storage.
+ alloc_node_regs(_names.Size());
+ for( uint i=0; i < _names.Size(); i++ ) {
+ if( _names[i] ) { // Live range associated with Node?
+ LRG &lrg = lrgs( _names[i] );
+ if( lrg.num_regs() == 1 ) {
+ _node_regs[i].set1( lrg.reg() );
+ } else { // Must be a register-pair
+ if( !lrg._fat_proj ) { // Must be aligned adjacent register pair
+ // Live ranges record the highest register in their mask.
+ // We want the low register for the AD file writer's convenience.
+ _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) );
+ } else { // Misaligned; extract 2 bits
+ OptoReg::Name hi = lrg.reg(); // Get hi register
+ lrg.Remove(hi); // Yank from mask
+ int lo = lrg.mask().find_first_elem(); // Find lo
+ _node_regs[i].set_pair( hi, lo );
+ }
+ }
+ if( lrg._is_oop ) _node_oops.set(i);
+ } else {
+ _node_regs[i].set_bad();
+ }
+ }
+
+ // Done!
+ _live = NULL;
+ _ifg = NULL;
+ C->set_indexSet_arena(NULL); // ResourceArea is at end of scope
+}
+
+//------------------------------de_ssa-----------------------------------------
+void PhaseChaitin::de_ssa() {
+ // Set initial Names for all Nodes. Most Nodes get the virtual register
+ // number. A few get the ZERO live range number. These do not
+ // get allocated, but instead rely on correct scheduling to ensure that
+ // only one instance is simultaneously live at a time.
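+ // (Concretely, the loop below assigns live range zero exactly to nodes whose
+ // out_RegMask() is empty; every other node gets a fresh virtual register.)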
+ uint lr_counter = 1;
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ uint cnt = b->_nodes.size();
+
+ // Handle all the normal Nodes in the block
+ for( uint j = 0; j < cnt; j++ ) {
+ Node *n = b->_nodes[j];
+ // Pre-color to the zero live range, or pick virtual register
+ const RegMask &rm = n->out_RegMask();
+ _names.map( n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0 );
+ }
+ }
+ // Reset the Union-Find mapping to be identity
+ reset_uf_map(lr_counter);
+}
+
+
+//------------------------------gather_lrg_masks-------------------------------
+// Gather LiveRanGe information, including register masks. Modification of
+// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
+void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
+
+ // Nail down the frame pointer live range
+ uint fp_lrg = n2lidx(_cfg._root->in(1)->in(TypeFunc::FramePtr));
+ lrgs(fp_lrg)._cost += 1e12; // Cost is infinite
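+ // (A low score marks a live range as the preferred spill candidate and cost
+ // raises the score, so this huge cost keeps the frame pointer from ever
+ // being chosen for spilling.)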
+
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+
+ // For all instructions
+ for( uint j = 1; j < b->_nodes.size(); j++ ) {
+ Node *n = b->_nodes[j];
+ uint input_edge_start = 1; // Skip control for most nodes
+ if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base();
+ uint idx = n->is_Copy();
+
+ // Get virtual register number, same as LiveRanGe index
+ uint vreg = n2lidx(n);
+ LRG &lrg = lrgs(vreg);
+ if( vreg ) { // No vreg means un-allocable (e.g. memory)
+
+ // Collect has-copy bit
+ if( idx ) {
+ lrg._has_copy = 1;
+ uint clidx = n2lidx(n->in(idx));
+ LRG &copy_src = lrgs(clidx);
+ copy_src._has_copy = 1;
+ }
+
+ // Check for float-vs-int live range (used in register-pressure
+ // calculations)
+ const Type *n_type = n->bottom_type();
+ if( n_type->is_floatingpoint() )
+ lrg._is_float = 1;
+
+ // Check for twice prior spilling. Once prior spilling might have
+ // spilled 'soft', 2nd prior spill should have spilled 'hard' and
+ // further spilling is unlikely to make progress.
+ if( _spilled_once.test(n->_idx) ) {
+ lrg._was_spilled1 = 1;
+ if( _spilled_twice.test(n->_idx) )
+ lrg._was_spilled2 = 1;
+ }
+
+#ifndef PRODUCT
+ if (trace_spilling() && lrg._def != NULL) {
+ // collect defs for MultiDef printing
+ if (lrg._defs == NULL) {
+ lrg._defs = new (_ifg->_arena) GrowableArray<Node*>();
+ lrg._defs->append(lrg._def);
+ }
+ lrg._defs->append(n);
+ }
+#endif
+
+ // Check for a single def LRG; these can spill nicely
+ // via rematerialization. Flag as NULL for no def found
+ // yet, or 'n' for single def or NodeSentinel for many defs.
+ lrg._def = lrg._def ? NodeSentinel : n;
+
+ // Limit result register mask to acceptable registers
+ const RegMask &rm = n->out_RegMask();
+ lrg.AND( rm );
+ // Check for bound register masks
+ const RegMask &lrgmask = lrg.mask();
+ if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ lrg._is_bound = 1;
+
+ // Check for maximum frequency value
+ if( lrg._maxfreq < b->_freq )
+ lrg._maxfreq = b->_freq;
+
+ int ireg = n->ideal_reg();
+ assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
+ "oops must be in Op_RegP's" );
+ // Check for oop-iness, or long/double
+ // Check for multi-kill projection
+ switch( ireg ) {
+ case MachProjNode::fat_proj:
+ // Fat projections have size equal to number of registers killed
+ lrg.set_num_regs(rm.Size());
+ lrg.set_reg_pressure(lrg.num_regs());
+ lrg._fat_proj = 1;
+ lrg._is_bound = 1;
+ break;
+ case Op_RegP:
+#ifdef _LP64
+ lrg.set_num_regs(2); // Size is 2 stack words
+#else
+ lrg.set_num_regs(1); // Size is 1 stack word
+#endif
+ // Register pressure is tracked relative to the maximum values
+ // suggested for that platform, INTPRESSURE and FLOATPRESSURE,
+ // and relative to other types which compete for the same regs.
+ //
+ // The following table contains suggested values based on the
+ // architectures as defined in each .ad file.
+ // INTPRESSURE and FLOATPRESSURE may be tuned differently for
+ // compile-speed or performance.
+ // Note1:
+ // SPARC and SPARCV9 reg_pressures are at 2 instead of 1
+ // since .ad registers are defined as high and low halves.
+ // These reg_pressure values remain compatible with the code
+ // in is_high_pressure() which relates get_invalid_mask_size(),
+ // Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE.
+ // Note2:
+ // SPARC -d32 has 24 registers available for integral values,
+ // but only 10 of these are safe for 64-bit longs.
+ // Using set_reg_pressure(2) for both int and long means
+ // the allocator will believe it can fit 26 longs into
+ // registers. Using 2 for longs and 1 for ints means the
+ // allocator will attempt to put 52 integers into registers.
+ // The settings below limit this problem to methods with
+ // many long values which are being run on 32-bit SPARC.
+ //
+ // ------------------- reg_pressure --------------------
+ // Each entry is reg_pressure_per_value,number_of_regs
+ // RegL RegI RegFlags RegF RegD INTPRESSURE FLOATPRESSURE
+ // IA32 2 1 1 1 1 6 6
+ // IA64 1 1 1 1 1 50 41
+ // SPARC 2 2 2 2 2 48 (24) 52 (26)
+ // SPARCV9 2 2 2 2 2 48 (24) 52 (26)
+ // AMD64 1 1 1 1 1 14 15
+ // -----------------------------------------------------
+#if defined(SPARC)
+ lrg.set_reg_pressure(2); // use for v9 as well
+#else
+ lrg.set_reg_pressure(1); // normally one value per register
+#endif
+ if( n_type->isa_oop_ptr() ) {
+ lrg._is_oop = 1;
+ }
+ break;
+ case Op_RegL: // Check for long or double
+ case Op_RegD:
+ lrg.set_num_regs(2);
+ // Define platform specific register pressure
+#ifdef SPARC
+ lrg.set_reg_pressure(2);
+#elif defined(IA32)
+ if( ireg == Op_RegL ) {
+ lrg.set_reg_pressure(2);
+ } else {
+ lrg.set_reg_pressure(1);
+ }
+#else
+ lrg.set_reg_pressure(1); // normally one value per register
+#endif
+ // If this def of a double forces a mis-aligned double,
+ // flag as '_fat_proj' - really flag as allowing misalignment
+ // AND changes how we count interferences. A mis-aligned
+ // double can interfere with TWO aligned pairs, or effectively
+ // FOUR registers!
+ if( rm.is_misaligned_Pair() ) {
+ lrg._fat_proj = 1;
+ lrg._is_bound = 1;
+ }
+ break;
+ case Op_RegF:
+ case Op_RegI:
+ case Op_RegFlags:
+ case 0: // not an ideal register
+ lrg.set_num_regs(1);
+#ifdef SPARC
+ lrg.set_reg_pressure(2);
+#else
+ lrg.set_reg_pressure(1);
+#endif
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ // Now do the same for inputs
+ uint cnt = n->req();
+ // Setup for CISC SPILLING
+ uint inp = (uint)AdlcVMDeps::Not_cisc_spillable;
+ if( UseCISCSpill && after_aggressive ) {
+ inp = n->cisc_operand();
+ if( inp != (uint)AdlcVMDeps::Not_cisc_spillable )
+ // Convert operand number to edge index number
+ inp = n->as_Mach()->operand_index(inp);
+ }
+ // Prepare register mask for each input
+ for( uint k = input_edge_start; k < cnt; k++ ) {
+ uint vreg = n2lidx(n->in(k));
+ if( !vreg ) continue;
+
+ // If this instruction is CISC Spillable, add the flags
+ // bit to its appropriate input
+ if( UseCISCSpill && after_aggressive && inp == k ) {
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" use_cisc_RegMask: ");
+ n->dump();
+ }
+#endif
+ n->as_Mach()->use_cisc_RegMask();
+ }
+
+ LRG &lrg = lrgs(vreg);
+ // // Testing for floating point code shape
+ // Node *test = n->in(k);
+ // if( test->is_Mach() ) {
+ // MachNode *m = test->as_Mach();
+ // int op = m->ideal_Opcode();
+ // if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) {
+ // int zzz = 1;
+ // }
+ // }
+
+ // Limit result register mask to acceptable registers.
+ // Do not limit registers from uncommon uses before
+ // AggressiveCoalesce. This effectively pre-virtual-splits
+ // around uncommon uses of common defs.
+ const RegMask &rm = n->in_RegMask(k);
+ if( !after_aggressive &&
+ _cfg._bbs[n->in(k)->_idx]->_freq > 1000*b->_freq ) {
+ // Since we are BEFORE aggressive coalesce, leave the register
+ // mask untrimmed by the call. This encourages more coalescing.
+ // Later, AFTER aggressive, this live range will have to spill
+ // but the spiller handles slow-path calls very nicely.
+ } else {
+ lrg.AND( rm );
+ }
+ // Check for bound register masks
+ const RegMask &lrgmask = lrg.mask();
+ if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ lrg._is_bound = 1;
+ // If this use of a double forces a mis-aligned double,
+ // flag as '_fat_proj' - really flag as allowing misalignment
+ // AND changes how we count interferences. A mis-aligned
+ // double can interfere with TWO aligned pairs, or effectively
+ // FOUR registers!
+ if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) {
+ lrg._fat_proj = 1;
+ lrg._is_bound = 1;
+ }
+ // if the LRG is an unaligned pair, we will have to spill
+ // so clear the LRG's register mask if it is not already spilled
+ if ( !n->is_SpillCopy() &&
+ (lrg._def == NULL || lrg._def == NodeSentinel || !lrg._def->is_SpillCopy()) &&
+ lrgmask.is_misaligned_Pair()) {
+ lrg.Clear();
+ }
+
+ // Check for maximum frequency value
+ if( lrg._maxfreq < b->_freq )
+ lrg._maxfreq = b->_freq;
+
+ } // End for all allocated inputs
+ } // end for all instructions
+ } // end for all blocks
+
+ // Final per-liverange setup
+ for( uint i2=0; i2<_maxlrg; i2++ ) {
+ LRG &lrg = lrgs(i2);
+ if( lrg.num_regs() == 2 && !lrg._fat_proj )
+ lrg.ClearToPairs();
+ lrg.compute_set_mask_size();
+ if( lrg.not_free() ) { // Handle case where we lose from the start
+ lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
+ lrg._direct_conflict = 1;
+ }
+ lrg.set_degree(0); // no neighbors in IFG yet
+ }
+}
+
+//------------------------------set_was_low------------------------------------
+// Set the was-lo-degree bit. Conservative coalescing should not change the
+// colorability of the graph. If any live range was of low-degree before
+// coalescing, it should Simplify. This call sets the was-lo-degree bit.
+// The bit is checked in Simplify.
+void PhaseChaitin::set_was_low() {
+#ifdef ASSERT
+ for( uint i = 1; i < _maxlrg; i++ ) {
+ int size = lrgs(i).num_regs();
+ uint old_was_lo = lrgs(i)._was_lo;
+ lrgs(i)._was_lo = 0;
+ if( lrgs(i).lo_degree() ) {
+ lrgs(i)._was_lo = 1; // Trivially of low degree
+ } else { // Else check the Briggs assertion
+ // Briggs' observation is that the lo-degree neighbors of a
+ // hi-degree live range will not interfere with the color choices
+ // of said hi-degree live range. The Simplify reverse-stack-coloring
+ // order takes care of the details. Hence you do not have to count
+ // low-degree neighbors when determining if this guy colors.
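+ // (Concretely, the loop below sums MAX2(this range's size, neighbor's size)
+ // over the hi-degree neighbors only; if that sum stays below this range's
+ // degrees of freedom, a color is still guaranteed, so _was_lo is set.)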
+ int briggs_degree = 0;
+ IndexSet *s = _ifg->neighbors(i);
+ IndexSetIterator elements(s);
+ uint lidx;
+ while((lidx = elements.next()) != 0) {
+ if( !lrgs(lidx).lo_degree() )
+ briggs_degree += MAX2(size,lrgs(lidx).num_regs());
+ }
+ if( briggs_degree < lrgs(i).degrees_of_freedom() )
+ lrgs(i)._was_lo = 1; // Low degree via the briggs assertion
+ }
+ assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease");
+ }
+#endif
+}
+
+#define REGISTER_CONSTRAINED 16
+
+//------------------------------cache_lrg_info---------------------------------
+// Compute cost/area ratio, in case we spill. Build the lo-degree list.
+void PhaseChaitin::cache_lrg_info( ) {
+
+ for( uint i = 1; i < _maxlrg; i++ ) {
+ LRG &lrg = lrgs(i);
+
+ // Check for being of low degree: means we can be trivially colored.
+ // Low degree, dead or must-spill guys just get to simplify right away
+ if( lrg.lo_degree() ||
+ !lrg.alive() ||
+ lrg._must_spill ) {
+ // Split low degree list into those guys that must get a
+ // register and those that can go to register or stack.
+ // The idea is LRGs that can go register or stack color first when
+ // they have a good chance of getting a register. The register-only
+ // lo-degree live ranges always get a register.
+ OptoReg::Name hi_reg = lrg.mask().find_last_elem();
+ if( OptoReg::is_stack(hi_reg)) { // Can go to stack?
+ lrg._next = _lo_stk_degree;
+ _lo_stk_degree = i;
+ } else {
+ lrg._next = _lo_degree;
+ _lo_degree = i;
+ }
+ } else { // Else high degree
+ lrgs(_hi_degree)._prev = i;
+ lrg._next = _hi_degree;
+ lrg._prev = 0;
+ _hi_degree = i;
+ }
+ }
+}
+
+//------------------------------Pre-Simplify-----------------------------------
+// Simplify the IFG by removing LRGs of low degree that have NO copies
+void PhaseChaitin::Pre_Simplify( ) {
+
+ // Warm up the lo-degree no-copy list
+ int lo_no_copy = 0;
+ for( uint i = 1; i < _maxlrg; i++ ) {
+ if( (lrgs(i).lo_degree() && !lrgs(i)._has_copy) ||
+ !lrgs(i).alive() ||
+ lrgs(i)._must_spill ) {
+ lrgs(i)._next = lo_no_copy;
+ lo_no_copy = i;
+ }
+ }
+
+ while( lo_no_copy ) {
+ uint lo = lo_no_copy;
+ lo_no_copy = lrgs(lo)._next;
+ int size = lrgs(lo).num_regs();
+
+ // Put the simplified guy on the simplified list.
+ lrgs(lo)._next = _simplified;
+ _simplified = lo;
+
+ // Yank this guy from the IFG.
+ IndexSet *adj = _ifg->remove_node( lo );
+
+ // If any neighbors' degrees fall below their number of
+ // allowed registers, then put that neighbor on the low degree
+ // list. Note that 'degree' can only fall and 'numregs' is
+ // unchanged by this action. Thus the two are equal at most once,
+ // so LRGs hit the lo-degree worklists at most once.
+ IndexSetIterator elements(adj);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ LRG *n = &lrgs(neighbor);
+ assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
+
+ // Check for just becoming of-low-degree
+ if( n->just_lo_degree() && !n->_has_copy ) {
+ assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
+ // Put on lo-degree list
+ n->_next = lo_no_copy;
+ lo_no_copy = neighbor;
+ }
+ }
+ } // End of while lo-degree no_copy worklist not empty
+
+ // No more lo-degree no-copy live ranges to simplify
+}
+
+//------------------------------Simplify---------------------------------------
+// Simplify the IFG by removing LRGs of low degree.
+void PhaseChaitin::Simplify( ) {
+
+ while( 1 ) { // Repeat till simplified it all
+ // May want to explore simplifying lo_degree before _lo_stk_degree.
+ // This might result in more spills coloring into registers during
+ // Select().
+ while( _lo_degree || _lo_stk_degree ) {
+ // Pull from the plain lo-degree list first, if possible; else from lo_stk
+ uint lo;
+ if( _lo_degree ) {
+ lo = _lo_degree;
+ _lo_degree = lrgs(lo)._next;
+ } else {
+ lo = _lo_stk_degree;
+ _lo_stk_degree = lrgs(lo)._next;
+ }
+
+ // Put the simplified guy on the simplified list.
+ lrgs(lo)._next = _simplified;
+ _simplified = lo;
+ // If this guy is "at risk" then mark his current neighbors
+ if( lrgs(lo)._at_risk ) {
+ IndexSetIterator elements(_ifg->neighbors(lo));
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ lrgs(datum)._risk_bias = lo;
+ }
+ }
+
+ // Yank this guy from the IFG.
+ IndexSet *adj = _ifg->remove_node( lo );
+
+ // If any neighbors' degrees fall below their number of
+ // allowed registers, then put that neighbor on the low degree
+ // list. Note that 'degree' can only fall and 'numregs' is
+ // unchanged by this action. Thus the two are equal at most once,
+ // so LRGs hit the lo-degree worklist at most once.
+ IndexSetIterator elements(adj);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ LRG *n = &lrgs(neighbor);
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
+ }
+#endif
+
+ // Check for just becoming of-low-degree just counting registers.
+ // _must_spill live ranges are already on the low degree list.
+ if( n->just_lo_degree() && !n->_must_spill ) {
+ assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
+ // Pull from hi-degree list
+ uint prev = n->_prev;
+ uint next = n->_next;
+ if( prev ) lrgs(prev)._next = next;
+ else _hi_degree = next;
+ lrgs(next)._prev = prev;
+ n->_next = _lo_degree;
+ _lo_degree = neighbor;
+ }
+ }
+ } // End of while lo-degree/lo_stk_degree worklist not empty
+
+ // Check whether we have simplified everything: is the hi-degree list empty?
+ if( !_hi_degree ) break;
+
+ // Time to pick a potential spill guy
+ uint lo_score = _hi_degree;
+ double score = lrgs(lo_score).score();
+ double area = lrgs(lo_score)._area;
+
+ // Find cheapest guy
+ debug_only( int lo_no_simplify=0; );
+ for( uint i = _hi_degree; i; i = lrgs(i)._next ) {
+ assert( !(*_ifg->_yanked)[i], "" );
+ // It's just vaguely possible to move hi-degree to lo-degree without
+ // going through a just-lo-degree stage: If you remove a double from
+ // a float live range, its degree will drop by 2 and you can skip the
+ // just-lo-degree stage. It's very rare (shows up after 5000+ methods
+ // in -Xcomp of Java2Demo). So just choose this guy to simplify next.
+ if( lrgs(i).lo_degree() ) {
+ lo_score = i;
+ break;
+ }
+ debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; );
+ double iscore = lrgs(i).score();
+ double iarea = lrgs(i)._area;
+
+ // Compare cost/area of i vs cost/area of lo_score. Smaller cost/area
+ // wins. Ties happen because all live ranges in question have spilled
+ // a few times before and the spill-score adds a huge number which
+ // washes out the low order bits. We are choosing the lesser of 2
+ // evils; in this case pick largest area to spill.
+ if( iscore < score ||
+ (iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) ) {
+ lo_score = i;
+ score = iscore;
+ area = iarea;
+ }
+ }
+ LRG *lo_lrg = &lrgs(lo_score);
+ // The live range we choose for spilling is either hi-degree, or very
+ // rarely it can be low-degree. If we choose a hi-degree live range
+ // there better not be any lo-degree choices.
+ assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" );
+
+ // Pull from hi-degree list
+ uint prev = lo_lrg->_prev;
+ uint next = lo_lrg->_next;
+ if( prev ) lrgs(prev)._next = next;
+ else _hi_degree = next;
+ lrgs(next)._prev = prev;
+ // Jam him on the lo-degree list, despite his high degree.
+ // Maybe he'll get a color, and maybe he'll spill.
+ // Only Select() will know.
+ lrgs(lo_score)._at_risk = true;
+ _lo_degree = lo_score;
+ lo_lrg->_next = 0;
+
+ } // End of while not simplified everything
+
+}
+
+//------------------------------bias_color-------------------------------------
+// Choose a color using the biasing heuristic
+OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
+
+ // Check for "at_risk" LRG's
+ uint risk_lrg = Find(lrg._risk_bias);
+ if( risk_lrg != 0 ) {
+ // Walk the colored neighbors of the "at_risk" candidate
+ // Choose a color which is both legal and already taken by a neighbor
+ // of the "at_risk" candidate, to improve the chances that the
+ // "at_risk" candidate itself will color
+ IndexSetIterator elements(_ifg->neighbors(risk_lrg));
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ OptoReg::Name reg = lrgs(datum).reg();
+ // If this LRG's register is legal for us, choose it
+ if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE &&
+ lrg.mask().Member(OptoReg::add(reg,-chunk)) &&
+ (lrg.num_regs()==1 || // either size 1
+ (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs)
+ return reg;
+ }
+ }
+
+ uint copy_lrg = Find(lrg._copy_bias);
+ if( copy_lrg != 0 ) {
+ // If he has a color,
+ if( !(*(_ifg->_yanked))[copy_lrg] ) {
+ OptoReg::Name reg = lrgs(copy_lrg).reg();
+ // And it is legal for you,
+ if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE &&
+ lrg.mask().Member(OptoReg::add(reg,-chunk)) &&
+ (lrg.num_regs()==1 || // either size 1
+ (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs)
+ return reg;
+ } else if( chunk == 0 ) {
+ // Choose a color which is legal for him
+ RegMask tempmask = lrg.mask();
+ tempmask.AND(lrgs(copy_lrg).mask());
+ OptoReg::Name reg;
+ if( lrg.num_regs() == 1 ) {
+ reg = tempmask.find_first_elem();
+ } else {
+ tempmask.ClearToPairs();
+ reg = tempmask.find_first_pair();
+ }
+ if( OptoReg::is_valid(reg) )
+ return reg;
+ }
+ }
+
+ // If no bias info exists, just go with the register selection ordering
+ if( lrg.num_regs() == 2 ) {
+ // Find an aligned pair
+ return OptoReg::add(lrg.mask().find_first_pair(),chunk);
+ }
+
+ // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
+ // copy removal to remove many more copies, by preventing a just-assigned
+ // register from being repeatedly assigned.
+ OptoReg::Name reg = lrg.mask().find_first_elem();
+ if( (++_alternate & 1) && OptoReg::is_valid(reg) ) {
+ // This 'Remove; find; Insert' idiom is an expensive way to find the
+ // SECOND element in the mask.
+ lrg.Remove(reg);
+ OptoReg::Name reg2 = lrg.mask().find_first_elem();
+ lrg.Insert(reg);
+ if( OptoReg::is_reg(reg2))
+ reg = reg2;
+ }
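+ // The mask is kept in chunk-0 numbering; bias the result by 'chunk' so
+ // the caller gets a register number in the current chunk (Select()
+ // later folds it back into normal space).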
+ return OptoReg::add( reg, chunk );
+}
+
+//------------------------------choose_color-----------------------------------
+// Choose a color in the current chunk
+OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
+ assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
+ assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)");
+
+ if( lrg.num_regs() == 1 || // Common Case
+ !lrg._fat_proj ) // Aligned+adjacent pairs ok
+ // Use a heuristic to "bias" the color choice
+ return bias_color(lrg, chunk);
+
+ assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
+
+ // Fat-proj case or misaligned double argument.
+ assert(lrg.compute_mask_size() == lrg.num_regs() ||
+ lrg.num_regs() == 2,"fat projs exactly color" );
+ assert( !chunk, "always color in 1st chunk" );
+ // Return the highest element in the set.
+ return lrg.mask().find_last_elem();
+}
+
+//------------------------------Select-----------------------------------------
+// Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted
+// in reverse order of removal. As long as nothing of hi-degree was yanked,
+// everything going back is guaranteed a color. Select that color. If some
+// hi-degree LRG cannot get a color then we record that we must spill.
+uint PhaseChaitin::Select( ) {
+ uint spill_reg = LRG::SPILL_REG;
+ _max_reg = OptoReg::Name(0); // Past max register used
+ while( _simplified ) {
+ // Pull next LRG from the simplified list - in reverse order of removal
+ uint lidx = _simplified;
+ LRG *lrg = &lrgs(lidx);
+ _simplified = lrg->_next;
+
+
+#ifndef PRODUCT
+ if (trace_spilling()) {
+ ttyLocker ttyl;
+ tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
+ lrg->degrees_of_freedom());
+ lrg->dump();
+ }
+#endif
+
+ // Re-insert into the IFG
+ _ifg->re_insert(lidx);
+ if( !lrg->alive() ) continue;
+ // capture allstackedness flag before mask is hacked
+ const int is_allstack = lrg->mask().is_AllStack();
+
+ // Yeah, yeah, yeah, I know, I know. I can refactor this
+ // to avoid the GOTO, although the refactored code will not
+ // be much clearer. We arrive here IFF we have a stack-based
+ // live range that cannot color in the current chunk, and it
+ // has to move into the next free stack chunk.
+ int chunk = 0; // Current chunk is first chunk
+ retry_next_chunk:
+
+ // Remove neighbor colors
+ IndexSet *s = _ifg->neighbors(lidx);
+
+ debug_only(RegMask orig_mask = lrg->mask();)
+ IndexSetIterator elements(s);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ // Note that neighbor might be a spill_reg. In this case, exclusion
+ // of its color will be a no-op, since the spill_reg chunk is in outer
+ // space. Also, if neighbor is in a different chunk, this exclusion
+ // will be a no-op. (Later on, if lrg runs out of possible colors in
+ // its chunk, a new chunk of color may be tried, in which case
+ // examination of neighbors is started again, at retry_next_chunk.)
+ LRG &nlrg = lrgs(neighbor);
+ OptoReg::Name nreg = nlrg.reg();
+ // Only subtract masks in the same chunk
+ if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) {
+#ifndef PRODUCT
+ uint size = lrg->mask().Size();
+ RegMask rm = lrg->mask();
+#endif
+ lrg->SUBTRACT(nlrg.mask());
+#ifndef PRODUCT
+ if (trace_spilling() && lrg->mask().Size() != size) {
+ ttyLocker ttyl;
+ tty->print("L%d ", lidx);
+ rm.dump();
+ tty->print(" intersected L%d ", neighbor);
+ nlrg.mask().dump();
+ tty->print(" removed ");
+ rm.SUBTRACT(lrg->mask());
+ rm.dump();
+ tty->print(" leaving ");
+ lrg->mask().dump();
+ tty->cr();
+ }
+#endif
+ }
+ }
+ //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
+ // Aligned pairs need aligned masks
+ if( lrg->num_regs() == 2 && !lrg->_fat_proj )
+ lrg->ClearToPairs();
+
+ // Check if a color is available and if so pick the color
+ OptoReg::Name reg = choose_color( *lrg, chunk );
+#ifdef SPARC
+ debug_only(lrg->compute_set_mask_size());
+ assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
+#endif
+
+ //---------------
+ // If we fail to color and the AllStack flag is set, trigger
+ // a chunk-rollover event
+ if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) {
+ // Bump register mask up to next stack chunk
+ chunk += RegMask::CHUNK_SIZE;
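+ // Reset the mask to the full register/stack set; the neighbor colors
+ // belonging to the new chunk are subtracted again after the goto.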
+ lrg->Set_All();
+
+ goto retry_next_chunk;
+ }
+
+ //---------------
+ // Did we get a color?
+ else if( OptoReg::is_valid(reg)) {
+#ifndef PRODUCT
+ RegMask avail_rm = lrg->mask();
+#endif
+
+ // Record selected register
+ lrg->set_reg(reg);
+
+ if( reg >= _max_reg ) // Compute max register limit
+ _max_reg = OptoReg::add(reg,1);
+ // Fold reg back into normal space
+ reg = OptoReg::add(reg,-chunk);
+
+ // If the live range is not bound, then we actually had some choices
+ // to make. In this case, the mask has more bits in it than the colors
+ // chosen. Restrict the mask to just what was picked.
+ if( lrg->num_regs() == 1 ) { // Size 1 live range
+ lrg->Clear(); // Clear the mask
+ lrg->Insert(reg); // Set regmask to match selected reg
+ lrg->set_mask_size(1);
+ } else if( !lrg->_fat_proj ) {
+ // For pairs, also insert the low bit of the pair
+ assert( lrg->num_regs() == 2, "unbound fatproj???" );
+ lrg->Clear(); // Clear the mask
+ lrg->Insert(reg); // Set regmask to match selected reg
+ lrg->Insert(OptoReg::add(reg,-1));
+ lrg->set_mask_size(2);
+ } else { // Else fatproj
+ // mask must be equal to fatproj bits, by definition
+ }
+#ifndef PRODUCT
+ if (trace_spilling()) {
+ ttyLocker ttyl;
+ tty->print("L%d selected ", lidx);
+ lrg->mask().dump();
+ tty->print(" from ");
+ avail_rm.dump();
+ tty->cr();
+ }
+#endif
+ // Note that reg is the highest-numbered register in the newly-bound mask.
+ } // end color available case
+
+ //---------------
+ // Live range is live and no colors available
+ else {
+ assert( lrg->alive(), "" );
+ assert( !lrg->_fat_proj || lrg->_def == NodeSentinel ||
+ lrg->_def->outcnt() > 0, "fat_proj cannot spill");
+ assert( !orig_mask.is_AllStack(), "All Stack does not spill" );
+
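+ // Each spilled live range receives a distinct pseudo-register number
+ // at or above LRG::SPILL_REG; the count of these assignments is what
+ // Select() returns below.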
+ // Assign the special spillreg register
+ lrg->set_reg(OptoReg::Name(spill_reg++));
+ // Do not empty the regmask; leave mask_size lying around
+ // for use during Spilling
+#ifndef PRODUCT
+ if( trace_spilling() ) {
+ ttyLocker ttyl;
+ tty->print("L%d spilling with neighbors: ", lidx);
+ s->dump();
+ debug_only(tty->print(" original mask: "));
+ debug_only(orig_mask.dump());
+ dump_lrg(lidx);
+ }
+#endif
+ } // end spill case
+
+ }
+
+ return spill_reg-LRG::SPILL_REG; // Return number of spills
+}
+
+
+//------------------------------copy_was_spilled-------------------------------
+// Copy 'was_spilled'-edness from the source Node to the dst Node.
+void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
+ if( _spilled_once.test(src->_idx) ) {
+ _spilled_once.set(dst->_idx);
+ lrgs(Find(dst))._was_spilled1 = 1;
+ if( _spilled_twice.test(src->_idx) ) {
+ _spilled_twice.set(dst->_idx);
+ lrgs(Find(dst))._was_spilled2 = 1;
+ }
+ }
+}
+
+//------------------------------set_was_spilled--------------------------------
+// Set the 'spilled_once' or 'spilled_twice' flag on a node.
+void PhaseChaitin::set_was_spilled( Node *n ) {
+ if( _spilled_once.test_set(n->_idx) )
+ _spilled_twice.set(n->_idx);
+}
+
+//------------------------------fixup_spills-----------------------------------
+// Convert Ideal spill instructions into proper FramePtr + offset Loads and
+// Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are.
+void PhaseChaitin::fixup_spills() {
+ // This function does only cisc spill work.
+ if( !UseCISCSpill ) return;
+
+ NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )
+
+ // Grab the Frame Pointer
+ Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr);
+
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+
+ // For all instructions in block
+ uint last_inst = b->end_idx();
+ for( uint j = 1; j <= last_inst; j++ ) {
+ Node *n = b->_nodes[j];
+
+ // Dead instruction???
+ assert( n->outcnt() != 0 ||// Nothing dead after post alloc
+ C->top() == n || // Or the random TOP node
+ n->is_Proj(), // Or a fat-proj kill node
+ "No dead instructions after post-alloc" );
+
+ int inp = n->cisc_operand();
+ if( inp != AdlcVMDeps::Not_cisc_spillable ) {
+ // Convert operand number to edge index number
+ MachNode *mach = n->as_Mach();
+ inp = mach->operand_index(inp);
+ Node *src = n->in(inp); // Value to load or store
+ LRG &lrg_cisc = lrgs( Find_const(src) );
+ OptoReg::Name src_reg = lrg_cisc.reg();
+ // Doubles record the HIGH register of an adjacent pair.
+ src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs());
+ if( OptoReg::is_stack(src_reg) ) { // If input is on stack
+ // This is a CISC Spill, get stack offset and construct new node
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" reg-instr: ");
+ n->dump();
+ }
+#endif
+ int stk_offset = reg2offset(src_reg);
+ // Bailout if we might exceed node limit when spilling this instruction
+ C->check_node_count(0, "out of nodes fixing spills");
+ if (C->failing()) return;
+ // Transform node
+ MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach();
+ cisc->set_req(inp,fp); // Base register is frame pointer
+ if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) {
+ assert( cisc->oper_input_base() == 2, "Only adding one edge");
+ cisc->ins_req(1,src); // Requires a memory edge
+ }
+ b->_nodes.map(j,cisc); // Insert into basic block
+ n->replace_by(cisc); // Correct graph
+ //
+ ++_used_cisc_instructions;
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" cisc-instr: ");
+ cisc->dump();
+ }
+#endif
+ } else {
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" using reg-instr: ");
+ n->dump();
+ }
+#endif
+ ++_unused_cisc_instructions; // input can be on stack
+ }
+ }
+
+ } // End of for all instructions
+
+ } // End of for all blocks
+}
+
+//------------------------------find_base_for_derived--------------------------
+// Helper to stretch above; recursively discover the base Node for a
+// given derived Node. Easy for AddP-related machine nodes, but needs
+// to be recursive for derived Phis.
+Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) {
+ // See if already computed; if so return it
+ if( derived_base_map[derived->_idx] )
+ return derived_base_map[derived->_idx];
+
+ // See if this happens to be a base.
+ // NOTE: we use TypePtr instead of TypeOopPtr because we can have
+ // pointers derived from NULL! These are always along paths that
+ // can't happen at run-time but the optimizer cannot deduce it so
+ // we have to handle it gracefully.
+ const TypePtr *tj = derived->bottom_type()->isa_ptr();
+ // If it's an OOP with a non-zero offset, then it is derived.
+ if( tj->_offset == 0 ) {
+ derived_base_map[derived->_idx] = derived;
+ return derived;
+ }
+ // Derived is NULL+offset? Base is NULL!
+ if( derived->is_Con() ) {
+ Node *base = new (C, 1) ConPNode( TypePtr::NULL_PTR );
+ uint no_lidx = 0; // an unmatched constant in debug info has no LRG
+ _names.extend(base->_idx, no_lidx);
+ derived_base_map[derived->_idx] = base;
+ return base;
+ }
+
+ // Check for AddP-related opcodes
+ if( !derived->is_Phi() ) {
+ assert( derived->as_Mach()->ideal_Opcode() == Op_AddP, "" );
+ Node *base = derived->in(AddPNode::Base);
+ derived_base_map[derived->_idx] = base;
+ return base;
+ }
+
+ // Recursively find bases for Phis.
+ // First check to see if we can avoid a base Phi here.
+ Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg);
+ uint i;
+ for( i = 2; i < derived->req(); i++ )
+ if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg))
+ break;
+ // Went to the end without finding any different bases?
+ if( i == derived->req() ) { // No need for a base Phi here
+ derived_base_map[derived->_idx] = base;
+ return base;
+ }
+
+ // Now we see we need a base-Phi here to merge the bases
+ base = new (C, derived->req()) PhiNode( derived->in(0), base->bottom_type() );
+ for( i = 1; i < derived->req(); i++ )
+ base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg));
+
+ // Search the current block for an existing base-Phi
+ Block *b = _cfg._bbs[derived->_idx];
+ for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi
+ Node *phi = b->_nodes[i];
+ if( !phi->is_Phi() ) { // Found end of Phis with no match?
+ b->_nodes.insert( i, base ); // Must insert created Phi here as base
+ _cfg._bbs.map( base->_idx, b );
+ new_lrg(base,maxlrg++);
+ break;
+ }
+ // See if Phi matches.
+ uint j;
+ for( j = 1; j < base->req(); j++ )
+ if( phi->in(j) != base->in(j) &&
+ !(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs
+ break;
+ if( j == base->req() ) { // All inputs match?
+ base = phi; // Then use existing 'phi' and drop 'base'
+ break;
+ }
+ }
+
+
+ // Cache info for later passes
+ derived_base_map[derived->_idx] = base;
+ return base;
+}
+
+
+//------------------------------stretch_base_pointer_live_ranges---------------
+// At each Safepoint, insert extra debug edges for each pair of derived value/
+// base pointer that is live across the Safepoint for oopmap building. The
+// edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
+// required edge set.
+bool PhaseChaitin::stretch_base_pointer_live_ranges( ResourceArea *a ) {
+ int must_recompute_live = false;
+ uint maxlrg = _maxlrg;
+ Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique());
+ memset( derived_base_map, 0, sizeof(Node*)*C->unique() );
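+ // derived_base_map caches, per Node index, the base discovered for a
+ // derived pointer so the recursive walk in find_base_for_derived() is
+ // done at most once per Node.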
+
+ // For all blocks in RPO do...
+ for( uint i=0; i<_cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ // Note use of deep-copy constructor. I cannot hammer the original
+ // liveout bits, because they are needed by the following coalesce pass.
+ IndexSet liveout(_live->live(b));
+
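+ // Walk the block bottom-up so that 'liveout' always holds the set of
+ // live ranges live just below the current instruction.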
+ for( uint j = b->end_idx() + 1; j > 1; j-- ) {
+ Node *n = b->_nodes[j-1];
+
+ // Pre-split compares of loop-phis. Loop-phis form a cycle we would
+ // like to see in the same register. Compare uses the loop-phi and so
+ // extends its live range BUT cannot be part of the cycle. If this
+ // extended live range overlaps with the update of the loop-phi value
+ // we need both alive at the same time -- which requires at least 1
+ // copy. But because Intel has only 2-address instructions we end up with
+ // at least 2 copies, one before the loop-phi update instruction and
+ // one after. Instead we split the input to the compare just after the
+ // phi.
+ if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {
+ Node *phi = n->in(1);
+ if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
+ Block *phi_block = _cfg._bbs[phi->_idx];
+ if( _cfg._bbs[phi_block->pred(2)->_idx] == b ) {
+ const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
+ Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
+ insert_proj( phi_block, 1, spill, maxlrg++ );
+ n->set_req(1,spill);
+ must_recompute_live = true;
+ }
+ }
+ }
+
+ // Get value being defined
+ uint lidx = n2lidx(n);
+ if( lidx && lidx < _maxlrg /* Ignore the occasional brand-new live range */) {
+ // Remove from live-out set
+ liveout.remove(lidx);
+
+ // Copies do not define a new value and so do not interfere.
+ // Remove the copy's source from the liveout set before interfering.
+ uint idx = n->is_Copy();
+ if( idx ) liveout.remove( n2lidx(n->in(idx)) );
+ }
+
+ // Found a safepoint?
+ JVMState *jvms = n->jvms();
+ if( jvms ) {
+ // Now scan for a live derived pointer
+ IndexSetIterator elements(&liveout);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ // Find reaching DEF for base and derived values
+ // This works because we are still in SSA during this call.
+ Node *derived = lrgs(neighbor)._def;
+ const TypePtr *tj = derived->bottom_type()->isa_ptr();
+ // If it's an OOP with a non-zero offset, then it is derived.
+ if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {
+ Node *base = find_base_for_derived( derived_base_map, derived, maxlrg );
+ assert( base->_idx < _names.Size(), "" );
+ // Add reaching DEFs of derived pointer and base pointer as a
+ // pair of inputs
+ n->add_req( derived );
+ n->add_req( base );
+
+ // See if the base pointer is already live to this point.
+ // Since I'm working on the SSA form, live-ness amounts to
+ // reaching def's. So if I find the base's live range then
+ // I know the base's def reaches here.
+ if( (n2lidx(base) >= _maxlrg ||// (Brand new base (hence not live) or
+ !liveout.member( n2lidx(base) ) ) && // not live) AND
+ (n2lidx(base) > 0) && // not a constant
+ _cfg._bbs[base->_idx] != b ) { // base not def'd in blk)
+ // Base pointer is not currently live. Since I stretched
+ // the base pointer to here and it crosses basic-block
+ // boundaries, the global live info is now incorrect.
+ // Recompute live.
+ must_recompute_live = true;
+ } // End of if base pointer is not live to debug info
+ }
+ } // End of scan all live data for derived ptrs crossing GC point
+ } // End of if found a GC point
+
+ // Make all inputs live
+ if( !n->is_Phi() ) { // Phi function uses come from prior block
+ for( uint k = 1; k < n->req(); k++ ) {
+ uint lidx = n2lidx(n->in(k));
+ if( lidx < _maxlrg )
+ liveout.insert( lidx );
+ }
+ }
+
+ } // End of forall instructions in block
+ liveout.clear(); // Free the memory used by liveout.
+
+ } // End of forall blocks
+ _maxlrg = maxlrg;
+
+ // If I created a new live range I need to recompute live
+ if( maxlrg != _ifg->_maxlrg )
+ must_recompute_live = true;
+
+ return must_recompute_live != 0;
+}
+
+
+//------------------------------add_reference----------------------------------
+// Extend the node to LRG mapping
+void PhaseChaitin::add_reference( const Node *node, const Node *old_node ) {
+ _names.extend( node->_idx, n2lidx(old_node) );
+}
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void PhaseChaitin::dump( const Node *n ) const {
+ uint r = (n->_idx < _names.Size() ) ? Find_const(n) : 0;
+ tty->print("L%d",r);
+ if( r && n->Opcode() != Op_Phi ) {
+ if( _node_regs ) { // Got a post-allocation copy of allocation?
+ tty->print("[");
+ OptoReg::Name second = get_reg_second(n);
+ if( OptoReg::is_valid(second) ) {
+ if( OptoReg::is_reg(second) )
+ tty->print("%s:",Matcher::regName[second]);
+ else
+ tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second));
+ }
+ OptoReg::Name first = get_reg_first(n);
+ if( OptoReg::is_reg(first) )
+ tty->print("%s]",Matcher::regName[first]);
+ else
+ tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first));
+ } else
+ n->out_RegMask().dump();
+ }
+ tty->print("/N%d\t",n->_idx);
+ tty->print("%s === ", n->Name());
+ uint k;
+ for( k = 0; k < n->req(); k++) {
+ Node *m = n->in(k);
+ if( !m ) tty->print("_ ");
+ else {
+ uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
+ tty->print("L%d",r);
+ // Data MultiNodes can have projections with no real registers.
+ // Don't die while dumping them.
+ int op = n->Opcode();
+ if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) {
+ if( _node_regs ) {
+ tty->print("[");
+ OptoReg::Name second = get_reg_second(n->in(k));
+ if( OptoReg::is_valid(second) ) {
+ if( OptoReg::is_reg(second) )
+ tty->print("%s:",Matcher::regName[second]);
+ else
+ tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer),
+ reg2offset_unchecked(second));
+ }
+ OptoReg::Name first = get_reg_first(n->in(k));
+ if( OptoReg::is_reg(first) )
+ tty->print("%s]",Matcher::regName[first]);
+ else
+ tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer),
+ reg2offset_unchecked(first));
+ } else
+ n->in_RegMask(k).dump();
+ }
+ tty->print("/N%d ",m->_idx);
+ }
+ }
+ if( k < n->len() && n->in(k) ) tty->print("| ");
+ for( ; k < n->len(); k++ ) {
+ Node *m = n->in(k);
+ if( !m ) break;
+ uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
+ tty->print("L%d",r);
+ tty->print("/N%d ",m->_idx);
+ }
+ if( n->is_Mach() ) n->as_Mach()->dump_spec(tty);
+ else n->dump_spec(tty);
+ if( _spilled_once.test(n->_idx ) ) {
+ tty->print(" Spill_1");
+ if( _spilled_twice.test(n->_idx ) )
+ tty->print(" Spill_2");
+ }
+ tty->print("\n");
+}
+
+void PhaseChaitin::dump( const Block * b ) const {
+ b->dump_head( &_cfg._bbs );
+
+ // For all instructions
+ for( uint j = 0; j < b->_nodes.size(); j++ )
+ dump(b->_nodes[j]);
+ // Print live-out info at end of block
+ if( _live ) {
+ tty->print("Liveout: ");
+ IndexSet *live = _live->live(b);
+ IndexSetIterator elements(live);
+ tty->print("{");
+ uint i;
+ while ((i = elements.next()) != 0) {
+ tty->print("L%d ", Find_const(i));
+ }
+ tty->print_cr("}");
+ }
+ tty->print("\n");
+}
+
+void PhaseChaitin::dump() const {
+ tty->print( "--- Chaitin -- argsize: %d framesize: %d ---\n",
+ _matcher._new_SP, _framesize );
+
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ )
+ dump(_cfg._blocks[i]);
+ // End of per-block dump
+ tty->print("\n");
+
+ if (!_ifg) {
+ tty->print("(No IFG.)\n");
+ return;
+ }
+
+ // Dump LRG array
+ tty->print("--- Live RanGe Array ---\n");
+ for(uint i2 = 1; i2 < _maxlrg; i2++ ) {
+ tty->print("L%d: ",i2);
+ if( i2 < _ifg->_maxlrg ) lrgs(i2).dump( );
+ else tty->print("new LRG");
+ }
+ tty->print_cr("");
+
+ // Dump lo-degree list
+ tty->print("Lo degree: ");
+ for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next )
+ tty->print("L%d ",i3);
+ tty->print_cr("");
+
+ // Dump lo-stk-degree list
+ tty->print("Lo stk degree: ");
+ for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next )
+ tty->print("L%d ",i4);
+ tty->print_cr("");
+
+ // Dump hi-degree list
+ tty->print("Hi degree: ");
+ for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next )
+ tty->print("L%d ",i5);
+ tty->print_cr("");
+}
+
+//------------------------------dump_degree_lists------------------------------
+void PhaseChaitin::dump_degree_lists() const {
+ // Dump lo-degree list
+ tty->print("Lo degree: ");
+ for( uint i = _lo_degree; i; i = lrgs(i)._next )
+ tty->print("L%d ",i);
+ tty->print_cr("");
+
+ // Dump lo-stk-degree list
+ tty->print("Lo stk degree: ");
+ for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next )
+ tty->print("L%d ",i2);
+ tty->print_cr("");
+
+ // Dump hi-degree list
+ tty->print("Hi degree: ");
+ for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next )
+ tty->print("L%d ",i3);
+ tty->print_cr("");
+}
+
+//------------------------------dump_simplified--------------------------------
+void PhaseChaitin::dump_simplified() const {
+ tty->print("Simplified: ");
+ for( uint i = _simplified; i; i = lrgs(i)._next )
+ tty->print("L%d ",i);
+ tty->print_cr("");
+}
+
+static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) {
+ if ((int)reg < 0)
+ sprintf(buf, "<OptoReg::%d>", (int)reg);
+ else if (OptoReg::is_reg(reg))
+ strcpy(buf, Matcher::regName[reg]);
+ else
+ sprintf(buf,"%s + #%d",OptoReg::regname(OptoReg::c_frame_pointer),
+ pc->reg2offset(reg));
+ return buf+strlen(buf);
+}
+
+//------------------------------dump_register----------------------------------
+// Dump a register name into a buffer. Be intelligent if we get called
+// before allocation is complete.
+char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
+ if( !this ) { // Not got anything?
+ sprintf(buf,"N%d",n->_idx); // Then use Node index
+ } else if( _node_regs ) {
+ // Post allocation, use direct mappings, no LRG info available
+ print_reg( get_reg_first(n), this, buf );
+ } else {
+ uint lidx = Find_const(n); // Grab LRG number
+ if( !_ifg ) {
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ } else if( !lidx ) { // Special, not allocated value
+ strcpy(buf,"Special");
+ } else if( (lrgs(lidx).num_regs() == 1)
+ ? !lrgs(lidx).mask().is_bound1()
+ : !lrgs(lidx).mask().is_bound2() ) {
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ } else { // Hah! We have a bound machine register
+ print_reg( lrgs(lidx).reg(), this, buf );
+ }
+ }
+ return buf+strlen(buf);
+}
+
+//----------------------dump_for_spill_split_recycle--------------------------
+void PhaseChaitin::dump_for_spill_split_recycle() const {
+ if( WizardMode && (PrintCompilation || PrintOpto) ) {
+ // Display which live ranges need to be split and the allocator's state
+ tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt);
+ for( uint bidx = 1; bidx < _maxlrg; bidx++ ) {
+ if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {
+ tty->print("L%d: ", bidx);
+ lrgs(bidx).dump();
+ }
+ }
+ tty->cr();
+ dump();
+ }
+}
+
+//------------------------------dump_frame------------------------------------
+void PhaseChaitin::dump_frame() const {
+ const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
+ const TypeTuple *domain = C->tf()->domain();
+ const int argcnt = domain->cnt() - TypeFunc::Parms;
+
+ // Incoming arguments in registers dump
+ for( int k = 0; k < argcnt; k++ ) {
+ OptoReg::Name parmreg = _matcher._parm_regs[k].first();
+ if( OptoReg::is_reg(parmreg)) {
+ const char *reg_name = OptoReg::regname(parmreg);
+ tty->print("#r%3.3d %s", parmreg, reg_name);
+ parmreg = _matcher._parm_regs[k].second();
+ if( OptoReg::is_reg(parmreg)) {
+ tty->print(":%s", OptoReg::regname(parmreg));
+ }
+ tty->print(" : parm %d: ", k);
+ domain->field_at(k + TypeFunc::Parms)->dump();
+ tty->print_cr("");
+ }
+ }
+
+ // Check for un-owned padding above incoming args
+ OptoReg::Name reg = _matcher._new_SP;
+ if( reg > _matcher._in_arg_limit ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg));
+ }
+
+ // Incoming argument area dump
+ OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots());
+ while( reg > begin_in_arg ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
+ int j;
+ for( j = 0; j < argcnt; j++) {
+ if( _matcher._parm_regs[j].first() == reg ||
+ _matcher._parm_regs[j].second() == reg ) {
+ tty->print("parm %d: ",j);
+ domain->field_at(j + TypeFunc::Parms)->dump();
+ tty->print_cr("");
+ break;
+ }
+ }
+ if( j >= argcnt )
+ tty->print_cr("HOLE, owned by SELF");
+ }
+
+ // Old outgoing preserve area
+ while( reg > _matcher._old_SP ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg));
+ }
+
+ // Old SP
+ tty->print_cr("# -- Old %s -- Framesize: %d --",fp,
+ reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize);
+
+ // Preserve area dump
+ reg = OptoReg::add(reg, -1);
+ while( OptoReg::is_stack(reg)) {
+ tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
+ if( _matcher.return_addr() == reg )
+ tty->print_cr("return address");
+ else if( _matcher.return_addr() == OptoReg::add(reg,1) &&
+ VerifyStackAtCalls )
+ tty->print_cr("0xBADB100D +VerifyStackAtCalls");
+ else if ((int)OptoReg::reg2stack(reg) < C->fixed_slots())
+ tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg));
+ else
+ tty->print_cr("pad2, in_preserve");
+ reg = OptoReg::add(reg, -1);
+ }
+
+ // Spill area dump
+ reg = OptoReg::add(_matcher._new_SP, _framesize );
+ while( reg > _matcher._out_arg_limit ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg));
+ }
+
+ // Outgoing argument area dump
+ while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg));
+ }
+
+ // Outgoing new preserve area
+ while( reg > _matcher._new_SP ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg));
+ }
+ tty->print_cr("#");
+}
+
+//------------------------------dump_bb----------------------------------------
+void PhaseChaitin::dump_bb( uint pre_order ) const {
+ tty->print_cr("---dump of B%d---",pre_order);
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ if( b->_pre_order == pre_order )
+ dump(b);
+ }
+}
+
+//------------------------------dump_lrg---------------------------------------
+void PhaseChaitin::dump_lrg( uint lidx ) const {
+ tty->print_cr("---dump of L%d---",lidx);
+
+ if( _ifg ) {
+ if( lidx >= _maxlrg ) {
+ tty->print("Attempt to print live range index beyond max live range.\n");
+ return;
+ }
+ tty->print("L%d: ",lidx);
+ lrgs(lidx).dump( );
+ }
+ if( _ifg ) { tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx));
+ _ifg->neighbors(lidx)->dump();
+ tty->cr();
+ }
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ int dump_once = 0;
+
+ // For all instructions
+ for( uint j = 0; j < b->_nodes.size(); j++ ) {
+ Node *n = b->_nodes[j];
+ if( Find_const(n) == lidx ) {
+ if( !dump_once++ ) {
+ tty->cr();
+ b->dump_head( &_cfg._bbs );
+ }
+ dump(n);
+ continue;
+ }
+ uint cnt = n->req();
+ for( uint k = 1; k < cnt; k++ ) {
+ Node *m = n->in(k);
+ if (!m) continue; // be robust in the dumper
+ if( Find_const(m) == lidx ) {
+ if( !dump_once++ ) {
+ tty->cr();
+ b->dump_head( &_cfg._bbs );
+ }
+ dump(n);
+ }
+ }
+ }
+ } // End of per-block dump
+ tty->cr();
+}
+#endif // not PRODUCT
+
+//------------------------------print_chaitin_statistics-------------------------------
+int PhaseChaitin::_final_loads = 0;
+int PhaseChaitin::_final_stores = 0;
+int PhaseChaitin::_final_memoves= 0;
+int PhaseChaitin::_final_copies = 0;
+double PhaseChaitin::_final_load_cost = 0;
+double PhaseChaitin::_final_store_cost = 0;
+double PhaseChaitin::_final_memove_cost= 0;
+double PhaseChaitin::_final_copy_cost = 0;
+int PhaseChaitin::_conserv_coalesce = 0;
+int PhaseChaitin::_conserv_coalesce_pair = 0;
+int PhaseChaitin::_conserv_coalesce_trie = 0;
+int PhaseChaitin::_conserv_coalesce_quad = 0;
+int PhaseChaitin::_post_alloc = 0;
+int PhaseChaitin::_lost_opp_pp_coalesce = 0;
+int PhaseChaitin::_lost_opp_cflow_coalesce = 0;
+int PhaseChaitin::_used_cisc_instructions = 0;
+int PhaseChaitin::_unused_cisc_instructions = 0;
+int PhaseChaitin::_allocator_attempts = 0;
+int PhaseChaitin::_allocator_successes = 0;
+
+#ifndef PRODUCT
+uint PhaseChaitin::_high_pressure = 0;
+uint PhaseChaitin::_low_pressure = 0;
+
+void PhaseChaitin::print_chaitin_statistics() {
+ tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.", _final_loads, _final_stores, _final_memoves, _final_copies);
+ tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.", _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost);
+ tty->print_cr("Adjusted spill cost = %7.0f.",
+ _final_load_cost*4.0 + _final_store_cost * 2.0 +
+ _final_copy_cost*1.0 + _final_memove_cost*12.0);
+ tty->print("Conservatively coalesced %d copies, %d pairs",
+ _conserv_coalesce, _conserv_coalesce_pair);
+ if( _conserv_coalesce_trie || _conserv_coalesce_quad )
+ tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad);
+ tty->print_cr(", %d post alloc.", _post_alloc);
+ if( _lost_opp_pp_coalesce || _lost_opp_cflow_coalesce )
+ tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.",
+ _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce );
+ if( _used_cisc_instructions || _unused_cisc_instructions )
+ tty->print_cr("Used cisc instruction %d, remained in register %d",
+ _used_cisc_instructions, _unused_cisc_instructions);
+ if( _allocator_successes != 0 )
+ tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes);
+ tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure);
+}
+#endif // not PRODUCT
diff --git a/src/share/vm/opto/chaitin.hpp b/src/share/vm/opto/chaitin.hpp
new file mode 100644
index 000000000..df848d72c
--- /dev/null
+++ b/src/share/vm/opto/chaitin.hpp
@@ -0,0 +1,501 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class LoopTree;
+class MachCallNode;
+class MachSafePointNode;
+class Matcher;
+class PhaseCFG;
+class PhaseLive;
+class PhaseRegAlloc;
+class PhaseChaitin;
+
+#define OPTO_DEBUG_SPLIT_FREQ BLOCK_FREQUENCY(0.001)
+#define OPTO_LRG_HIGH_FREQ BLOCK_FREQUENCY(0.25)
+
+//------------------------------LRG--------------------------------------------
+// Live-RanGe structure.
+class LRG : public ResourceObj {
+public:
+ enum { SPILL_REG=29999 }; // Register number of a spilled LRG
+
+ double _cost; // 2 for loads/1 for stores times block freq
+ double _area; // Sum of all simultaneously live values
+ double score() const; // Compute score from cost and area
+ double _maxfreq; // Maximum frequency of any def or use
+
+ Node *_def; // Check for multi-def live ranges
+#ifndef PRODUCT
+ GrowableArray<Node*>* _defs;
+#endif
+
+ uint _risk_bias; // Index of LRG whose color we want to avoid
+ uint _copy_bias; // Index of LRG whose color we want to share
+
+ uint _next; // Index of next LRG in linked list
+ uint _prev; // Index of prev LRG in linked list
+private:
+ uint _reg; // Chosen register; undefined if mask is plural
+public:
+ // Return chosen register for this LRG. Error if the LRG is not bound to
+ // a single register.
+ OptoReg::Name reg() const { return OptoReg::Name(_reg); }
+ void set_reg( OptoReg::Name r ) { _reg = r; }
+
+private:
+ uint _eff_degree; // Effective degree: Sum of neighbors' _num_regs
+public:
+ int degree() const { assert( _degree_valid, "" ); return _eff_degree; }
+ // Degree starts not valid and any change to the IFG neighbor
+ // set makes it not valid.
+ void set_degree( uint degree ) { _eff_degree = degree; debug_only(_degree_valid = 1;) }
+ // Made a change that hammered degree
+ void invalid_degree() { debug_only(_degree_valid=0;) }
+ // Incrementally modify degree. If it was correct, it should remain correct
+ void inc_degree( uint mod ) { _eff_degree += mod; }
+ // Compute the degree between 2 live ranges
+ int compute_degree( LRG &l ) const;
+
+private:
+ RegMask _mask; // Allowed registers for this LRG
+ uint _mask_size; // cache of _mask.Size();
+public:
+ int compute_mask_size() const { return _mask.is_AllStack() ? 65535 : _mask.Size(); }
+ void set_mask_size( int size ) {
+ assert((size == 65535) || (size == (int)_mask.Size()), "");
+ _mask_size = size;
+ debug_only(_msize_valid=1;)
+ debug_only( if( _num_regs == 2 && !_fat_proj ) _mask.VerifyPairs(); )
+ }
+ void compute_set_mask_size() { set_mask_size(compute_mask_size()); }
+ int mask_size() const { assert( _msize_valid, "mask size not valid" );
+ return _mask_size; }
+ // Get the last mask size computed, even if it does not match the
+ // count of bits in the current mask.
+ int get_invalid_mask_size() const { return _mask_size; }
+ const RegMask &mask() const { return _mask; }
+ void set_mask( const RegMask &rm ) { _mask = rm; debug_only(_msize_valid=0;)}
+ void AND( const RegMask &rm ) { _mask.AND(rm); debug_only(_msize_valid=0;)}
+ void SUBTRACT( const RegMask &rm ) { _mask.SUBTRACT(rm); debug_only(_msize_valid=0;)}
+ void Clear() { _mask.Clear() ; debug_only(_msize_valid=1); _mask_size = 0; }
+ void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; }
+ void Insert( OptoReg::Name reg ) { _mask.Insert(reg); debug_only(_msize_valid=0;) }
+ void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) }
+ void ClearToPairs() { _mask.ClearToPairs(); debug_only(_msize_valid=0;) }
+
+ // Number of registers this live range uses when it colors
+private:
+ uint8 _num_regs; // 2 for Longs and Doubles, 1 for all else
+ // except _num_regs is kill count for fat_proj
+public:
+ int num_regs() const { return _num_regs; }
+ void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
+
+private:
+ // Number of physical registers this live range uses when it colors
+ // Architecture and register-set dependent
+ uint8 _reg_pressure;
+public:
+ void set_reg_pressure(int i) { _reg_pressure = i; }
+ int reg_pressure() const { return _reg_pressure; }
+
+ // How much 'wiggle room' does this live range have?
+ // How many color choices can it make (scaled by _num_regs)?
+ int degrees_of_freedom() const { return mask_size() - _num_regs; }
+ // Bound LRGs have ZERO degrees of freedom. We also count
+ // must_spill as bound.
+ bool is_bound () const { return _is_bound; }
+ // Negative degrees-of-freedom; even with no neighbors this
+ // live range must spill.
+ bool not_free() const { return degrees_of_freedom() < 0; }
+ // Is this live range of "low-degree"? Trivially colorable?
+ bool lo_degree () const { return degree() <= degrees_of_freedom(); }
+ // Is this live range just barely "low-degree"? Trivially colorable?
+ bool just_lo_degree () const { return degree() == degrees_of_freedom(); }
+
+ uint _is_oop:1, // Live-range holds an oop
+ _is_float:1, // True if in float registers
+ _was_spilled1:1, // True if prior spilling on def
+ _was_spilled2:1, // True if twice prior spilling on def
+ _is_bound:1, // live range starts life with no
+ // degrees of freedom.
+ _direct_conflict:1, // True if def and use registers in conflict
+ _must_spill:1, // live range has lost all degrees of freedom
+ // If _fat_proj is set, live range does NOT require aligned, adjacent
+ // registers and has NO interferences.
+ // If _fat_proj is clear, live range requires num_regs() to be a power of
+ // 2, and it requires registers to form an aligned, adjacent set.
+ _fat_proj:1, //
+ _was_lo:1, // Was lo-degree prior to coalesce
+ _msize_valid:1, // _mask_size cache valid
+ _degree_valid:1, // _degree cache valid
+ _has_copy:1, // Adjacent to some copy instruction
+ _at_risk:1; // Simplify says this guy is at risk to spill
+
+
+ // Alive if non-zero, dead if zero
+ bool alive() const { return _def != NULL; }
+
+#ifndef PRODUCT
+ void dump( ) const;
+#endif
+};
+
+//------------------------------LRG_List---------------------------------------
+// Map Node indices to Live RanGe indices.
+// Array lookup in the optimized case.
+class LRG_List : public ResourceObj {
+ uint _cnt, _max;
+ uint* _lidxs;
+ ReallocMark _nesting; // assertion check for reallocations
+public:
+ LRG_List( uint max );
+
+ uint lookup( uint nidx ) const {
+ return _lidxs[nidx];
+ }
+ uint operator[] (uint nidx) const { return lookup(nidx); }
+
+ void map( uint nidx, uint lidx ) {
+ assert( nidx < _cnt, "oob" );
+ _lidxs[nidx] = lidx;
+ }
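+ // Like map(), but also grows the table when nidx is beyond the current
+ // size (hence the ReallocMark assertion check above).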
+ void extend( uint nidx, uint lidx );
+
+ uint Size() const { return _cnt; }
+};
+
+//------------------------------IFG--------------------------------------------
+// InterFerence Graph
+// An undirected graph implementation. Created with a fixed number of
+// vertices. Edges can be added & tested. Vertices can be removed, then
+// added back later with all edges intact. Can add edges between one vertex
+// and a list of other vertices. Can union vertices (and their edges)
+// together. The IFG needs to be really really fast, and also fairly
+// abstract! It needs abstraction so I can fiddle with the implementation to
+// get even more speed.
+class PhaseIFG : public Phase {
+ // Current implementation: a triangular adjacency list.
+
+ // Array of adjacency-lists, indexed by live-range number
+ IndexSet *_adjs;
+
+ // Assertion bit for proper use of Squaring
+ bool _is_square;
+
+ // Live range structure goes here
+ LRG *_lrgs; // Array of LRG structures
+
+public:
+ // Largest live-range number
+ uint _maxlrg;
+
+ Arena *_arena;
+
+ // Keep track of inserted and deleted Nodes
+ VectorSet *_yanked;
+
+ PhaseIFG( Arena *arena );
+ void init( uint maxlrg );
+
+ // Add edge between a and b. Returns true if actually added.
+ int add_edge( uint a, uint b );
+
+ // Add edge between a and everything in the vector
+ void add_vector( uint a, IndexSet *vec );
+
+ // Test for edge existence
+ int test_edge( uint a, uint b ) const;
+
+ // Square-up matrix for faster Union
+ void SquareUp();
+
+ // Return number of LRG neighbors
+ uint neighbor_cnt( uint a ) const { return _adjs[a].count(); }
+ // Union edges of b into a on Squared-up matrix
+ void Union( uint a, uint b );
+ // Test for edge in Squared-up matrix
+ int test_edge_sq( uint a, uint b ) const;
+ // Yank a Node and all connected edges from the IFG. Be prepared to
+ // re-insert the yanked Node in reverse order of yanking. Return a
+ // list of neighbors (edges) yanked.
+ IndexSet *remove_node( uint a );
+ // Reinsert a yanked Node
+ void re_insert( uint a );
+ // Return set of neighbors
+ IndexSet *neighbors( uint a ) const { return &_adjs[a]; }
+
+#ifndef PRODUCT
+ // Dump the IFG
+ void dump() const;
+ void stats() const;
+ void verify( const PhaseChaitin * ) const;
+#endif
+
+ //--------------- Live Range Accessors
+ LRG &lrgs(uint idx) const { assert(idx < _maxlrg, "oob"); return _lrgs[idx]; }
+
+ // Compute and set effective degree. Might be folded into SquareUp().
+ void Compute_Effective_Degree();
+
+ // Compute effective degree as the sum of neighbors' _sizes.
+ int effective_degree( uint lidx ) const;
+};
+
+// TEMPORARILY REPLACED WITH COMMAND LINE FLAG
+
+//// !!!!! Magic Constants need to move into ad file
+#ifdef SPARC
+//#define FLOAT_PRESSURE 30 /* SFLT_REG_mask.Size() - 1 */
+//#define INT_PRESSURE 23 /* NOTEMP_I_REG_mask.Size() - 1 */
+#define FLOAT_INCREMENT(regs) regs
+#else
+//#define FLOAT_PRESSURE 6
+//#define INT_PRESSURE 6
+#define FLOAT_INCREMENT(regs) 1
+#endif
+
+//------------------------------Chaitin----------------------------------------
+// Briggs-Chaitin style allocation, mostly.
+class PhaseChaitin : public PhaseRegAlloc {
+
+ int _trip_cnt;
+ int _alternate;
+
+ uint _maxlrg; // Max live range number
+ LRG &lrgs(uint idx) const { return _ifg->lrgs(idx); }
+ PhaseLive *_live; // Liveness, used in the interference graph
+ PhaseIFG *_ifg; // Interference graph (for original chunk)
+ Node_List **_lrg_nodes; // Array of node lists for LRGs which spill
+ VectorSet _spilled_once; // Nodes that have been spilled
+ VectorSet _spilled_twice; // Nodes that have been spilled twice
+
+ LRG_List _names; // Map from Nodes to Live RanGes
+
+ // Union-find map.
+ // Indexed by live-range number, it returns the compacted live-range number
+ LRG_List _uf_map;
+ // Reset the Union-Find map to identity
+ void reset_uf_map( uint maxlrg );
+ // Remove the need for the Union-Find mapping
+ void compress_uf_map_for_nodes( );
+
+ // Combine the Live Range Indices for these 2 Nodes into a single live
+ // range. Future requests for any Node in either live range will
+ // return the live range index for the combined live range.
+ void Union( const Node *src, const Node *dst );
+
+ void new_lrg( const Node *x, uint lrg );
+
+ // Compact live ranges, removing unused ones. Return new maxlrg.
+ void compact();
+
+ uint _lo_degree; // Head of lo-degree LRGs list
+ uint _lo_stk_degree; // Head of lo-stk-degree LRGs list
+ uint _hi_degree; // Head of hi-degree LRGs list
+ uint _simplified; // Linked list head of simplified LRGs
+
+ // Helper functions for Split()
+ uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
+ uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );
+ int clone_projs( Block *b, uint idx, Node *con, Node *copy, uint &maxlrg );
+ Node *split_Rematerialize( Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru );
+ // True if lidx is used before any real register is def'd in the block
+ bool prompt_use( Block *b, uint lidx );
+ Node *get_spillcopy_wide( Node *def, Node *use, uint uidx );
+ // Insert the spill at the chosen location. Skip over any intervening Projs or
+ // Phis. Skip over a CatchNode and projs, inserting in the fall-through block
+ // instead. Update high-pressure indices. Create a new live range.
+ void insert_proj( Block *b, uint i, Node *spill, uint maxlrg );
+
+ bool is_high_pressure( Block *b, LRG *lrg, uint insidx );
+
+ uint _oldphi; // Node index which separates pre-allocation nodes
+
+ Block **_blks; // Array of blocks sorted by frequency for coalescing
+
+#ifndef PRODUCT
+ bool _trace_spilling;
+#endif
+
+public:
+ PhaseChaitin( uint unique, PhaseCFG &cfg, Matcher &matcher );
+ ~PhaseChaitin() {}
+
+ // Convert a Node into a Live Range Index - a lidx
+ uint Find( const Node *n ) {
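+ // If the union-find entry already maps to itself the lidx is canonical;
+ // otherwise fall back to the path-compressing Find_compress().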
+ uint lidx = n2lidx(n);
+ uint uf_lidx = _uf_map[lidx];
+ return (uf_lidx == lidx) ? uf_lidx : Find_compress(n);
+ }
+ uint Find_const( uint lrg ) const;
+ uint Find_const( const Node *n ) const;
+
+ // Do all the real work of allocate
+ void Register_Allocate();
+
+ uint n2lidx( const Node *n ) const { return _names[n->_idx]; }
+
+#ifndef PRODUCT
+ bool trace_spilling() const { return _trace_spilling; }
+#endif
+
+private:
+ // De-SSA the world. Assign registers to Nodes. Use the same register for
+ // all inputs to a PhiNode, effectively coalescing live ranges. Insert
+ // copies as needed.
+ void de_ssa();
+ uint Find_compress( const Node *n );
+ uint Find( uint lidx ) {
+ uint uf_lidx = _uf_map[lidx];
+ return (uf_lidx == lidx) ? uf_lidx : Find_compress(lidx);
+ }
+ uint Find_compress( uint lidx );
+
+ uint Find_id( const Node *n ) {
+ uint retval = n2lidx(n);
+ assert(retval == Find(n),"Invalid node to lidx mapping");
+ return retval;
+ }
+
+ // Add edge between reg and everything in the vector.
+ // Same as _ifg->add_vector(reg,live) EXCEPT use the RegMask
+ // information to trim the set of interferences. Return the
+ // count of edges added.
+ void interfere_with_live( uint reg, IndexSet *live );
+ // Count register pressure for asserts
+ uint count_int_pressure( IndexSet *liveout );
+ uint count_float_pressure( IndexSet *liveout );
+
+ // Build the interference graph using virtual registers only.
+ // Used for aggressive coalescing.
+ void build_ifg_virtual( );
+
+ // Build the interference graph using physical registers when available.
+ // That is, if 2 live ranges are simultaneously alive but in their
+ // acceptable register sets do not overlap, then they do not interfere.
+ uint build_ifg_physical( ResourceArea *a );
+
+ // Gather LiveRanGe information, including register masks and base pointer/
+ // derived pointer relationships.
+ void gather_lrg_masks( bool mod_cisc_masks );
+
+ // Force the bases of derived pointers to be alive at GC points.
+ bool stretch_base_pointer_live_ranges( ResourceArea *a );
+ // Helper to stretch above; recursively discover the base Node for
+ // a given derived Node. Easy for AddP-related machine nodes, but
+ // needs to be recursive for derived Phis.
+ Node *find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg );
+
+ // Set the was-lo-degree bit. Conservative coalescing should not change the
+ // colorability of the graph. If any live range was of low-degree before
+ // coalescing, it should Simplify. This call sets the was-lo-degree bit.
+ void set_was_low();
+
+ // Split live-ranges that must spill due to register conflicts (as opposed
+ // to capacity spills). Typically these are things def'd in a register
+ // and used on the stack or vice-versa.
+ void pre_spill();
+
+ // Init LRG caching of degree, numregs. Init lo_degree list.
+ void cache_lrg_info( );
+
+ // Simplify the IFG by removing LRGs of low degree with no copies
+ void Pre_Simplify();
+
+ // Simplify the IFG by removing LRGs of low degree
+ void Simplify();
+
+ // Select colors by re-inserting edges into the IFG.
+ // Return TRUE if any spills occurred.
+ uint Select( );
+ // Helper function for select which allows biased coloring
+ OptoReg::Name choose_color( LRG &lrg, int chunk );
+ // Helper function which implements biasing heuristic
+ OptoReg::Name bias_color( LRG &lrg, int chunk );
+
+ // Split uncolorable live ranges
+ // Return new number of live ranges
+ uint Split( uint maxlrg );
+
+ // Copy 'was_spilled'-edness from one Node to another.
+ void copy_was_spilled( Node *src, Node *dst );
+ // Set the 'spilled_once' or 'spilled_twice' flag on a node.
+ void set_was_spilled( Node *n );
+
+ // Convert ideal spill-nodes into machine loads & stores
+ // Set C->failing() when fixup of spills cannot complete because the node limit is exceeded.
+ void fixup_spills();
+
+ // Post-Allocation peephole copy removal
+ void post_allocate_copy_removal();
+ Node *skip_copies( Node *c );
+ int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
+ int elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs );
+ int use_prior_register( Node *copy, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd );
+ bool may_be_copy_of_callee( Node *def ) const;
+
+ // If nreg already contains the same constant as val then eliminate it
+ bool eliminate_copy_of_constant(Node* val, Block *current_block, Node_List& value, Node_List &regnd,
+ OptoReg::Name nreg, OptoReg::Name nreg2);
+ // Extend the node to LRG mapping
+ void add_reference( const Node *node, const Node *old_node);
+
+private:
+
+ static int _final_loads, _final_stores, _final_copies, _final_memoves;
+ static double _final_load_cost, _final_store_cost, _final_copy_cost, _final_memove_cost;
+ static int _conserv_coalesce, _conserv_coalesce_pair;
+ static int _conserv_coalesce_trie, _conserv_coalesce_quad;
+ static int _post_alloc;
+ static int _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce;
+ static int _used_cisc_instructions, _unused_cisc_instructions;
+ static int _allocator_attempts, _allocator_successes;
+
+#ifndef PRODUCT
+ static uint _high_pressure, _low_pressure;
+
+ void dump() const;
+ void dump( const Node *n ) const;
+ void dump( const Block * b ) const;
+ void dump_degree_lists() const;
+ void dump_simplified() const;
+ void dump_lrg( uint lidx ) const;
+ void dump_bb( uint pre_order ) const;
+
+ // Verify that base pointers and derived pointers are still sane
+ void verify_base_ptrs( ResourceArea *a ) const;
+
+ void dump_for_spill_split_recycle() const;
+
+public:
+ void dump_frame() const;
+ char *dump_register( const Node *n, char *buf ) const;
+private:
+ static void print_chaitin_statistics();
+#endif
+ friend class PhaseCoalesce;
+ friend class PhaseAggressiveCoalesce;
+ friend class PhaseConservativeCoalesce;
+};
diff --git a/src/share/vm/opto/classes.cpp b/src/share/vm/opto/classes.cpp
new file mode 100644
index 000000000..f8cef8a47
--- /dev/null
+++ b/src/share/vm/opto/classes.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_classes.cpp.incl"
+
+// ----------------------------------------------------------------------------
+// Build a table of virtual functions to map from Nodes to dense integer
+// opcode names.
+int Node::Opcode() const { return Op_Node; }
+#define macro(x) int x##Node::Opcode() const { return Op_##x; }
+#include "classes.hpp"
+#undef macro
diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp
new file mode 100644
index 000000000..26eff2b4e
--- /dev/null
+++ b/src/share/vm/opto/classes.hpp
@@ -0,0 +1,308 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The giant table of Node classes.
+// One entry per class, sorted by class name.
+
+macro(AbsD)
+macro(AbsF)
+macro(AbsI)
+macro(AddD)
+macro(AddF)
+macro(AddI)
+macro(AddL)
+macro(AddP)
+macro(Allocate)
+macro(AllocateArray)
+macro(AndI)
+macro(AndL)
+macro(AtanD)
+macro(Binary)
+macro(Bool)
+macro(BoxLock)
+macro(ReverseBytesI)
+macro(ReverseBytesL)
+macro(CProj)
+macro(CallDynamicJava)
+macro(CallJava)
+macro(CallLeaf)
+macro(CallLeafNoFP)
+macro(CallRuntime)
+macro(CallStaticJava)
+macro(CastII)
+macro(CastX2P)
+macro(CastP2X)
+macro(CastPP)
+macro(Catch)
+macro(CatchProj)
+macro(CheckCastPP)
+macro(ClearArray)
+macro(ConstraintCast)
+macro(CMoveD)
+macro(CMoveF)
+macro(CMoveI)
+macro(CMoveL)
+macro(CMoveP)
+macro(CmpD)
+macro(CmpD3)
+macro(CmpF)
+macro(CmpF3)
+macro(CmpI)
+macro(CmpL)
+macro(CmpL3)
+macro(CmpLTMask)
+macro(CmpP)
+macro(CmpU)
+macro(CompareAndSwapI)
+macro(CompareAndSwapL)
+macro(CompareAndSwapP)
+macro(Con)
+macro(ConD)
+macro(ConF)
+macro(ConI)
+macro(ConL)
+macro(ConP)
+macro(Conv2B)
+macro(ConvD2F)
+macro(ConvD2I)
+macro(ConvD2L)
+macro(ConvF2D)
+macro(ConvF2I)
+macro(ConvF2L)
+macro(ConvI2D)
+macro(ConvI2F)
+macro(ConvI2L)
+macro(ConvL2D)
+macro(ConvL2F)
+macro(ConvL2I)
+macro(CosD)
+macro(CountedLoop)
+macro(CountedLoopEnd)
+macro(CreateEx)
+macro(DivD)
+macro(DivF)
+macro(DivI)
+macro(DivL)
+macro(DivMod)
+macro(DivModI)
+macro(DivModL)
+macro(ExpD)
+macro(FastLock)
+macro(FastUnlock)
+macro(Goto)
+macro(Halt)
+macro(If)
+macro(IfFalse)
+macro(IfTrue)
+macro(Initialize)
+macro(JProj)
+macro(Jump)
+macro(JumpProj)
+macro(LShiftI)
+macro(LShiftL)
+macro(LoadB)
+macro(LoadC)
+macro(LoadD)
+macro(LoadD_unaligned)
+macro(LoadF)
+macro(LoadI)
+macro(LoadKlass)
+macro(LoadL)
+macro(LoadL_unaligned)
+macro(LoadPLocked)
+macro(LoadLLocked)
+macro(LoadP)
+macro(LoadRange)
+macro(LoadS)
+macro(Lock)
+macro(LogD)
+macro(Log10D)
+macro(Loop)
+macro(Mach)
+macro(MachProj)
+macro(MaxI)
+macro(MemBarAcquire)
+macro(MemBarCPUOrder)
+macro(MemBarRelease)
+macro(MemBarVolatile)
+macro(MergeMem)
+macro(MinI)
+macro(ModD)
+macro(ModF)
+macro(ModI)
+macro(ModL)
+macro(MoveI2F)
+macro(MoveF2I)
+macro(MoveL2D)
+macro(MoveD2L)
+macro(MulD)
+macro(MulF)
+macro(MulI)
+macro(MulL)
+macro(Multi)
+macro(NegD)
+macro(NegF)
+macro(NeverBranch)
+macro(Opaque1)
+macro(Opaque2)
+macro(OrI)
+macro(OrL)
+macro(PCTable)
+macro(Parm)
+macro(PartialSubtypeCheck)
+macro(Phi)
+macro(PowD)
+macro(PrefetchRead)
+macro(PrefetchWrite)
+macro(Proj)
+macro(RShiftI)
+macro(RShiftL)
+macro(Region)
+macro(Rethrow)
+macro(Return)
+macro(Root)
+macro(RoundDouble)
+macro(RoundFloat)
+macro(SafePoint)
+macro(SCMemProj)
+macro(SinD)
+macro(SqrtD)
+macro(Start)
+macro(StartOSR)
+macro(StoreB)
+macro(StoreC)
+macro(StoreCM)
+macro(StorePConditional)
+macro(StoreLConditional)
+macro(StoreD)
+macro(StoreF)
+macro(StoreI)
+macro(StoreL)
+macro(StoreP)
+macro(StrComp)
+macro(SubD)
+macro(SubF)
+macro(SubI)
+macro(SubL)
+macro(TailCall)
+macro(TailJump)
+macro(TanD)
+macro(ThreadLocal)
+macro(Unlock)
+macro(URShiftI)
+macro(URShiftL)
+macro(XorI)
+macro(XorL)
+macro(Vector)
+macro(AddVB)
+macro(AddVC)
+macro(AddVS)
+macro(AddVI)
+macro(AddVL)
+macro(AddVF)
+macro(AddVD)
+macro(SubVB)
+macro(SubVC)
+macro(SubVS)
+macro(SubVI)
+macro(SubVL)
+macro(SubVF)
+macro(SubVD)
+macro(MulVF)
+macro(MulVD)
+macro(DivVF)
+macro(DivVD)
+macro(LShiftVB)
+macro(LShiftVC)
+macro(LShiftVS)
+macro(LShiftVI)
+macro(URShiftVB)
+macro(URShiftVC)
+macro(URShiftVS)
+macro(URShiftVI)
+macro(AndV)
+macro(OrV)
+macro(XorV)
+macro(VectorLoad)
+macro(Load16B)
+macro(Load8B)
+macro(Load4B)
+macro(Load8C)
+macro(Load4C)
+macro(Load2C)
+macro(Load8S)
+macro(Load4S)
+macro(Load2S)
+macro(Load4I)
+macro(Load2I)
+macro(Load2L)
+macro(Load4F)
+macro(Load2F)
+macro(Load2D)
+macro(VectorStore)
+macro(Store16B)
+macro(Store8B)
+macro(Store4B)
+macro(Store8C)
+macro(Store4C)
+macro(Store2C)
+macro(Store4I)
+macro(Store2I)
+macro(Store2L)
+macro(Store4F)
+macro(Store2F)
+macro(Store2D)
+macro(Pack)
+macro(PackB)
+macro(PackS)
+macro(PackC)
+macro(PackI)
+macro(PackL)
+macro(PackF)
+macro(PackD)
+macro(Pack2x1B)
+macro(Pack2x2B)
+macro(Replicate16B)
+macro(Replicate8B)
+macro(Replicate4B)
+macro(Replicate8S)
+macro(Replicate4S)
+macro(Replicate2S)
+macro(Replicate8C)
+macro(Replicate4C)
+macro(Replicate2C)
+macro(Replicate4I)
+macro(Replicate2I)
+macro(Replicate2L)
+macro(Replicate4F)
+macro(Replicate2F)
+macro(Replicate2D)
+macro(Extract)
+macro(ExtractB)
+macro(ExtractS)
+macro(ExtractC)
+macro(ExtractI)
+macro(ExtractL)
+macro(ExtractF)
+macro(ExtractD)
diff --git a/src/share/vm/opto/coalesce.cpp b/src/share/vm/opto/coalesce.cpp
new file mode 100644
index 000000000..20e9bd179
--- /dev/null
+++ b/src/share/vm/opto/coalesce.cpp
@@ -0,0 +1,915 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_coalesce.cpp.incl"
+
+//=============================================================================
+//------------------------------reset_uf_map-----------------------------------
+void PhaseChaitin::reset_uf_map( uint maxlrg ) {
+ _maxlrg = maxlrg;
+ // Force the Union-Find mapping to be at least this large
+ _uf_map.extend(_maxlrg,0);
+ // Initialize it to be the ID mapping.
+ for( uint i=0; i<_maxlrg; i++ )
+ _uf_map.map(i,i);
+}
+
+//------------------------------compress_uf_map--------------------------------
+// Make all Nodes map directly to their final live range; no need for
+// the Union-Find mapping after this call.
+void PhaseChaitin::compress_uf_map_for_nodes( ) {
+ // For all Nodes, compress mapping
+ uint unique = _names.Size();
+ for( uint i=0; i<unique; i++ ) {
+ uint lrg = _names[i];
+ uint compressed_lrg = Find(lrg);
+ if( lrg != compressed_lrg )
+ _names.map(i,compressed_lrg);
+ }
+}
+
+//------------------------------Find-------------------------------------------
+// Straight out of Tarjan's union-find algorithm
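+// Illustrative example (not part of the original comment): given a chain of
+// equivalences such as 9 -> 7 -> 3 -> 3, Find_compress(9) walks down to the
+// root 3 and then rewrites the map so that 9 and 7 both point directly at 3,
+// shortening the chain for later queries.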
+uint PhaseChaitin::Find_compress( uint lrg ) {
+ uint cur = lrg;
+ uint next = _uf_map[cur];
+ while( next != cur ) { // Scan chain of equivalences
+ assert( next < cur, "always union smaller" );
+ cur = next; // until find a fixed-point
+ next = _uf_map[cur];
+ }
+ // Core of union-find algorithm: update chain of
+ // equivalences to be equal to the root.
+ while( lrg != next ) {
+ uint tmp = _uf_map[lrg];
+ _uf_map.map(lrg, next);
+ lrg = tmp;
+ }
+ return lrg;
+}
+
+//------------------------------Find-------------------------------------------
+// Straight out of Tarjan's union-find algorithm
+uint PhaseChaitin::Find_compress( const Node *n ) {
+ uint lrg = Find_compress(_names[n->_idx]);
+ _names.map(n->_idx,lrg);
+ return lrg;
+}
+
+//------------------------------Find_const-------------------------------------
+// Like Find above, but no path compression, so bad asymptotic behavior
+uint PhaseChaitin::Find_const( uint lrg ) const {
+ if( !lrg ) return lrg; // Ignore the zero LRG
+  // Off the end? This happens during debugging dumps when you have
+ // brand new live ranges but have not told the allocator yet.
+ if( lrg >= _maxlrg ) return lrg;
+ uint next = _uf_map[lrg];
+ while( next != lrg ) { // Scan chain of equivalences
+ assert( next < lrg, "always union smaller" );
+ lrg = next; // until find a fixed-point
+ next = _uf_map[lrg];
+ }
+ return next;
+}
+
+//------------------------------Find-------------------------------------------
+// Like Find above, but no path compression, so bad asymptotic behavior
+uint PhaseChaitin::Find_const( const Node *n ) const {
+ if( n->_idx >= _names.Size() ) return 0; // not mapped, usual for debug dump
+ return Find_const( _names[n->_idx] );
+}
+
+//------------------------------Union------------------------------------------
+// union 2 sets together.
+void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
+ uint src = Find(src_n);
+ uint dst = Find(dst_n);
+ assert( src, "" );
+ assert( dst, "" );
+ assert( src < _maxlrg, "oob" );
+ assert( dst < _maxlrg, "oob" );
+ assert( src < dst, "always union smaller" );
+ _uf_map.map(dst,src);
+}
+
+//------------------------------new_lrg----------------------------------------
+void PhaseChaitin::new_lrg( const Node *x, uint lrg ) {
+ // Make the Node->LRG mapping
+ _names.extend(x->_idx,lrg);
+ // Make the Union-Find mapping an identity function
+ _uf_map.extend(lrg,lrg);
+}
+
+//------------------------------clone_projs------------------------------------
+// After cloning some rematerialized instruction, clone any MachProj's that
+// follow it. Example: Intel zero is XOR, kills flags. Sparc FP constants
+// use G3 as an address temp.
+int PhaseChaitin::clone_projs( Block *b, uint idx, Node *con, Node *copy, uint &maxlrg ) {
+ Block *bcon = _cfg._bbs[con->_idx];
+ uint cindex = bcon->find_node(con);
+ Node *con_next = bcon->_nodes[cindex+1];
+ if( con_next->in(0) != con || con_next->Opcode() != Op_MachProj )
+ return false; // No MachProj's follow
+
+ // Copy kills after the cloned constant
+ Node *kills = con_next->clone();
+ kills->set_req( 0, copy );
+ b->_nodes.insert( idx, kills );
+ _cfg._bbs.map( kills->_idx, b );
+ new_lrg( kills, maxlrg++ );
+ return true;
+}
+
+//------------------------------compact----------------------------------------
+// Renumber the live ranges to compact them. Makes the IFG smaller.
+void PhaseChaitin::compact() {
+  // Currently, the _uf_map contains a series of short chains which are headed
+ // by a self-cycle. All the chains run from big numbers to little numbers.
+ // The Find() call chases the chains & shortens them for the next Find call.
+ // We are going to change this structure slightly. Numbers above a moving
+ // wave 'i' are unchanged. Numbers below 'j' point directly to their
+ // compacted live range with no further chaining. There are no chains or
+ // cycles below 'i', so the Find call no longer works.
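+  //
+  // Illustrative sketch (hypothetical numbers, not part of the original
+  // comment): a map of { 1->1, 2->2, 3->2, 4->4, 5->4 } has roots 1, 2 and 4;
+  // after compaction the roots are renamed 1, 2 and 3 and every entry points
+  // directly at its compacted name, with no chains left to chase.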
+ uint j=1;
+ uint i;
+ for( i=1; i < _maxlrg; i++ ) {
+ uint lr = _uf_map[i];
+ // Ignore unallocated live ranges
+ if( !lr ) continue;
+ assert( lr <= i, "" );
+ _uf_map.map(i, ( lr == i ) ? j++ : _uf_map[lr]);
+ }
+ if( false ) // PrintOptoCompactLiveRanges
+ printf("Compacted %d LRs from %d\n",i-j,i);
+ // Now change the Node->LR mapping to reflect the compacted names
+ uint unique = _names.Size();
+ for( i=0; i<unique; i++ )
+ _names.map(i,_uf_map[_names[i]]);
+
+ // Reset the Union-Find mapping
+ reset_uf_map(j);
+
+}
+
+//=============================================================================
+//------------------------------Dump-------------------------------------------
+#ifndef PRODUCT
+void PhaseCoalesce::dump( Node *n ) const {
+ // Being a const function means I cannot use 'Find'
+ uint r = _phc.Find(n);
+ tty->print("L%d/N%d ",r,n->_idx);
+}
+
+//------------------------------dump-------------------------------------------
+void PhaseCoalesce::dump() const {
+ // I know I have a block layout now, so I can print blocks in a loop
+ for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+ uint j;
+ Block *b = _phc._cfg._blocks[i];
+ // Print a nice block header
+ tty->print("B%d: ",b->_pre_order);
+ for( j=1; j<b->num_preds(); j++ )
+ tty->print("B%d ", _phc._cfg._bbs[b->pred(j)->_idx]->_pre_order);
+ tty->print("-> ");
+ for( j=0; j<b->_num_succs; j++ )
+ tty->print("B%d ",b->_succs[j]->_pre_order);
+ tty->print(" IDom: B%d/#%d\n", b->_idom ? b->_idom->_pre_order : 0, b->_dom_depth);
+ uint cnt = b->_nodes.size();
+ for( j=0; j<cnt; j++ ) {
+ Node *n = b->_nodes[j];
+ dump( n );
+ tty->print("\t%s\t",n->Name());
+
+ // Dump the inputs
+ uint k; // Exit value of loop
+ for( k=0; k<n->req(); k++ ) // For all required inputs
+ if( n->in(k) ) dump( n->in(k) );
+ else tty->print("_ ");
+ int any_prec = 0;
+ for( ; k<n->len(); k++ ) // For all precedence inputs
+ if( n->in(k) ) {
+ if( !any_prec++ ) tty->print(" |");
+ dump( n->in(k) );
+ }
+
+ // Dump node-specific info
+ n->dump_spec(tty);
+ tty->print("\n");
+
+ }
+ tty->print("\n");
+ }
+}
+#endif
+
+//------------------------------combine_these_two------------------------------
+// Combine the live ranges def'd by these 2 Nodes. N2 is an input to N1.
+void PhaseCoalesce::combine_these_two( Node *n1, Node *n2 ) {
+ uint lr1 = _phc.Find(n1);
+ uint lr2 = _phc.Find(n2);
+ if( lr1 != lr2 && // Different live ranges already AND
+ !_phc._ifg->test_edge_sq( lr1, lr2 ) ) { // Do not interfere
+ LRG *lrg1 = &_phc.lrgs(lr1);
+ LRG *lrg2 = &_phc.lrgs(lr2);
+ // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
+
+ // Now, why is int->oop OK? We end up declaring a raw-pointer as an oop
+ // and in general that's a bad thing. However, int->oop conversions only
+ // happen at GC points, so the lifetime of the misclassified raw-pointer
+ // is from the CheckCastPP (that converts it to an oop) backwards up
+ // through a merge point and into the slow-path call, and around the
+ // diamond up to the heap-top check and back down into the slow-path call.
+ // The misclassified raw pointer is NOT live across the slow-path call,
+ // and so does not appear in any GC info, so the fact that it is
+ // misclassified is OK.
+
+ if( (lrg1->_is_oop || !lrg2->_is_oop) && // not an oop->int cast AND
+ // Compatible final mask
+ lrg1->mask().overlap( lrg2->mask() ) ) {
+ // Merge larger into smaller.
+ if( lr1 > lr2 ) {
+ uint tmp = lr1; lr1 = lr2; lr2 = tmp;
+ Node *n = n1; n1 = n2; n2 = n;
+ LRG *ltmp = lrg1; lrg1 = lrg2; lrg2 = ltmp;
+ }
+ // Union lr2 into lr1
+ _phc.Union( n1, n2 );
+ if (lrg1->_maxfreq < lrg2->_maxfreq)
+ lrg1->_maxfreq = lrg2->_maxfreq;
+ // Merge in the IFG
+ _phc._ifg->Union( lr1, lr2 );
+ // Combine register restrictions
+ lrg1->AND(lrg2->mask());
+ }
+ }
+}
+
+//------------------------------coalesce_driver--------------------------------
+// Copy coalescing
+void PhaseCoalesce::coalesce_driver( ) {
+
+ verify();
+ // Coalesce from high frequency to low
+ for( uint i=0; i<_phc._cfg._num_blocks; i++ )
+ coalesce( _phc._blks[i] );
+
+}
+
+//------------------------------insert_copy_with_overlap-----------------------
+// I am inserting copies to come out of SSA form. In the general case, I am
+// doing a parallel renaming. I'm in the Named world now, so I can't do a
+// general parallel renaming. All the copies now use "names" (live-ranges)
+// to carry values instead of the explicit use-def chains. Suppose I need to
+// insert 2 copies into the same block. They copy L161->L128 and L128->L132.
+// If I insert them in the wrong order then L128 will get clobbered before it
+// can get used by the second copy. This cannot happen in the SSA model;
+// direct use-def chains get me the right value. It DOES happen in the named
+// model so I have to handle the reordering of copies.
+//
+// In general, I need to topo-sort the placed copies to avoid conflicts.
+// It's possible to have a closed cycle of copies (e.g., recirculating the same
+// values around a loop). In this case I need a temp to break the cycle.
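+//
+// Worked example (illustrative, not part of the original comment): to place
+// the copies L161->L128 and L128->L132 in one block, the L128->L132 copy must
+// be emitted first so L128 is read before the L161->L128 copy clobbers it.
+// If the copies form a cycle, say L128->L132 and L132->L128, no ordering
+// works and a temp breaks it: L128->Ltmp, L132->L128, Ltmp->L132.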
+void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name ) {
+
+ // Scan backwards for the locations of the last use of the dst_name.
+ // I am about to clobber the dst_name, so the copy must be inserted
+ // after the last use. Last use is really first-use on a backwards scan.
+ uint i = b->end_idx()-1;
+ while( 1 ) {
+ Node *n = b->_nodes[i];
+ // Check for end of virtual copies; this is also the end of the
+ // parallel renaming effort.
+ if( n->_idx < _unique ) break;
+ uint idx = n->is_Copy();
+ assert( idx || n->is_Con() || n->Opcode() == Op_MachProj, "Only copies during parallel renaming" );
+ if( idx && _phc.Find(n->in(idx)) == dst_name ) break;
+ i--;
+ }
+ uint last_use_idx = i;
+
+ // Also search for any kill of src_name that exits the block.
+ // Since the copy uses src_name, I have to come before any kill.
+ uint kill_src_idx = b->end_idx();
+ // There can be only 1 kill that exits any block and that is
+ // the last kill. Thus it is the first kill on a backwards scan.
+ i = b->end_idx()-1;
+ while( 1 ) {
+ Node *n = b->_nodes[i];
+ // Check for end of virtual copies; this is also the end of the
+ // parallel renaming effort.
+ if( n->_idx < _unique ) break;
+ assert( n->is_Copy() || n->is_Con() || n->Opcode() == Op_MachProj, "Only copies during parallel renaming" );
+ if( _phc.Find(n) == src_name ) {
+ kill_src_idx = i;
+ break;
+ }
+ i--;
+ }
+ // Need a temp? Last use of dst comes after the kill of src?
+ if( last_use_idx >= kill_src_idx ) {
+ // Need to break a cycle with a temp
+ uint idx = copy->is_Copy();
+ Node *tmp = copy->clone();
+ _phc.new_lrg(tmp,_phc._maxlrg++);
+ // Insert new temp between copy and source
+ tmp ->set_req(idx,copy->in(idx));
+ copy->set_req(idx,tmp);
+ // Save source in temp early, before source is killed
+ b->_nodes.insert(kill_src_idx,tmp);
+ _phc._cfg._bbs.map( tmp->_idx, b );
+ last_use_idx++;
+ }
+
+ // Insert just after last use
+ b->_nodes.insert(last_use_idx+1,copy);
+}
+
+//------------------------------insert_copies----------------------------------
+void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
+  // We do LRG compression and fix the liveout data only here, since the other
+  // place in Split() is guarded by an assert which we never hit.
+ _phc.compress_uf_map_for_nodes();
+ // Fix block's liveout data for compressed live ranges.
+ for(uint lrg = 1; lrg < _phc._maxlrg; lrg++ ) {
+ uint compressed_lrg = _phc.Find(lrg);
+ if( lrg != compressed_lrg ) {
+ for( uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++ ) {
+ IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
+ if( liveout->member(lrg) ) {
+ liveout->remove(lrg);
+ liveout->insert(compressed_lrg);
+ }
+ }
+ }
+ }
+
+ // All new nodes added are actual copies to replace virtual copies.
+ // Nodes with index less than '_unique' are original, non-virtual Nodes.
+ _unique = C->unique();
+
+ for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+ Block *b = _phc._cfg._blocks[i];
+ uint cnt = b->num_preds(); // Number of inputs to the Phi
+
+ for( uint l = 1; l<b->_nodes.size(); l++ ) {
+ Node *n = b->_nodes[l];
+
+ // Do not use removed-copies, use copied value instead
+ uint ncnt = n->req();
+ for( uint k = 1; k<ncnt; k++ ) {
+ Node *copy = n->in(k);
+ uint cidx = copy->is_Copy();
+ if( cidx ) {
+ Node *def = copy->in(cidx);
+ if( _phc.Find(copy) == _phc.Find(def) )
+ n->set_req(k,def);
+ }
+ }
+
+ // Remove any explicit copies that get coalesced.
+ uint cidx = n->is_Copy();
+ if( cidx ) {
+ Node *def = n->in(cidx);
+ if( _phc.Find(n) == _phc.Find(def) ) {
+ n->replace_by(def);
+ n->set_req(cidx,NULL);
+ b->_nodes.remove(l);
+ l--;
+ continue;
+ }
+ }
+
+ if( n->is_Phi() ) {
+ // Get the chosen name for the Phi
+ uint phi_name = _phc.Find( n );
+ // Ignore the pre-allocated specials
+ if( !phi_name ) continue;
+ // Check for mismatch inputs to Phi
+ for( uint j = 1; j<cnt; j++ ) {
+ Node *m = n->in(j);
+ uint src_name = _phc.Find(m);
+ if( src_name != phi_name ) {
+ Block *pred = _phc._cfg._bbs[b->pred(j)->_idx];
+ Node *copy;
+ assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
+ // Rematerialize constants instead of copying them
+ if( m->is_Mach() && m->as_Mach()->is_Con() &&
+ m->as_Mach()->rematerialize() ) {
+ copy = m->clone();
+ // Insert the copy in the predecessor basic block
+ pred->add_inst(copy);
+ // Copy any flags as well
+ _phc.clone_projs( pred, pred->end_idx(), m, copy, _phc._maxlrg );
+ } else {
+ const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
+ copy = new (C) MachSpillCopyNode(m,*rm,*rm);
+ // Find a good place to insert. Kinda tricky, use a subroutine
+ insert_copy_with_overlap(pred,copy,phi_name,src_name);
+ }
+ // Insert the copy in the use-def chain
+ n->set_req( j, copy );
+ _phc._cfg._bbs.map( copy->_idx, pred );
+ // Extend ("register allocate") the names array for the copy.
+ _phc._names.extend( copy->_idx, phi_name );
+ } // End of if Phi names do not match
+ } // End of for all inputs to Phi
+ } else { // End of if Phi
+
+ // Now check for 2-address instructions
+ uint idx;
+ if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) {
+ // Get the chosen name for the Node
+ uint name = _phc.Find( n );
+ assert( name, "no 2-address specials" );
+ // Check for name mis-match on the 2-address input
+ Node *m = n->in(idx);
+ if( _phc.Find(m) != name ) {
+ Node *copy;
+ assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
+ // At this point it is unsafe to extend live ranges (6550579).
+ // Rematerialize only constants as we do for Phi above.
+ if( m->is_Mach() && m->as_Mach()->is_Con() &&
+ m->as_Mach()->rematerialize() ) {
+ copy = m->clone();
+ // Insert the copy in the basic block, just before us
+ b->_nodes.insert( l++, copy );
+ if( _phc.clone_projs( b, l, m, copy, _phc._maxlrg ) )
+ l++;
+ } else {
+ const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
+ copy = new (C) MachSpillCopyNode( m, *rm, *rm );
+ // Insert the copy in the basic block, just before us
+ b->_nodes.insert( l++, copy );
+ }
+ // Insert the copy in the use-def chain
+ n->set_req(idx, copy );
+ // Extend ("register allocate") the names array for the copy.
+ _phc._names.extend( copy->_idx, name );
+ _phc._cfg._bbs.map( copy->_idx, b );
+ }
+
+ } // End of is two-adr
+
+ // Insert a copy at a debug use for a lrg which has high frequency
+ if( (b->_freq < OPTO_DEBUG_SPLIT_FREQ) && n->is_MachSafePoint() ) {
+ // Walk the debug inputs to the node and check for lrg freq
+ JVMState* jvms = n->jvms();
+ uint debug_start = jvms ? jvms->debug_start() : 999999;
+ uint debug_end = jvms ? jvms->debug_end() : 999999;
+ for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) {
+ // Do not split monitors; they are only needed for debug table
+ // entries and need no code.
+ if( jvms->is_monitor_use(inpidx) ) continue;
+ Node *inp = n->in(inpidx);
+ uint nidx = _phc.n2lidx(inp);
+ LRG &lrg = lrgs(nidx);
+
+ // If this lrg has a high frequency use/def
+ if( lrg._maxfreq >= OPTO_LRG_HIGH_FREQ ) {
+ // If the live range is also live out of this block (like it
+ // would be for a fast/slow idiom), the normal spill mechanism
+ // does an excellent job. If it is not live out of this block
+ // (like it would be for debug info to uncommon trap) splitting
+ // the live range now allows a better allocation in the high
+ // frequency blocks.
+ // Build_IFG_virtual has converted the live sets to
+ // live-IN info, not live-OUT info.
+ uint k;
+ for( k=0; k < b->_num_succs; k++ )
+ if( _phc._live->live(b->_succs[k])->member( nidx ) )
+ break; // Live in to some successor block?
+ if( k < b->_num_succs )
+ continue; // Live out; do not pre-split
+ // Split the lrg at this use
+ const RegMask *rm = C->matcher()->idealreg2spillmask[inp->ideal_reg()];
+ Node *copy = new (C) MachSpillCopyNode( inp, *rm, *rm );
+ // Insert the copy in the use-def chain
+ n->set_req(inpidx, copy );
+ // Insert the copy in the basic block, just before us
+ b->_nodes.insert( l++, copy );
+ // Extend ("register allocate") the names array for the copy.
+ _phc.new_lrg( copy, _phc._maxlrg++ );
+ _phc._cfg._bbs.map( copy->_idx, b );
+ //tty->print_cr("Split a debug use in Aggressive Coalesce");
+ } // End of if high frequency use/def
+ } // End of for all debug inputs
+ } // End of if low frequency safepoint
+
+ } // End of if Phi
+
+ } // End of for all instructions
+ } // End of for all blocks
+}
+
+//=============================================================================
+//------------------------------coalesce---------------------------------------
+// Aggressive (but pessimistic) copy coalescing of a single block
+
+// The following coalesce pass represents a single round of aggressive
+// pessimistic coalesce. "Aggressive" means no attempt to preserve
+// colorability when coalescing. This occasionally means more spills, but
+// it also means fewer rounds of coalescing for better code - and that means
+// faster compiles.
+
+// "Pessimistic" means we do not hit the fixed point in one pass (and we are
+// reaching for the least fixed point to boot). This is typically solved
+// with a few more rounds of coalescing, but the compiler must run fast. We
+// could optimistically coalesce everything touching PhiNodes together
+// into one big live range, then check for self-interference. Everywhere
+// the live range interferes with self it would have to be split. Finding
+// the right split points can be done with some heuristics (based on
+// expected frequency of edges in the live range). In short, it's a real
+// research problem and the timeline is too short to allow such research.
+// Further thoughts: (1) build the LR in a pass, (2) find self-interference
+// in another pass, (3) per each self-conflict, split, (4) split by finding
+// the low-cost cut (min-cut) of the LR, (5) edges in the LR are weighted
+// according to the GCM algorithm (or just exec freq on CFG edges).
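+//
+// (Illustrative note, an interpretation rather than part of the original
+// comment: in a single pessimistic pass, coalescing one copy may be exactly
+// what would make an already-visited copy legal to coalesce, but that earlier
+// copy is not revisited; reaching the fixed point would take more rounds than
+// the compile-time budget allows.)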
+
+void PhaseAggressiveCoalesce::coalesce( Block *b ) {
+ // Copies are still "virtual" - meaning we have not made them explicitly
+ // copies. Instead, Phi functions of successor blocks have mis-matched
+ // live-ranges. If I fail to coalesce, I'll have to insert a copy to line
+ // up the live-ranges. Check for Phis in successor blocks.
+ uint i;
+ for( i=0; i<b->_num_succs; i++ ) {
+ Block *bs = b->_succs[i];
+ // Find index of 'b' in 'bs' predecessors
+ uint j=1;
+ while( _phc._cfg._bbs[bs->pred(j)->_idx] != b ) j++;
+ // Visit all the Phis in successor block
+ for( uint k = 1; k<bs->_nodes.size(); k++ ) {
+ Node *n = bs->_nodes[k];
+ if( !n->is_Phi() ) break;
+ combine_these_two( n, n->in(j) );
+ }
+ } // End of for all successor blocks
+
+
+ // Check _this_ block for 2-address instructions and copies.
+ uint cnt = b->end_idx();
+ for( i = 1; i<cnt; i++ ) {
+ Node *n = b->_nodes[i];
+ uint idx;
+ // 2-address instructions have a virtual Copy matching their input
+ // to their output
+ if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
+ MachNode *mach = n->as_Mach();
+ combine_these_two( mach, mach->in(idx) );
+ }
+ } // End of for all instructions in block
+}
+
+//=============================================================================
+//------------------------------PhaseConservativeCoalesce----------------------
+PhaseConservativeCoalesce::PhaseConservativeCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin) {
+ _ulr.initialize(_phc._maxlrg);
+}
+
+//------------------------------verify-----------------------------------------
+void PhaseConservativeCoalesce::verify() {
+#ifdef ASSERT
+ _phc.set_was_low();
+#endif
+}
+
+//------------------------------union_helper-----------------------------------
+void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
+ // Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the
+ // union-find tree
+ _phc.Union( lr1_node, lr2_node );
+
+ // Single-def live range ONLY if both live ranges are single-def.
+ // If both are single def, then src_def powers one live range
+ // and def_copy powers the other. After merging, src_def powers
+ // the combined live range.
+ lrgs(lr1)._def = (lrgs(lr1)._def == NodeSentinel ||
+ lrgs(lr2)._def == NodeSentinel )
+ ? NodeSentinel : src_def;
+ lrgs(lr2)._def = NULL; // No def for lrg 2
+ lrgs(lr2).Clear(); // Force empty mask for LRG 2
+ //lrgs(lr2)._size = 0; // Live-range 2 goes dead
+ lrgs(lr1)._is_oop |= lrgs(lr2)._is_oop;
+ lrgs(lr2)._is_oop = 0; // In particular, not an oop for GC info
+
+ if (lrgs(lr1)._maxfreq < lrgs(lr2)._maxfreq)
+ lrgs(lr1)._maxfreq = lrgs(lr2)._maxfreq;
+
+ // Copy original value instead. Intermediate copies go dead, and
+ // the dst_copy becomes useless.
+ int didx = dst_copy->is_Copy();
+ dst_copy->set_req( didx, src_def );
+ // Add copy to free list
+ // _phc.free_spillcopy(b->_nodes[bindex]);
+ assert( b->_nodes[bindex] == dst_copy, "" );
+ dst_copy->replace_by( dst_copy->in(didx) );
+ dst_copy->set_req( didx, NULL);
+ b->_nodes.remove(bindex);
+ if( bindex < b->_ihrp_index ) b->_ihrp_index--;
+ if( bindex < b->_fhrp_index ) b->_fhrp_index--;
+
+ // Stretched lr1; add it to liveness of intermediate blocks
+ Block *b2 = _phc._cfg._bbs[src_copy->_idx];
+ while( b != b2 ) {
+ b = _phc._cfg._bbs[b->pred(1)->_idx];
+ _phc._live->live(b)->insert(lr1);
+ }
+}
+
+//------------------------------compute_separating_interferences---------------
+// Factored code from copy_copy that computes extra interferences from
+// lengthening a live range by double-coalescing.
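+// (Illustrative note, not part of the original comment: when dst_copy and
+// src_copy are different nodes, coalescing them stretches the combined live
+// range across every instruction between the two copies, so each value
+// defined in that stretch becomes a new interference that must be counted
+// against the register degree.)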
+uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
+
+ assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
+ assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
+ Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
+ Block *b2 = b;
+ uint bindex2 = bindex;
+ while( 1 ) {
+ // Find previous instruction
+ bindex2--; // Chain backwards 1 instruction
+ while( bindex2 == 0 ) { // At block start, find prior block
+ assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
+ b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
+ bindex2 = b2->end_idx()-1;
+ }
+ // Get prior instruction
+ assert(bindex2 < b2->_nodes.size(), "index out of bounds");
+ Node *x = b2->_nodes[bindex2];
+ if( x == prev_copy ) { // Previous copy in copy chain?
+ if( prev_copy == src_copy)// Found end of chain and all interferences
+ break; // So break out of loop
+ // Else work back one in copy chain
+ prev_copy = prev_copy->in(prev_copy->is_Copy());
+ } else { // Else collect interferences
+ uint lidx = _phc.Find(x);
+ // Found another def of live-range being stretched?
+ if( lidx == lr1 ) return max_juint;
+ if( lidx == lr2 ) return max_juint;
+
+ // If we attempt to coalesce across a bound def
+ if( lrgs(lidx).is_bound() ) {
+ // Do not let the coalesced LRG expect to get the bound color
+ rm.SUBTRACT( lrgs(lidx).mask() );
+ // Recompute rm_size
+ rm_size = rm.Size();
+ //if( rm._flags ) rm_size += 1000000;
+ if( reg_degree >= rm_size ) return max_juint;
+ }
+ if( rm.overlap(lrgs(lidx).mask()) ) {
+ // Insert lidx into union LRG; returns TRUE if actually inserted
+ if( _ulr.insert(lidx) ) {
+ // Infinite-stack neighbors do not alter colorability, as they
+ // can always color to some other color.
+ if( !lrgs(lidx).mask().is_AllStack() ) {
+ // If this coalesce will make any new neighbor uncolorable,
+ // do not coalesce.
+ if( lrgs(lidx).just_lo_degree() )
+ return max_juint;
+ // Bump our degree
+ if( ++reg_degree >= rm_size )
+ return max_juint;
+ } // End of if not infinite-stack neighbor
+ } // End of if actually inserted
+ } // End of if live range overlaps
+    } // End of else collect interferences for 1 node
+  } // End of while forever, scan back for interferences
+ return reg_degree;
+}
+
+//------------------------------update_ifg-------------------------------------
+void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
+ // Some original neighbors of lr1 might have gone away
+ // because the constrained register mask prevented them.
+ // Remove lr1 from such neighbors.
+ IndexSetIterator one(n_lr1);
+ uint neighbor;
+ LRG &lrg1 = lrgs(lr1);
+ while ((neighbor = one.next()) != 0)
+ if( !_ulr.member(neighbor) )
+ if( _phc._ifg->neighbors(neighbor)->remove(lr1) )
+ lrgs(neighbor).inc_degree( -lrg1.compute_degree(lrgs(neighbor)) );
+
+
+ // lr2 is now called (coalesced into) lr1.
+ // Remove lr2 from the IFG.
+ IndexSetIterator two(n_lr2);
+ LRG &lrg2 = lrgs(lr2);
+ while ((neighbor = two.next()) != 0)
+ if( _phc._ifg->neighbors(neighbor)->remove(lr2) )
+ lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) );
+
+ // Some neighbors of intermediate copies now interfere with the
+ // combined live range.
+ IndexSetIterator three(&_ulr);
+ while ((neighbor = three.next()) != 0)
+ if( _phc._ifg->neighbors(neighbor)->insert(lr1) )
+ lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
+}
+
+//------------------------------record_bias------------------------------------
+static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
+ // Tag copy bias here
+ if( !ifg->lrgs(lr1)._copy_bias )
+ ifg->lrgs(lr1)._copy_bias = lr2;
+ if( !ifg->lrgs(lr2)._copy_bias )
+ ifg->lrgs(lr2)._copy_bias = lr1;
+}
+
+//------------------------------copy_copy--------------------------------------
+// See if I can coalesce a series of multiple copies together. I need the
+// final dest copy and the original src copy. They can be the same Node.
+// Compute the compatible register masks.
+bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
+
+ if( !dst_copy->is_SpillCopy() ) return false;
+ if( !src_copy->is_SpillCopy() ) return false;
+ Node *src_def = src_copy->in(src_copy->is_Copy());
+ uint lr1 = _phc.Find(dst_copy);
+ uint lr2 = _phc.Find(src_def );
+
+ // Same live ranges already?
+ if( lr1 == lr2 ) return false;
+
+ // Interfere?
+ if( _phc._ifg->test_edge_sq( lr1, lr2 ) ) return false;
+
+ // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
+ if( !lrgs(lr1)._is_oop && lrgs(lr2)._is_oop ) // not an oop->int cast
+ return false;
+
+ // Coalescing between an aligned live range and a mis-aligned live range?
+ // No, no! Alignment changes how we count degree.
+ if( lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj )
+ return false;
+
+ // Sort; use smaller live-range number
+ Node *lr1_node = dst_copy;
+ Node *lr2_node = src_def;
+ if( lr1 > lr2 ) {
+ uint tmp = lr1; lr1 = lr2; lr2 = tmp;
+ lr1_node = src_def; lr2_node = dst_copy;
+ }
+
+ // Check for compatibility of the 2 live ranges by
+ // intersecting their allowed register sets.
+ RegMask rm = lrgs(lr1).mask();
+ rm.AND(lrgs(lr2).mask());
+ // Number of bits free
+ uint rm_size = rm.Size();
+
+ // If we can use any stack slot, then effective size is infinite
+ if( rm.is_AllStack() ) rm_size += 1000000;
+ // Incompatible masks, no way to coalesce
+ if( rm_size == 0 ) return false;
+
+ // Another early bail-out test is when we are double-coalescing and the
+  // 2 copies are separated by some control flow.
+ if( dst_copy != src_copy ) {
+ Block *src_b = _phc._cfg._bbs[src_copy->_idx];
+ Block *b2 = b;
+ while( b2 != src_b ) {
+ if( b2->num_preds() > 2 ){// Found merge-point
+ _phc._lost_opp_cflow_coalesce++;
+ // extra record_bias commented out because Chris believes it is not
+ // productive. Since we can record only 1 bias, we want to choose one
+ // that stands a chance of working and this one probably does not.
+ //record_bias( _phc._lrgs, lr1, lr2 );
+        return false; // Too hard to find all interferences
+ }
+ b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
+ }
+ }
+
+ // Union the two interference sets together into '_ulr'
+ uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );
+
+ if( reg_degree >= rm_size ) {
+ record_bias( _phc._ifg, lr1, lr2 );
+ return false;
+ }
+
+ // Now I need to compute all the interferences between dst_copy and
+  // src_copy. I'm not willing to visit the entire interference graph, so
+ // I limit my search to things in dst_copy's block or in a straight
+ // line of previous blocks. I give up at merge points or when I get
+ // more interferences than my degree. I can stop when I find src_copy.
+ if( dst_copy != src_copy ) {
+ reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
+ if( reg_degree == max_juint ) {
+ record_bias( _phc._ifg, lr1, lr2 );
+ return false;
+ }
+ } // End of if dst_copy & src_copy are different
+
+
+ // ---- THE COMBINED LRG IS COLORABLE ----
+
+ // YEAH - Now coalesce this copy away
+ assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(), "" );
+
+ IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
+ IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);
+
+ // Update the interference graph
+ update_ifg(lr1, lr2, n_lr1, n_lr2);
+
+ _ulr.remove(lr1);
+
+ // Uncomment the following code to trace Coalescing in great detail.
+ //
+ //if (false) {
+ // tty->cr();
+ // tty->print_cr("#######################################");
+ // tty->print_cr("union %d and %d", lr1, lr2);
+ // n_lr1->dump();
+ // n_lr2->dump();
+ // tty->print_cr("resulting set is");
+ // _ulr.dump();
+ //}
+
+ // Replace n_lr1 with the new combined live range. _ulr will use
+ // n_lr1's old memory on the next iteration. n_lr2 is cleared to
+ // send its internal memory to the free list.
+ _ulr.swap(n_lr1);
+ _ulr.clear();
+ n_lr2->clear();
+
+ lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
+ lrgs(lr2).set_degree( 0 );
+
+ // Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the
+ // union-find tree
+ union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
+ // Combine register restrictions
+ lrgs(lr1).set_mask(rm);
+ lrgs(lr1).compute_set_mask_size();
+ lrgs(lr1)._cost += lrgs(lr2)._cost;
+ lrgs(lr1)._area += lrgs(lr2)._area;
+
+  // While it's uncommon to successfully coalesce live ranges that started out
+  // being not-lo-degree, it can happen. In any case the combined coalesced
+  // live range had better Simplify nicely.
+ lrgs(lr1)._was_lo = 1;
+
+ // kinda expensive to do all the time
+ //tty->print_cr("warning: slow verify happening");
+ //_phc._ifg->verify( &_phc );
+ return true;
+}
+
+//------------------------------coalesce---------------------------------------
+// Conservative (but pessimistic) copy coalescing of a single block
+void PhaseConservativeCoalesce::coalesce( Block *b ) {
+ // Bail out on infrequent blocks
+ if( b->is_uncommon(_phc._cfg._bbs) )
+ return;
+ // Check this block for copies.
+ for( uint i = 1; i<b->end_idx(); i++ ) {
+ // Check for actual copies on inputs. Coalesce a copy into its
+ // input if use and copy's input are compatible.
+ Node *copy1 = b->_nodes[i];
+ uint idx1 = copy1->is_Copy();
+ if( !idx1 ) continue; // Not a copy
+
+ if( copy_copy(copy1,copy1,b,i) ) {
+ i--; // Retry, same location in block
+ PhaseChaitin::_conserv_coalesce++; // Collect stats on success
+ continue;
+ }
+
+ /* do not attempt pairs. About 1/2 of all pairs can be removed by
+ post-alloc. The other set are too few to bother.
+ Node *copy2 = copy1->in(idx1);
+ uint idx2 = copy2->is_Copy();
+ if( !idx2 ) continue;
+ if( copy_copy(copy1,copy2,b,i) ) {
+ i--; // Retry, same location in block
+ PhaseChaitin::_conserv_coalesce_pair++; // Collect stats on success
+ continue;
+ }
+ */
+ }
+}
diff --git a/src/share/vm/opto/coalesce.hpp b/src/share/vm/opto/coalesce.hpp
new file mode 100644
index 000000000..b7cd9da87
--- /dev/null
+++ b/src/share/vm/opto/coalesce.hpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class LoopTree;
+class LRG;
+class LRG_List;
+class Matcher;
+class PhaseIFG;
+class PhaseCFG;
+
+//------------------------------PhaseCoalesce----------------------------------
+class PhaseCoalesce : public Phase {
+protected:
+ PhaseChaitin &_phc;
+
+public:
+ // Coalesce copies
+ PhaseCoalesce( PhaseChaitin &chaitin ) : Phase(Coalesce), _phc(chaitin) { }
+
+ virtual void verify() = 0;
+
+ // Coalesce copies
+ void coalesce_driver( );
+
+ // Coalesce copies in this block
+ virtual void coalesce( Block *b ) = 0;
+
+ // Attempt to coalesce live ranges defined by these 2
+ void combine_these_two( Node *n1, Node *n2 );
+
+ LRG &lrgs( uint lidx ) { return _phc.lrgs(lidx); }
+#ifndef PRODUCT
+  // Dump internal name
+ void dump( Node *n ) const;
+ // Dump whole shebang
+ void dump() const;
+#endif
+};
+
+//------------------------------PhaseAggressiveCoalesce------------------------
+// Aggressive, pessimistic copy coalescing. Aggressive means ignore graph
+// colorability; perhaps coalescing to the point of forcing a spill.
+// Pessimistic means we cannot coalesce if 2 live ranges interfere. This
+// implies we do not hit a fixed point right away.
+class PhaseAggressiveCoalesce : public PhaseCoalesce {
+ uint _unique;
+public:
+ // Coalesce copies
+ PhaseAggressiveCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin) {}
+
+ virtual void verify() { };
+
+ // Aggressively coalesce copies in this block
+ virtual void coalesce( Block *b );
+
+ // Where I fail to coalesce, manifest virtual copies as the Real Thing
+ void insert_copies( Matcher &matcher );
+
+ // Copy insertion needs some smarts in case live ranges overlap
+ void insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name );
+};
+
+
+//------------------------------PhaseConservativeCoalesce----------------------
+// Conservative, pessimistic copy coalescing. Conservative means do not
+// coalesce if the resultant live range will be uncolorable. Pessimistic
+// means we cannot coalesce if 2 live ranges interfere. This implies we do
+// not hit a fixed point right away.
+class PhaseConservativeCoalesce : public PhaseCoalesce {
+ IndexSet _ulr; // Union live range interferences
+public:
+ // Coalesce copies
+ PhaseConservativeCoalesce( PhaseChaitin &chaitin );
+
+ virtual void verify();
+
+ // Conservatively coalesce copies in this block
+ virtual void coalesce( Block *b );
+
+ // Coalesce this chain of copies away
+ bool copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex );
+
+ void union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex );
+
+ uint compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint rm_size, uint reg_degree, uint lr1, uint lr2);
+
+ void update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2);
+};
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
new file mode 100644
index 000000000..d62dbf344
--- /dev/null
+++ b/src/share/vm/opto/compile.cpp
@@ -0,0 +1,2384 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_compile.cpp.incl"
+
+/// Support for intrinsics.
+
+// Return the index at which m must be inserted (or already exists).
+// The sort order is by the address of the ciMethod, with is_virtual as minor key.
+int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
+#ifdef ASSERT
+ for (int i = 1; i < _intrinsics->length(); i++) {
+ CallGenerator* cg1 = _intrinsics->at(i-1);
+ CallGenerator* cg2 = _intrinsics->at(i);
+ assert(cg1->method() != cg2->method()
+ ? cg1->method() < cg2->method()
+ : cg1->is_virtual() < cg2->is_virtual(),
+ "compiler intrinsics list must stay sorted");
+ }
+#endif
+ // Binary search sorted list, in decreasing intervals [lo, hi].
+ int lo = 0, hi = _intrinsics->length()-1;
+ while (lo <= hi) {
+ int mid = (uint)(hi + lo) / 2;
+ ciMethod* mid_m = _intrinsics->at(mid)->method();
+ if (m < mid_m) {
+ hi = mid-1;
+ } else if (m > mid_m) {
+ lo = mid+1;
+ } else {
+ // look at minor sort key
+ bool mid_virt = _intrinsics->at(mid)->is_virtual();
+ if (is_virtual < mid_virt) {
+ hi = mid-1;
+ } else if (is_virtual > mid_virt) {
+ lo = mid+1;
+ } else {
+ return mid; // exact match
+ }
+ }
+ }
+ return lo; // inexact match
+}
+
+void Compile::register_intrinsic(CallGenerator* cg) {
+ if (_intrinsics == NULL) {
+ _intrinsics = new GrowableArray<CallGenerator*>(60);
+ }
+ // This code is stolen from ciObjectFactory::insert.
+ // Really, GrowableArray should have methods for
+ // insert_at, remove_at, and binary_search.
+ int len = _intrinsics->length();
+ int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
+ if (index == len) {
+ _intrinsics->append(cg);
+ } else {
+#ifdef ASSERT
+ CallGenerator* oldcg = _intrinsics->at(index);
+ assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
+#endif
+ _intrinsics->append(_intrinsics->at(len-1));
+ int pos;
+ for (pos = len-2; pos >= index; pos--) {
+ _intrinsics->at_put(pos+1,_intrinsics->at(pos));
+ }
+ _intrinsics->at_put(index, cg);
+ }
+ assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
+}
+
+CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
+ assert(m->is_loaded(), "don't try this on unloaded methods");
+ if (_intrinsics != NULL) {
+ int index = intrinsic_insertion_index(m, is_virtual);
+ if (index < _intrinsics->length()
+ && _intrinsics->at(index)->method() == m
+ && _intrinsics->at(index)->is_virtual() == is_virtual) {
+ return _intrinsics->at(index);
+ }
+ }
+ // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
+ if (m->intrinsic_id() != vmIntrinsics::_none) {
+ CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
+ if (cg != NULL) {
+ // Save it for next time:
+ register_intrinsic(cg);
+ return cg;
+ } else {
+ gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled);
+ }
+ }
+ return NULL;
+}
+
+// Compile:: register_library_intrinsics and make_vm_intrinsic are defined
+// in library_call.cpp.
+
+
+#ifndef PRODUCT
+// statistics gathering...
+
+juint Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
+jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};
+
+bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
+ assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
+ int oflags = _intrinsic_hist_flags[id];
+ assert(flags != 0, "what happened?");
+ if (is_virtual) {
+ flags |= _intrinsic_virtual;
+ }
+ bool changed = (flags != oflags);
+ if ((flags & _intrinsic_worked) != 0) {
+ juint count = (_intrinsic_hist_count[id] += 1);
+ if (count == 1) {
+ changed = true; // first time
+ }
+ // increment the overall count also:
+ _intrinsic_hist_count[vmIntrinsics::_none] += 1;
+ }
+ if (changed) {
+ if (((oflags ^ flags) & _intrinsic_virtual) != 0) {
+ // Something changed about the intrinsic's virtuality.
+ if ((flags & _intrinsic_virtual) != 0) {
+ // This is the first use of this intrinsic as a virtual call.
+ if (oflags != 0) {
+ // We already saw it as a non-virtual, so note both cases.
+ flags |= _intrinsic_both;
+ }
+ } else if ((oflags & _intrinsic_both) == 0) {
+ // This is the first use of this intrinsic as a non-virtual
+ flags |= _intrinsic_both;
+ }
+ }
+ _intrinsic_hist_flags[id] = (jubyte) (oflags | flags);
+ }
+ // update the overall flags also:
+ _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
+ return changed;
+}
+
+static char* format_flags(int flags, char* buf) {
+ buf[0] = 0;
+ if ((flags & Compile::_intrinsic_worked) != 0) strcat(buf, ",worked");
+ if ((flags & Compile::_intrinsic_failed) != 0) strcat(buf, ",failed");
+ if ((flags & Compile::_intrinsic_disabled) != 0) strcat(buf, ",disabled");
+ if ((flags & Compile::_intrinsic_virtual) != 0) strcat(buf, ",virtual");
+ if ((flags & Compile::_intrinsic_both) != 0) strcat(buf, ",nonvirtual");
+ if (buf[0] == 0) strcat(buf, ",");
+ assert(buf[0] == ',', "must be");
+ return &buf[1];
+}
+
+void Compile::print_intrinsic_statistics() {
+ char flagsbuf[100];
+ ttyLocker ttyl;
+ if (xtty != NULL) xtty->head("statistics type='intrinsic'");
+ tty->print_cr("Compiler intrinsic usage:");
+ juint total = _intrinsic_hist_count[vmIntrinsics::_none];
+ if (total == 0) total = 1; // avoid div0 in case of no successes
+ #define PRINT_STAT_LINE(name, c, f) \
+ tty->print_cr(" %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
+ for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
+ vmIntrinsics::ID id = (vmIntrinsics::ID) index;
+ int flags = _intrinsic_hist_flags[id];
+ juint count = _intrinsic_hist_count[id];
+ if ((flags | count) != 0) {
+ PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
+ }
+ }
+ PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
+ if (xtty != NULL) xtty->tail("statistics");
+}
+
+void Compile::print_statistics() {
+ { ttyLocker ttyl;
+ if (xtty != NULL) xtty->head("statistics type='opto'");
+ Parse::print_statistics();
+ PhaseCCP::print_statistics();
+ PhaseRegAlloc::print_statistics();
+ Scheduling::print_statistics();
+ PhasePeephole::print_statistics();
+ PhaseIdealLoop::print_statistics();
+ if (xtty != NULL) xtty->tail("statistics");
+ }
+ if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
+ // put this under its own <statistics> element.
+ print_intrinsic_statistics();
+ }
+}
+#endif //PRODUCT
+
+// Support for bundling info
+Bundle* Compile::node_bundling(const Node *n) {
+ assert(valid_bundle_info(n), "oob");
+ return &_node_bundling_base[n->_idx];
+}
+
+bool Compile::valid_bundle_info(const Node *n) {
+ return (_node_bundling_limit > n->_idx);
+}
+
+
+// Identify all nodes that are reachable from below, useful.
+// Use a breadth-first pass that records state in a Unique_Node_List;
+// recursive traversal is slower.
+void Compile::identify_useful_nodes(Unique_Node_List &useful) {
+ int estimated_worklist_size = unique();
+ useful.map( estimated_worklist_size, NULL ); // preallocate space
+
+ // Initialize worklist
+ if (root() != NULL) { useful.push(root()); }
+ // If 'top' is cached, declare it useful to preserve cached node
+ if( cached_top_node() ) { useful.push(cached_top_node()); }
+
+  // Push all useful nodes onto the list, breadth-first
+ for( uint next = 0; next < useful.size(); ++next ) {
+ assert( next < unique(), "Unique useful nodes < total nodes");
+ Node *n = useful.at(next);
+ uint max = n->len();
+ for( uint i = 0; i < max; ++i ) {
+ Node *m = n->in(i);
+ if( m == NULL ) continue;
+ useful.push(m);
+ }
+ }
+}
+
+// Disconnect all useless nodes by disconnecting those at the boundary.
+void Compile::remove_useless_nodes(Unique_Node_List &useful) {
+ uint next = 0;
+ while( next < useful.size() ) {
+ Node *n = useful.at(next++);
+ // Use raw traversal of out edges since this code removes out edges
+ int max = n->outcnt();
+ for (int j = 0; j < max; ++j ) {
+ Node* child = n->raw_out(j);
+ if( ! useful.member(child) ) {
+ assert( !child->is_top() || child != top(),
+ "If top is cached in Compile object it is in useful list");
+ // Only need to remove this out-edge to the useless node
+ n->raw_del_out(j);
+ --j;
+ --max;
+ }
+ }
+ if (n->outcnt() == 1 && n->has_special_unique_user()) {
+ record_for_igvn( n->unique_out() );
+ }
+ }
+ debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
+}
+
+//------------------------------frame_size_in_words-----------------------------
+// frame_slots in units of words
+int Compile::frame_size_in_words() const {
+ // shift is 0 in LP32 and 1 in LP64
+ const int shift = (LogBytesPerWord - LogBytesPerInt);
+ int words = _frame_slots >> shift;
+ assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
+ return words;
+}
+
+// ============================================================================
+//------------------------------CompileWrapper---------------------------------
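+// RAII helper: the constructor registers this Compile with the active ciEnv and resets
+// per-compile state; the destructor clears the registration and frees the scratch buffer blob.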
+class CompileWrapper : public StackObj {
+ Compile *const _compile;
+ public:
+ CompileWrapper(Compile* compile);
+
+ ~CompileWrapper();
+};
+
+CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
+ // the Compile* pointer is stored in the current ciEnv:
+ ciEnv* env = compile->env();
+ assert(env == ciEnv::current(), "must already be a ciEnv active");
+ assert(env->compiler_data() == NULL, "compile already active?");
+ env->set_compiler_data(compile);
+ assert(compile == Compile::current(), "sanity");
+
+ compile->set_type_dict(NULL);
+ compile->set_type_hwm(NULL);
+ compile->set_type_last_size(0);
+ compile->set_last_tf(NULL, NULL);
+ compile->set_indexSet_arena(NULL);
+ compile->set_indexSet_free_block_list(NULL);
+ compile->init_type_arena();
+ Type::Initialize(compile);
+ _compile->set_scratch_buffer_blob(NULL);
+ _compile->begin_method();
+}
+CompileWrapper::~CompileWrapper() {
+ if (_compile->failing()) {
+ _compile->print_method("Failed");
+ }
+ _compile->end_method();
+ if (_compile->scratch_buffer_blob() != NULL)
+ BufferBlob::free(_compile->scratch_buffer_blob());
+ _compile->env()->set_compiler_data(NULL);
+}
+
+
+//----------------------------print_compile_messages---------------------------
+void Compile::print_compile_messages() {
+#ifndef PRODUCT
+ // Check if recompiling
+ if (_subsume_loads == false && PrintOpto) {
+ // Recompiling without allowing machine instructions to subsume loads
+ tty->print_cr("*********************************************************");
+ tty->print_cr("** Bailout: Recompile without subsuming loads **");
+ tty->print_cr("*********************************************************");
+ }
+ if (env()->break_at_compile()) {
+ // Open the debugger when compiling this method.
+ tty->print("### Breaking when compiling: ");
+ method()->print_short_name();
+ tty->cr();
+ BREAKPOINT;
+ }
+
+ if( PrintOpto ) {
+ if (is_osr_compilation()) {
+ tty->print("[OSR]%3d", _compile_id);
+ } else {
+ tty->print("%3d", _compile_id);
+ }
+ }
+#endif
+}
+
+
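+// Lazily create the BufferBlob used by scratch_emit_size(), reserving space at its end
+// for relocation records.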
+void Compile::init_scratch_buffer_blob() {
+ if( scratch_buffer_blob() != NULL ) return;
+
+ // Construct a temporary CodeBuffer to have it construct a BufferBlob
+ // Cache this BufferBlob for this compile.
+ ResourceMark rm;
+ int size = (MAX_inst_size + MAX_stubs_size + MAX_const_size);
+ BufferBlob* blob = BufferBlob::create("Compile::scratch_buffer", size);
+ // Record the buffer blob for next time.
+ set_scratch_buffer_blob(blob);
+ guarantee(scratch_buffer_blob() != NULL, "Need BufferBlob for code generation");
+
+ // Initialize the relocation buffers
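+ // The scratch relocation records occupy the last MAX_locs_size relocInfo slots of the blob.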
+ relocInfo* locs_buf = (relocInfo*) blob->instructions_end() - MAX_locs_size;
+ set_scratch_locs_memory(locs_buf);
+}
+
+
+//-----------------------scratch_emit_size-------------------------------------
+// Helper function that computes size by emitting code
+uint Compile::scratch_emit_size(const Node* n) {
+ // Emit into a trash buffer and count bytes emitted.
+ // This is a pretty expensive way to compute a size,
+ // but it works well enough if seldom used.
+ // All common fixed-size instructions are given a size
+ // method by the AD file.
+ // Note that the scratch buffer blob and locs memory are
+ // allocated at the beginning of the compile task, and
+ // may be shared by several calls to scratch_emit_size.
+ // The allocation of the scratch buffer blob is particularly
+ // expensive, since it has to grab the code cache lock.
+ BufferBlob* blob = this->scratch_buffer_blob();
+ assert(blob != NULL, "Initialize BufferBlob at start");
+ assert(blob->size() > MAX_inst_size, "sanity");
+ relocInfo* locs_buf = scratch_locs_memory();
+ address blob_begin = blob->instructions_begin();
+ address blob_end = (address)locs_buf;
+ assert(blob->instructions_contains(blob_end), "sanity");
+ CodeBuffer buf(blob_begin, blob_end - blob_begin);
+ buf.initialize_consts_size(MAX_const_size);
+ buf.initialize_stubs_size(MAX_stubs_size);
+ assert(locs_buf != NULL, "sanity");
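+ // Split the relocation space evenly between the instruction and stub sections.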
+ int lsize = MAX_locs_size / 2;
+ buf.insts()->initialize_shared_locs(&locs_buf[0], lsize);
+ buf.stubs()->initialize_shared_locs(&locs_buf[lsize], lsize);
+ n->emit(buf, this->regalloc());
+ return buf.code_size();
+}
+
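+// Forward the node to the connection graph, but only when escape analysis is enabled
+// (i.e. _congraph is non-NULL).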
+void Compile::record_for_escape_analysis(Node* n) {
+ if (_congraph != NULL)
+ _congraph->record_for_escape_analysis(n);
+}
+
+
+// ============================================================================
+//------------------------------Compile standard-------------------------------
+debug_only( int Compile::_debug_idx = 100000; )
+
+// Compile a method. entry_bci is -1 for normal compilations and indicates
+// the continuation bci for on-stack replacement.
+
+
+Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads )
+ : Phase(Compiler),
+ _env(ci_env),
+ _log(ci_env->log()),
+ _compile_id(ci_env->compile_id()),
+ _save_argument_registers(false),
+ _stub_name(NULL),
+ _stub_function(NULL),
+ _stub_entry_point(NULL),
+ _method(target),
+ _entry_bci(osr_bci),
+ _initial_gvn(NULL),
+ _for_igvn(NULL),
+ _warm_calls(NULL),
+ _subsume_loads(subsume_loads),
+ _failure_reason(NULL),
+ _code_buffer("Compile::Fill_buffer"),
+ _orig_pc_slot(0),
+ _orig_pc_slot_offset_in_bytes(0),
+ _node_bundling_limit(0),
+ _node_bundling_base(NULL),
+#ifndef PRODUCT
+ _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
+ _printer(IdealGraphPrinter::printer()),
+#endif
+ _congraph(NULL) {
+ C = this;
+
+ CompileWrapper cw(this);
+#ifndef PRODUCT
+ if (TimeCompiler2) {
+ tty->print(" ");
+ target->holder()->name()->print();
+ tty->print(".");
+ target->print_short_name();
+ tty->print(" ");
+ }
+ TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
+ TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
+ set_print_assembly(PrintOptoAssembly || _method->should_print_assembly());
+#endif
+
+ if (ProfileTraps) {
+ // Make sure the method being compiled gets its own MDO,
+ // so we can at least track the decompile_count().
+ method()->build_method_data();
+ }
+
+ Init(::AliasLevel);
+
+
+ print_compile_messages();
+
+ if (UseOldInlining || PrintCompilation NOT_PRODUCT( || PrintOpto) )
+ _ilt = InlineTree::build_inline_tree_root();
+ else
+ _ilt = NULL;
+
+ // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
+ assert(num_alias_types() >= AliasIdxRaw, "");
+
+#define MINIMUM_NODE_HASH 1023
+ // Node list that Iterative GVN will start with
+ Unique_Node_List for_igvn(comp_arena());
+ set_for_igvn(&for_igvn);
+
+ // GVN that will be run immediately on new nodes
+ uint estimated_size = method()->code_size()*4+64;
+ estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
+ PhaseGVN gvn(node_arena(), estimated_size);
+ set_initial_gvn(&gvn);
+
+ if (DoEscapeAnalysis)
+ _congraph = new ConnectionGraph(this);
+
+ { // Scope for timing the parser
+ TracePhase t3("parse", &_t_parser, true);
+
+ // Put top into the hash table ASAP.
+ initial_gvn()->transform_no_reclaim(top());
+
+ // Set up tf(), start(), and find a CallGenerator.
+ CallGenerator* cg;
+ if (is_osr_compilation()) {
+ const TypeTuple *domain = StartOSRNode::osr_domain();
+ const TypeTuple *range = TypeTuple::make_range(method()->signature());
+ init_tf(TypeFunc::make(domain, range));
+ StartNode* s = new (this, 2) StartOSRNode(root(), domain);
+ initial_gvn()->set_type_bottom(s);
+ init_start(s);
+ cg = CallGenerator::for_osr(method(), entry_bci());
+ } else {
+ // Normal case.
+ init_tf(TypeFunc::make(method()));
+ StartNode* s = new (this, 2) StartNode(root(), tf()->domain());
+ initial_gvn()->set_type_bottom(s);
+ init_start(s);
+ float past_uses = method()->interpreter_invocation_count();
+ float expected_uses = past_uses;
+ cg = CallGenerator::for_inline(method(), expected_uses);
+ }
+ if (failing()) return;
+ if (cg == NULL) {
+ record_method_not_compilable_all_tiers("cannot parse method");
+ return;
+ }
+ JVMState* jvms = build_start_state(start(), tf());
+ if ((jvms = cg->generate(jvms)) == NULL) {
+ record_method_not_compilable("method parse failed");
+ return;
+ }
+ GraphKit kit(jvms);
+
+ if (!kit.stopped()) {
+ // Accept return values, and transfer control we know not where.
+ // This is done by a special, unique ReturnNode bound to root.
+ return_values(kit.jvms());
+ }
+
+ if (kit.has_exceptions()) {
+ // Any exceptions that escape from this call must be rethrown
+ // to whatever caller is dynamically above us on the stack.
+ // This is done by a special, unique RethrowNode bound to root.
+ rethrow_exceptions(kit.transfer_exceptions_into_jvms());
+ }
+
+ // Remove clutter produced by parsing.
+ if (!failing()) {
+ ResourceMark rm;
+ PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
+ }
+ }
+
+ // Note: Large methods are capped off in do_one_bytecode().
+ if (failing()) return;
+
+ // After parsing, node notes are no longer automagic.
+ // They must be propagated by register_new_node_with_optimizer(),
+ // clone(), or the like.
+ set_default_node_notes(NULL);
+
+ for (;;) {
+ int successes = Inline_Warm();
+ if (failing()) return;
+ if (successes == 0) break;
+ }
+
+ // Drain the list.
+ Finish_Warm();
+#ifndef PRODUCT
+ if (_printer) {
+ _printer->print_inlining(this);
+ }
+#endif
+
+ if (failing()) return;
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ // Perform escape analysis
+ if (_congraph != NULL) {
+ NOT_PRODUCT( TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, TimeCompiler); )
+ _congraph->compute_escape();
+#ifndef PRODUCT
+ if (PrintEscapeAnalysis) {
+ _congraph->dump();
+ }
+#endif
+ }
+ // Now optimize
+ Optimize();
+ if (failing()) return;
+ NOT_PRODUCT( verify_graph_edges(); )
+
+#ifndef PRODUCT
+ if (PrintIdeal) {
+ ttyLocker ttyl; // keep the following output all in one block
+ // This output goes directly to the tty, not the compiler log.
+ // To enable tools to match it up with the compilation activity,
+ // be sure to tag this tty output with the compile ID.
+ if (xtty != NULL) {
+ xtty->head("ideal compile_id='%d'%s", compile_id(),
+ is_osr_compilation() ? " compile_kind='osr'" :
+ "");
+ }
+ root()->dump(9999);
+ if (xtty != NULL) {
+ xtty->tail("ideal");
+ }
+ }
+#endif
+
+ // Now that we know the size of all the monitors we can add a fixed slot
+ // for the original deopt pc.
+
+ _orig_pc_slot = fixed_slots();
+ int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
+ set_fixed_slots(next_slot);
+
+ // Now generate code
+ Code_Gen();
+ if (failing()) return;
+
+ // Check if we want to skip execution of all compiled code.
+ {
+#ifndef PRODUCT
+ if (OptoNoExecute) {
+ record_method_not_compilable("+OptoNoExecute"); // Flag as failed
+ return;
+ }
+ TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
+#endif
+
+ if (is_osr_compilation()) {
+ _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
+ _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
+ } else {
+ _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
+ _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
+ }
+
+ env()->register_method(_method, _entry_bci,
+ &_code_offsets,
+ _orig_pc_slot_offset_in_bytes,
+ code_buffer(),
+ frame_size_in_words(), _oop_map_set,
+ &_handler_table, &_inc_table,
+ compiler,
+ env()->comp_level(),
+ true, /*has_debug_info*/
+ has_unsafe_access()
+ );
+ }
+}
+
+//------------------------------Compile----------------------------------------
+// Compile a runtime stub
+Compile::Compile( ciEnv* ci_env,
+ TypeFunc_generator generator,
+ address stub_function,
+ const char *stub_name,
+ int is_fancy_jump,
+ bool pass_tls,
+ bool save_arg_registers,
+ bool return_pc )
+ : Phase(Compiler),
+ _env(ci_env),
+ _log(ci_env->log()),
+ _compile_id(-1),
+ _save_argument_registers(save_arg_registers),
+ _method(NULL),
+ _stub_name(stub_name),
+ _stub_function(stub_function),
+ _stub_entry_point(NULL),
+ _entry_bci(InvocationEntryBci),
+ _initial_gvn(NULL),
+ _for_igvn(NULL),
+ _warm_calls(NULL),
+ _orig_pc_slot(0),
+ _orig_pc_slot_offset_in_bytes(0),
+ _subsume_loads(true),
+ _failure_reason(NULL),
+ _code_buffer("Compile::Fill_buffer"),
+ _node_bundling_limit(0),
+ _node_bundling_base(NULL),
+#ifndef PRODUCT
+ _trace_opto_output(TraceOptoOutput),
+ _printer(NULL),
+#endif
+ _congraph(NULL) {
+ C = this;
+
+#ifndef PRODUCT
+ TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
+ TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
+ set_print_assembly(PrintFrameConverterAssembly);
+#endif
+ CompileWrapper cw(this);
+ Init(/*AliasLevel=*/ 0);
+ init_tf((*generator)());
+
+ {
+ // The following is a dummy for the sake of GraphKit::gen_stub
+ Unique_Node_List for_igvn(comp_arena());
+ set_for_igvn(&for_igvn); // not used, but some GraphKit guys push on this
+ PhaseGVN gvn(Thread::current()->resource_area(),255);
+ set_initial_gvn(&gvn); // not significant, but GraphKit guys use it pervasively
+ gvn.transform_no_reclaim(top());
+
+ GraphKit kit;
+ kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
+ }
+
+ NOT_PRODUCT( verify_graph_edges(); )
+ Code_Gen();
+ if (failing()) return;
+
+
+ // Entry point will be accessed using compile->stub_entry_point();
+ if (code_buffer() == NULL) {
+ Matcher::soft_match_failure();
+ } else {
+ if (PrintAssembly && (WizardMode || Verbose))
+ tty->print_cr("### Stub::%s", stub_name);
+
+ if (!failing()) {
+ assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");
+
+ // Make the NMethod
+ // For now we mark the frame as never safe for profile stackwalking
+ RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
+ code_buffer(),
+ CodeOffsets::frame_never_safe,
+ // _code_offsets.value(CodeOffsets::Frame_Complete),
+ frame_size_in_words(),
+ _oop_map_set,
+ save_arg_registers);
+ assert(rs != NULL && rs->is_runtime_stub(), "sanity check");
+
+ _stub_entry_point = rs->entry_point();
+ }
+ }
+}
+
+#ifndef PRODUCT
+void print_opto_verbose_signature( const TypeFunc *j_sig, const char *stub_name ) {
+ if(PrintOpto && Verbose) {
+ tty->print("%s ", stub_name); j_sig->print_flattened(); tty->cr();
+ }
+}
+#endif
+
+void Compile::print_codes() {
+}
+
+//------------------------------Init-------------------------------------------
+// Prepare for a single compilation
+void Compile::Init(int aliaslevel) {
+ _unique = 0;
+ _regalloc = NULL;
+
+ _tf = NULL; // filled in later
+ _top = NULL; // cached later
+ _matcher = NULL; // filled in later
+ _cfg = NULL; // filled in later
+
+ set_24_bit_selection_and_mode(Use24BitFP, false);
+
+ _node_note_array = NULL;
+ _default_node_notes = NULL;
+
+ _immutable_memory = NULL; // filled in at first inquiry
+
+ // Globally visible Nodes
+ // First set TOP to NULL to give safe behavior during creation of RootNode
+ set_cached_top_node(NULL);
+ set_root(new (this, 3) RootNode());
+ // Now that you have a Root to point to, create the real TOP
+ set_cached_top_node( new (this, 1) ConNode(Type::TOP) );
+ set_recent_alloc(NULL, NULL);
+
+ // Create Debug Information Recorder to record scopes, oopmaps, etc.
+ env()->set_oop_recorder(new OopRecorder(comp_arena()));
+ env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));
+ env()->set_dependencies(new Dependencies(env()));
+
+ _fixed_slots = 0;
+ set_has_split_ifs(false);
+ set_has_loops(has_method() && method()->has_loops()); // first approximation
+ _deopt_happens = true; // start out assuming the worst
+ _trap_can_recompile = false; // no traps emitted yet
+ _major_progress = true; // start out assuming good things will happen
+ set_has_unsafe_access(false);
+ Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
+ set_decompile_count(0);
+
+ // Compilation level related initialization
+ if (env()->comp_level() == CompLevel_fast_compile) {
+ set_num_loop_opts(Tier1LoopOptsCount);
+ set_do_inlining(Tier1Inline != 0);
+ set_max_inline_size(Tier1MaxInlineSize);
+ set_freq_inline_size(Tier1FreqInlineSize);
+ set_do_scheduling(false);
+ set_do_count_invocations(Tier1CountInvocations);
+ set_do_method_data_update(Tier1UpdateMethodData);
+ } else {
+ assert(env()->comp_level() == CompLevel_full_optimization, "unknown comp level");
+ set_num_loop_opts(LoopOptsCount);
+ set_do_inlining(Inline);
+ set_max_inline_size(MaxInlineSize);
+ set_freq_inline_size(FreqInlineSize);
+ set_do_scheduling(OptoScheduling);
+ set_do_count_invocations(false);
+ set_do_method_data_update(false);
+ }
+
+ if (debug_info()->recording_non_safepoints()) {
+ set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
+ (comp_arena(), 8, 0, NULL));
+ set_default_node_notes(Node_Notes::make(this));
+ }
+
+ // // -- Initialize types before each compile --
+ // // Update cached type information
+ // if( _method && _method->constants() )
+ // Type::update_loaded_types(_method, _method->constants());
+
+ // Init alias_type map.
+ if (!DoEscapeAnalysis && aliaslevel == 3)
+ aliaslevel = 2; // No unique types without escape analysis
+ _AliasLevel = aliaslevel;
+ const int grow_ats = 16;
+ _max_alias_types = grow_ats;
+ _alias_types = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);
+ AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, grow_ats);
+ Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);
+ {
+ for (int i = 0; i < grow_ats; i++) _alias_types[i] = &ats[i];
+ }
+ // Initialize the first few types.
+ _alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);
+ _alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);
+ _alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);
+ _num_alias_types = AliasIdxRaw+1;
+ // Zero out the alias type cache.
+ Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));
+ // A NULL adr_type hits in the cache right away. Preload the right answer.
+ probe_alias_cache(NULL)->_index = AliasIdxTop;
+
+ _intrinsics = NULL;
+ _macro_nodes = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
+ register_library_intrinsics();
+}
+
+//---------------------------init_start----------------------------------------
+// Install the StartNode on this compile object.
+void Compile::init_start(StartNode* s) {
+ if (failing())
+ return; // already failing
+ assert(s == start(), "");
+}
+
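+// The StartNode is not cached on the Compile object; recover it by scanning the outputs of root().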
+StartNode* Compile::start() const {
+ assert(!failing(), "");
+ for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {
+ Node* start = root()->fast_out(i);
+ if( start->is_Start() )
+ return start->as_Start();
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//-------------------------------immutable_memory-------------------------------------
+// Access immutable memory
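+// This is the Memory projection hanging off the StartNode; it is found on first request
+// and then cached in _immutable_memory.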
+Node* Compile::immutable_memory() {
+ if (_immutable_memory != NULL) {
+ return _immutable_memory;
+ }
+ StartNode* s = start();
+ for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {
+ Node *p = s->fast_out(i);
+ if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {
+ _immutable_memory = p;
+ return _immutable_memory;
+ }
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//----------------------set_cached_top_node------------------------------------
+// Install the cached top node, and make sure Node::is_top works correctly.
+void Compile::set_cached_top_node(Node* tn) {
+ if (tn != NULL) verify_top(tn);
+ Node* old_top = _top;
+ _top = tn;
+ // Calling Node::setup_is_top allows the nodes the chance to adjust
+ // their _out arrays.
+ if (_top != NULL) _top->setup_is_top();
+ if (old_top != NULL) old_top->setup_is_top();
+ assert(_top == NULL || top()->is_top(), "");
+}
+
+#ifndef PRODUCT
+void Compile::verify_top(Node* tn) const {
+ if (tn != NULL) {
+ assert(tn->is_Con(), "top node must be a constant");
+ assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");
+ assert(tn->in(0) != NULL, "must have live top node");
+ }
+}
+#endif
+
+
+///-------------------Managing Per-Node Debug & Profile Info-------------------
+
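+// Append zero-filled blocks of Node_Notes to 'arr', growing it by at least 'grow_by' blocks
+// (and at least doubling its current length).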
+void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {
+ guarantee(arr != NULL, "");
+ int num_blocks = arr->length();
+ if (grow_by < num_blocks) grow_by = num_blocks;
+ int num_notes = grow_by * _node_notes_block_size;
+ Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes);
+ Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes));
+ while (num_notes > 0) {
+ arr->append(notes);
+ notes += _node_notes_block_size;
+ num_notes -= _node_notes_block_size;
+ }
+ assert(num_notes == 0, "exact multiple, please");
+}
+
+bool Compile::copy_node_notes_to(Node* dest, Node* source) {
+ if (source == NULL || dest == NULL) return false;
+
+ if (dest->is_Con())
+ return false; // Do not push debug info onto constants.
+
+#ifdef ASSERT
+ // Leave a bread crumb trail pointing to the original node:
+ if (dest != NULL && dest != source && dest->debug_orig() == NULL) {
+ dest->set_debug_orig(source);
+ }
+#endif
+
+ if (node_note_array() == NULL)
+ return false; // Not collecting any notes now.
+
+ // This is a copy onto a pre-existing node, which may already have notes.
+ // If both nodes have notes, do not overwrite any pre-existing notes.
+ Node_Notes* source_notes = node_notes_at(source->_idx);
+ if (source_notes == NULL || source_notes->is_clear()) return false;
+ Node_Notes* dest_notes = node_notes_at(dest->_idx);
+ if (dest_notes == NULL || dest_notes->is_clear()) {
+ return set_node_notes_at(dest->_idx, source_notes);
+ }
+
+ Node_Notes merged_notes = (*source_notes);
+ // The order of operations here ensures that dest notes will win...
+ merged_notes.update_from(dest_notes);
+ return set_node_notes_at(dest->_idx, &merged_notes);
+}
+
+
+//--------------------------allow_range_check_smearing-------------------------
+// Gating condition for coalescing similar range checks.
+// Sometimes we try 'speculatively' replacing a series of range checks with a
+// single covering check that is at least as strong as any of them.
+// If the optimization succeeds, the simplified (strengthened) range check
+// will always succeed. If it fails, we will deopt, and then give up
+// on the optimization.
+bool Compile::allow_range_check_smearing() const {
+ // If this method has already thrown a range-check,
+ // assume it was because we already tried range smearing
+ // and it failed.
+ uint already_trapped = trap_count(Deoptimization::Reason_range_check);
+ return !already_trapped;
+}
+
+
+//------------------------------flatten_alias_type-----------------------------
+const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
+ int offset = tj->offset();
+ TypePtr::PTR ptr = tj->ptr();
+
+ // Process weird unsafe references.
+ if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
+ assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
+ tj = TypeOopPtr::BOTTOM;
+ ptr = tj->ptr();
+ offset = tj->offset();
+ }
+
+ // Array pointers need some flattening
+ const TypeAryPtr *ta = tj->isa_aryptr();
+ if( ta && _AliasLevel >= 2 ) {
+ // For arrays indexed by constant indices, we flatten the alias
+ // space to include all of the array body. Only the header, klass
+ // and array length can be accessed un-aliased.
+ if( offset != Type::OffsetBot ) {
+ if( ta->const_oop() ) { // methodDataOop or methodOop
+ offset = Type::OffsetBot; // Flatten constant access into array body
+ tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,Type::OffsetBot, ta->instance_id());
+ } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
+ // range is OK as-is.
+ tj = ta = TypeAryPtr::RANGE;
+ } else if( offset == oopDesc::klass_offset_in_bytes() ) {
+ tj = TypeInstPtr::KLASS; // all klass loads look alike
+ ta = TypeAryPtr::RANGE; // generic ignored junk
+ ptr = TypePtr::BotPTR;
+ } else if( offset == oopDesc::mark_offset_in_bytes() ) {
+ tj = TypeInstPtr::MARK;
+ ta = TypeAryPtr::RANGE; // generic ignored junk
+ ptr = TypePtr::BotPTR;
+ } else { // Random constant offset into array body
+ offset = Type::OffsetBot; // Flatten constant access into array body
+ tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,Type::OffsetBot, ta->instance_id());
+ }
+ }
+ // Arrays of fixed size alias with arrays of unknown size.
+ if (ta->size() != TypeInt::POS) {
+ const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
+ tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset, ta->instance_id());
+ }
+ // Arrays of known objects become arrays of unknown objects.
+ if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
+ const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
+ tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset, ta->instance_id());
+ }
+ // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
+ // cannot be distinguished by bytecode alone.
+ if (ta->elem() == TypeInt::BOOL) {
+ const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
+ ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
+ tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset, ta->instance_id());
+ }
+ // During the 2nd round of IterGVN, NotNull castings are removed.
+ // Make sure the Bottom and NotNull variants alias the same.
+ // Also, make sure exact and non-exact variants alias the same.
+ if( ptr == TypePtr::NotNull || ta->klass_is_exact() ) {
+ if (ta->const_oop()) {
+ tj = ta = TypeAryPtr::make(TypePtr::Constant,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
+ } else {
+ tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
+ }
+ }
+ }
+
+ // Oop pointers need some flattening
+ const TypeInstPtr *to = tj->isa_instptr();
+ if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
+ if( ptr == TypePtr::Constant ) {
+ // No constant oop pointers (such as Strings); they alias with
+ // unknown strings.
+ tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
+ } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
+ // During the 2nd round of IterGVN, NotNull castings are removed.
+ // Make sure the Bottom and NotNull variants alias the same.
+ // Also, make sure exact and non-exact variants alias the same.
+ tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset, to->instance_id());
+ }
+ // Canonicalize the holder of this field
+ ciInstanceKlass *k = to->klass()->as_instance_klass();
+ if (offset >= 0 && offset < oopDesc::header_size() * wordSize) {
+ // First handle header references such as a LoadKlassNode, even if the
+ // object's klass is unloaded at compile time (4965979).
+ tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset, to->instance_id());
+ } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
+ to = NULL;
+ tj = TypeOopPtr::BOTTOM;
+ offset = tj->offset();
+ } else {
+ ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
+ if (!k->equals(canonical_holder) || tj->offset() != offset) {
+ tj = to = TypeInstPtr::make(TypePtr::BotPTR, canonical_holder, false, NULL, offset, to->instance_id());
+ }
+ }
+ }
+
+ // Klass pointers to object array klasses need some flattening
+ const TypeKlassPtr *tk = tj->isa_klassptr();
+ if( tk ) {
+ // If we are referencing a field within a Klass, we need
+ // to assume the worst case of an Object. Both exact and
+ // inexact types must flatten to the same alias class.
+ // Since the flattened result for a klass is defined to be
+ // precisely java.lang.Object, use a constant ptr.
+ if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
+
+ tj = tk = TypeKlassPtr::make(TypePtr::Constant,
+ TypeKlassPtr::OBJECT->klass(),
+ offset);
+ }
+
+ ciKlass* klass = tk->klass();
+ if( klass->is_obj_array_klass() ) {
+ ciKlass* k = TypeAryPtr::OOPS->klass();
+ if( !k || !k->is_loaded() ) // Only fails for some -Xcomp runs
+ k = TypeInstPtr::BOTTOM->klass();
+ tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
+ }
+
+ // Check for precise loads from the primary supertype array and force them
+ // to the supertype cache alias index. Check for generic array loads from
+ // the primary supertype array and also force them to the supertype cache
+ // alias index. Since the same load can reach both, we need to merge
+ // these 2 disparate memories into the same alias class. Since the
+ // primary supertype array is read-only, there's no chance of confusion
+ // where we bypass an array load and an array store.
+ uint off2 = offset - Klass::primary_supers_offset_in_bytes();
+ if( offset == Type::OffsetBot ||
+ off2 < Klass::primary_super_limit()*wordSize ) {
+ offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes();
+ tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
+ }
+ }
+
+ // Flatten all Raw pointers together.
+ if (tj->base() == Type::RawPtr)
+ tj = TypeRawPtr::BOTTOM;
+
+ if (tj->base() == Type::AnyPtr)
+ tj = TypePtr::BOTTOM; // An error, which the caller must check for.
+
+ // Flatten all to bottom for now
+ switch( _AliasLevel ) {
+ case 0:
+ tj = TypePtr::BOTTOM;
+ break;
+ case 1: // Flatten to: oop, static, field or array
+ switch (tj->base()) {
+ //case Type::AryPtr: tj = TypeAryPtr::RANGE; break;
+ case Type::RawPtr: tj = TypeRawPtr::BOTTOM; break;
+ case Type::AryPtr: // do not distinguish arrays at all
+ case Type::InstPtr: tj = TypeInstPtr::BOTTOM; break;
+ case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
+ case Type::AnyPtr: tj = TypePtr::BOTTOM; break; // caller checks it
+ default: ShouldNotReachHere();
+ }
+ break;
+ case 2: // No collapsing at level 2; keep all splits
+ case 3: // No collapsing at level 3; keep all splits
+ break;
+ default:
+ Unimplemented();
+ }
+
+ offset = tj->offset();
+ assert( offset != Type::OffsetTop, "Offset has fallen from constant" );
+
+ assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
+ (offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
+ (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
+ (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
+ (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
+ (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
+ (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr) ,
+ "For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
+ assert( tj->ptr() != TypePtr::TopPTR &&
+ tj->ptr() != TypePtr::AnyNull &&
+ tj->ptr() != TypePtr::Null, "No imprecise addresses" );
+// assert( tj->ptr() != TypePtr::Constant ||
+// tj->base() == Type::RawPtr ||
+// tj->base() == Type::KlassPtr, "No constant oop addresses" );
+
+ return tj;
+}
+
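+// Initialize one AliasType entry; for instance oop types, also record the alias index of
+// the corresponding general (non-instance) type in _general_index.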
+void Compile::AliasType::Init(int i, const TypePtr* at) {
+ _index = i;
+ _adr_type = at;
+ _field = NULL;
+ _is_rewritable = true; // default
+ const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
+ if (atoop != NULL && atoop->is_instance()) {
+ const TypeOopPtr *gt = atoop->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ _general_index = Compile::current()->get_alias_index(gt);
+ } else {
+ _general_index = 0;
+ }
+}
+
+//---------------------------------print_on------------------------------------
+#ifndef PRODUCT
+void Compile::AliasType::print_on(outputStream* st) {
+ if (index() < 10)
+ st->print("@ <%d> ", index());
+ else st->print("@ <%d>", index());
+ st->print(is_rewritable() ? " " : " RO");
+ int offset = adr_type()->offset();
+ if (offset == Type::OffsetBot)
+ st->print(" +any");
+ else st->print(" +%-3d", offset);
+ st->print(" in ");
+ adr_type()->dump_on(st);
+ const TypeOopPtr* tjp = adr_type()->isa_oopptr();
+ if (field() != NULL && tjp) {
+ if (tjp->klass() != field()->holder() ||
+ tjp->offset() != field()->offset_in_bytes()) {
+ st->print(" != ");
+ field()->print();
+ st->print(" ***");
+ }
+ }
+}
+
+void print_alias_types() {
+ Compile* C = Compile::current();
+ tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);
+ for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {
+ C->alias_type(idx)->print_on(tty);
+ tty->cr();
+ }
+}
+#endif
+
+
+//----------------------------probe_alias_cache--------------------------------
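+// Hash the address-type pointer into the fixed-size, direct-mapped alias cache.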
+Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {
+ intptr_t key = (intptr_t) adr_type;
+ key ^= key >> logAliasCacheSize;
+ return &_alias_cache[key & right_n_bits(logAliasCacheSize)];
+}
+
+
+//-----------------------------grow_alias_types--------------------------------
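+// Double the capacity of the alias type table, zero-filling the newly added entries.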
+void Compile::grow_alias_types() {
+ const int old_ats = _max_alias_types; // how many before?
+ const int new_ats = old_ats; // how many more?
+ const int grow_ats = old_ats+new_ats; // how many now?
+ _max_alias_types = grow_ats;
+ _alias_types = REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);
+ AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);
+ Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);
+ for (int i = 0; i < new_ats; i++) _alias_types[old_ats+i] = &ats[i];
+}
+
+
+//--------------------------------find_alias_type------------------------------
+Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create) {
+ if (_AliasLevel == 0)
+ return alias_type(AliasIdxBot);
+
+ AliasCacheEntry* ace = probe_alias_cache(adr_type);
+ if (ace->_adr_type == adr_type) {
+ return alias_type(ace->_index);
+ }
+
+ // Handle special cases.
+ if (adr_type == NULL) return alias_type(AliasIdxTop);
+ if (adr_type == TypePtr::BOTTOM) return alias_type(AliasIdxBot);
+
+ // Do it the slow way.
+ const TypePtr* flat = flatten_alias_type(adr_type);
+
+#ifdef ASSERT
+ assert(flat == flatten_alias_type(flat), "idempotent");
+ assert(flat != TypePtr::BOTTOM, "cannot alias-analyze an untyped ptr");
+ if (flat->isa_oopptr() && !flat->isa_klassptr()) {
+ const TypeOopPtr* foop = flat->is_oopptr();
+ const TypePtr* xoop = foop->cast_to_exactness(!foop->klass_is_exact())->is_ptr();
+ assert(foop == flatten_alias_type(xoop), "exactness must not affect alias type");
+ }
+ assert(flat == flatten_alias_type(flat), "exact bit doesn't matter");
+#endif
+
+ int idx = AliasIdxTop;
+ for (int i = 0; i < num_alias_types(); i++) {
+ if (alias_type(i)->adr_type() == flat) {
+ idx = i;
+ break;
+ }
+ }
+
+ if (idx == AliasIdxTop) {
+ if (no_create) return NULL;
+ // Grow the array if necessary.
+ if (_num_alias_types == _max_alias_types) grow_alias_types();
+ // Add a new alias type.
+ idx = _num_alias_types++;
+ _alias_types[idx]->Init(idx, flat);
+ if (flat == TypeInstPtr::KLASS) alias_type(idx)->set_rewritable(false);
+ if (flat == TypeAryPtr::RANGE) alias_type(idx)->set_rewritable(false);
+ if (flat->isa_instptr()) {
+ if (flat->offset() == java_lang_Class::klass_offset_in_bytes()
+ && flat->is_instptr()->klass() == env()->Class_klass())
+ alias_type(idx)->set_rewritable(false);
+ }
+ if (flat->isa_klassptr()) {
+ if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc))
+ alias_type(idx)->set_rewritable(false);
+ if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+ alias_type(idx)->set_rewritable(false);
+ if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+ alias_type(idx)->set_rewritable(false);
+ if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc))
+ alias_type(idx)->set_rewritable(false);
+ }
+ // %%% (We would like to finalize JavaThread::threadObj_offset(),
+ // but the base pointer type is not distinctive enough to identify
+ // references into JavaThread.)
+
+ // Check for final instance fields.
+ const TypeInstPtr* tinst = flat->isa_instptr();
+ if (tinst && tinst->offset() >= oopDesc::header_size() * wordSize) {
+ ciInstanceKlass *k = tinst->klass()->as_instance_klass();
+ ciField* field = k->get_field_by_offset(tinst->offset(), false);
+ // Set field() and is_rewritable() attributes.
+ if (field != NULL) alias_type(idx)->set_field(field);
+ }
+ const TypeKlassPtr* tklass = flat->isa_klassptr();
+ // Check for final static fields.
+ if (tklass && tklass->klass()->is_instance_klass()) {
+ ciInstanceKlass *k = tklass->klass()->as_instance_klass();
+ ciField* field = k->get_field_by_offset(tklass->offset(), true);
+ // Set field() and is_rewritable() attributes.
+ if (field != NULL) alias_type(idx)->set_field(field);
+ }
+ }
+
+ // Fill the cache for next time.
+ ace->_adr_type = adr_type;
+ ace->_index = idx;
+ assert(alias_type(adr_type) == alias_type(idx), "type must be installed");
+
+ // Might as well try to fill the cache for the flattened version, too.
+ AliasCacheEntry* face = probe_alias_cache(flat);
+ if (face->_adr_type == NULL) {
+ face->_adr_type = flat;
+ face->_index = idx;
+ assert(alias_type(flat) == alias_type(idx), "flat type must work too");
+ }
+
+ return alias_type(idx);
+}
+
+
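+// Alias type for a field: static fields are addressed off the holder's klass pointer,
+// instance fields off an oop of the holder's type, at the field's byte offset.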
+Compile::AliasType* Compile::alias_type(ciField* field) {
+ const TypeOopPtr* t;
+ if (field->is_static())
+ t = TypeKlassPtr::make(field->holder());
+ else
+ t = TypeOopPtr::make_from_klass_raw(field->holder());
+ AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes()));
+ assert(field->is_final() == !atp->is_rewritable(), "must get the rewritable bits correct");
+ return atp;
+}
+
+
+//------------------------------have_alias_type--------------------------------
+bool Compile::have_alias_type(const TypePtr* adr_type) {
+ AliasCacheEntry* ace = probe_alias_cache(adr_type);
+ if (ace->_adr_type == adr_type) {
+ return true;
+ }
+
+ // Handle special cases.
+ if (adr_type == NULL) return true;
+ if (adr_type == TypePtr::BOTTOM) return true;
+
+ return find_alias_type(adr_type, true) != NULL;
+}
+
+//-----------------------------must_alias--------------------------------------
+// True if all values of the given address type are in the given alias category.
+bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
+ if (alias_idx == AliasIdxBot) return true; // the universal category
+ if (adr_type == NULL) return true; // NULL serves as TypePtr::TOP
+ if (alias_idx == AliasIdxTop) return false; // the empty category
+ if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins
+
+ // the only remaining possible overlap is identity
+ int adr_idx = get_alias_index(adr_type);
+ assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
+ assert(adr_idx == alias_idx ||
+ (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
+ && adr_type != TypeOopPtr::BOTTOM),
+ "should not be testing for overlap with an unsafe pointer");
+ return adr_idx == alias_idx;
+}
+
+//------------------------------can_alias--------------------------------------
+// True if any values of the given address type are in the given alias category.
+bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
+ if (alias_idx == AliasIdxTop) return false; // the empty category
+ if (adr_type == NULL) return false; // NULL serves as TypePtr::TOP
+ if (alias_idx == AliasIdxBot) return true; // the universal category
+ if (adr_type->base() == Type::AnyPtr) return true; // TypePtr::BOTTOM or its twins
+
+ // the only remaining possible overlap is identity
+ int adr_idx = get_alias_index(adr_type);
+ assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
+ return adr_idx == alias_idx;
+}
+
+
+
+//---------------------------pop_warm_call-------------------------------------
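+// Remove and return the head of the warm-call list, or NULL if the list is empty.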
+WarmCallInfo* Compile::pop_warm_call() {
+ WarmCallInfo* wci = _warm_calls;
+ if (wci != NULL) _warm_calls = wci->remove_from(wci);
+ return wci;
+}
+
+//----------------------------Inline_Warm--------------------------------------
+int Compile::Inline_Warm() {
+ // If there is room, try to inline some more warm call sites.
+ // %%% Do a graph index compaction pass when we think we're out of space?
+ if (!InlineWarmCalls) return 0;
+
+ int calls_made_hot = 0;
+ int room_to_grow = NodeCountInliningCutoff - unique();
+ int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
+ int amount_grown = 0;
+ WarmCallInfo* call;
+ while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
+ int est_size = (int)call->size();
+ if (est_size > (room_to_grow - amount_grown)) {
+ // This one won't fit anyway. Get rid of it.
+ call->make_cold();
+ continue;
+ }
+ call->make_hot();
+ calls_made_hot++;
+ amount_grown += est_size;
+ amount_to_grow -= est_size;
+ }
+
+ if (calls_made_hot > 0) set_major_progress();
+ return calls_made_hot;
+}
+
+
+//----------------------------Finish_Warm--------------------------------------
+void Compile::Finish_Warm() {
+ if (!InlineWarmCalls) return;
+ if (failing()) return;
+ if (warm_calls() == NULL) return;
+
+ // Clean up loose ends, if we are out of space for inlining.
+ WarmCallInfo* call;
+ while ((call = pop_warm_call()) != NULL) {
+ call->make_cold();
+ }
+}
+
+
+//------------------------------Optimize---------------------------------------
+// Given a graph, optimize it.
+void Compile::Optimize() {
+ TracePhase t1("optimizer", &_t_optimizer, true);
+
+#ifndef PRODUCT
+ if (env()->break_at_compile()) {
+ BREAKPOINT;
+ }
+
+#endif
+
+ ResourceMark rm;
+ int loop_opts_cnt;
+
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ print_method("Start");
+
+ {
+ // Iterative Global Value Numbering, including ideal transforms
+ // Initialize IterGVN with types and values from parse-time GVN
+ PhaseIterGVN igvn(initial_gvn());
+ {
+ NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
+ igvn.optimize();
+ }
+
+ print_method("Iter GVN 1", 2);
+
+ if (failing()) return;
+
+ // Get rid of the connection graph since its information is not
+ // updated by optimizations
+ _congraph = NULL;
+
+
+ // Loop transforms on the ideal graph. Range Check Elimination,
+ // peeling, unrolling, etc.
+
+ // Set loop opts counter
+ loop_opts_cnt = num_loop_opts();
+ if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
+ {
+ TracePhase t2("idealLoop", &_t_idealLoop, true);
+ PhaseIdealLoop ideal_loop( igvn, NULL, true );
+ loop_opts_cnt--;
+ if (major_progress()) print_method("PhaseIdealLoop 1", 2);
+ if (failing()) return;
+ }
+ // Loop opts pass if partial peeling occurred in previous pass
+ if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
+ TracePhase t3("idealLoop", &_t_idealLoop, true);
+ PhaseIdealLoop ideal_loop( igvn, NULL, false );
+ loop_opts_cnt--;
+ if (major_progress()) print_method("PhaseIdealLoop 2", 2);
+ if (failing()) return;
+ }
+ // Loop opts pass for loop-unrolling before CCP
+ if(major_progress() && (loop_opts_cnt > 0)) {
+ TracePhase t4("idealLoop", &_t_idealLoop, true);
+ PhaseIdealLoop ideal_loop( igvn, NULL, false );
+ loop_opts_cnt--;
+ if (major_progress()) print_method("PhaseIdealLoop 3", 2);
+ }
+ }
+ if (failing()) return;
+
+ // Conditional Constant Propagation
+ PhaseCCP ccp( &igvn );
+ assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
+ {
+ TracePhase t2("ccp", &_t_ccp, true);
+ ccp.do_transform();
+ }
+ print_method("PhaseCPP 1", 2);
+
+ assert( true, "Break here to ccp.dump_old2new_map()");
+
+ // Iterative Global Value Numbering, including ideal transforms
+ {
+ NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
+ igvn = ccp;
+ igvn.optimize();
+ }
+
+ print_method("Iter GVN 2", 2);
+
+ if (failing()) return;
+
+ // Loop transforms on the ideal graph. Range Check Elimination,
+ // peeling, unrolling, etc.
+ if(loop_opts_cnt > 0) {
+ debug_only( int cnt = 0; );
+ while(major_progress() && (loop_opts_cnt > 0)) {
+ TracePhase t2("idealLoop", &_t_idealLoop, true);
+ assert( cnt++ < 40, "infinite cycle in loop optimization" );
+ PhaseIdealLoop ideal_loop( igvn, NULL, true );
+ loop_opts_cnt--;
+ if (major_progress()) print_method("PhaseIdealLoop iterations", 2);
+ if (failing()) return;
+ }
+ }
+ {
+ NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
+ PhaseMacroExpand mex(igvn);
+ if (mex.expand_macro_nodes()) {
+ assert(failing(), "must bail out w/ explicit message");
+ return;
+ }
+ }
+
+ } // (End scope of igvn; run destructor if necessary for asserts.)
+
+ // A method with only infinite loops has no edges entering loops from root
+ {
+ NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
+ if (final_graph_reshaping()) {
+ assert(failing(), "must bail out w/ explicit message");
+ return;
+ }
+ }
+
+ print_method("Optimize finished", 2);
+}
+
+
+//------------------------------Code_Gen---------------------------------------
+// Given a graph, generate code for it
+void Compile::Code_Gen() {
+ if (failing()) return;
+
+ // Perform instruction selection. You might think we could reclaim Matcher
+ // memory PDQ, but actually the Matcher is used in generating spill code.
+ // Internals of the Matcher (including some VectorSets) must remain live
+ // for a while - thus I cannot reclaim Matcher memory lest a VectorSet usage
+ // set a bit in reclaimed memory.
+
+ // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
+ // nodes. Mapping is only valid at the root of each matched subtree.
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ Node_List proj_list;
+ Matcher m(proj_list);
+ _matcher = &m;
+ {
+ TracePhase t2("matcher", &_t_matcher, true);
+ m.match();
+ }
+ // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
+ // nodes. Mapping is only valid at the root of each matched subtree.
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ // If you have too many nodes, or if matching has failed, bail out
+ check_node_count(0, "out of nodes matching instructions");
+ if (failing()) return;
+
+ // Build a proper-looking CFG
+ PhaseCFG cfg(node_arena(), root(), m);
+ _cfg = &cfg;
+ {
+ NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
+ cfg.Dominators();
+ if (failing()) return;
+
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ cfg.Estimate_Block_Frequency();
+ cfg.GlobalCodeMotion(m,unique(),proj_list);
+
+ print_method("Global code motion", 2);
+
+ if (failing()) return;
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ debug_only( cfg.verify(); )
+ }
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ PhaseChaitin regalloc(unique(),cfg,m);
+ _regalloc = &regalloc;
+ {
+ TracePhase t2("regalloc", &_t_registerAllocation, true);
+ // Perform any platform dependent preallocation actions. This is used,
+ // for example, to avoid taking an implicit null pointer exception
+ // using the frame pointer on win95.
+ _regalloc->pd_preallocate_hook();
+
+ // Perform register allocation. After Chaitin, use-def chains are
+ // no longer accurate (at spill code) and so must be ignored.
+ // Node->LRG->reg mappings are still accurate.
+ _regalloc->Register_Allocate();
+
+ // Bail out if the allocator builds too many nodes
+ if (failing()) return;
+ }
+
+ // Prior to register allocation we kept empty basic blocks in case the
+ // allocator needed a place to spill. After register allocation we
+ // are not adding any new instructions. If any basic block is empty, we
+ // can now safely remove it.
+ {
+ NOT_PRODUCT( TracePhase t2("removeEmpty", &_t_removeEmptyBlocks, TimeCompiler); )
+ cfg.RemoveEmpty();
+ }
+
+ // Perform any platform dependent postallocation verifications.
+ debug_only( _regalloc->pd_postallocate_verify_hook(); )
+
+ // Apply peephole optimizations
+ if( OptoPeephole ) {
+ NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
+ PhasePeephole peep( _regalloc, cfg);
+ peep.do_transform();
+ }
+
+ // Convert Nodes to instruction bits in a buffer
+ {
+ // %%%% workspace merge brought two timers together for one job
+ TracePhase t2a("output", &_t_output, true);
+ NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
+ Output();
+ }
+
+ print_method("End");
+
+ // He's dead, Jim.
+ _cfg = (PhaseCFG*)0xdeadbeef;
+ _regalloc = (PhaseChaitin*)0xdeadbeef;
+}
+
+
+//------------------------------dump_asm---------------------------------------
+// Dump formatted assembly
+#ifndef PRODUCT
+void Compile::dump_asm(int *pcs, uint pc_limit) {
+ bool cut_short = false;
+ tty->print_cr("#");
+ tty->print("# "); _tf->dump(); tty->cr();
+ tty->print_cr("#");
+
+ // For all blocks
+ int pc = 0x0; // Program counter
+ char starts_bundle = ' ';
+ _regalloc->dump_frame();
+
+ Node *n = NULL;
+ for( uint i=0; i<_cfg->_num_blocks; i++ ) {
+ if (VMThread::should_terminate()) { cut_short = true; break; }
+ Block *b = _cfg->_blocks[i];
+ if (b->is_connector() && !Verbose) continue;
+ n = b->_nodes[0];
+ if (pcs && n->_idx < pc_limit)
+ tty->print("%3.3x ", pcs[n->_idx]);
+ else
+ tty->print(" ");
+ b->dump_head( &_cfg->_bbs );
+ if (b->is_connector()) {
+ tty->print_cr(" # Empty connector block");
+ } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
+ tty->print_cr(" # Block is sole successor of call");
+ }
+
+ // For all instructions
+ Node *delay = NULL;
+ for( uint j = 0; j<b->_nodes.size(); j++ ) {
+ if (VMThread::should_terminate()) { cut_short = true; break; }
+ n = b->_nodes[j];
+ if (valid_bundle_info(n)) {
+ Bundle *bundle = node_bundling(n);
+ if (bundle->used_in_unconditional_delay()) {
+ delay = n;
+ continue;
+ }
+ if (bundle->starts_bundle())
+ starts_bundle = '+';
+ }
+
+ if( !n->is_Region() && // Dont print in the Assembly
+ !n->is_Phi() && // a few noisely useless nodes
+ !n->is_Proj() &&
+ !n->is_MachTemp() &&
+ !n->is_Catch() && // Would be nice to print exception table targets
+ !n->is_MergeMem() && // Not very interesting
+ !n->is_top() && // Debug info table constants
+ !(n->is_Con() && !n->is_Mach())// Debug info table constants
+ ) {
+ if (pcs && n->_idx < pc_limit)
+ tty->print("%3.3x", pcs[n->_idx]);
+ else
+ tty->print(" ");
+ tty->print(" %c ", starts_bundle);
+ starts_bundle = ' ';
+ tty->print("\t");
+ n->format(_regalloc, tty);
+ tty->cr();
+ }
+
+ // If we have an instruction with a delay slot, and have seen a delay,
+ // then back up and print it
+ if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
+ assert(delay != NULL, "no unconditional delay instruction");
+ if (node_bundling(delay)->starts_bundle())
+ starts_bundle = '+';
+ if (pcs && n->_idx < pc_limit)
+ tty->print("%3.3x", pcs[n->_idx]);
+ else
+ tty->print(" ");
+ tty->print(" %c ", starts_bundle);
+ starts_bundle = ' ';
+ tty->print("\t");
+ delay->format(_regalloc, tty);
+ tty->print_cr("");
+ delay = NULL;
+ }
+
+ // Dump the exception table as well
+ if( n->is_Catch() && (Verbose || WizardMode) ) {
+ // Print the exception table for this offset
+ _handler_table.print_subtable_for(pc);
+ }
+ }
+
+ if (pcs && n->_idx < pc_limit)
+ tty->print_cr("%3.3x", pcs[n->_idx]);
+ else
+ tty->print_cr("");
+
+ assert(cut_short || delay == NULL, "no unconditional delay branch");
+
+ } // End of per-block dump
+ tty->print_cr("");
+
+ if (cut_short) tty->print_cr("*** disassembly is cut short ***");
+}
+#endif
+
+//------------------------------Final_Reshape_Counts---------------------------
+// This class defines counters to help identify when a method
+// may/must be executed using hardware with only 24-bit precision.
+struct Final_Reshape_Counts : public StackObj {
+ int _call_count; // count non-inlined 'common' calls
+ int _float_count; // count float ops requiring 24-bit precision
+ int _double_count; // count double ops requiring more precision
+ int _java_call_count; // count non-inlined 'java' calls
+ VectorSet _visited; // Visitation flags
+ Node_List _tests; // Set of IfNodes & PCTableNodes
+
+ Final_Reshape_Counts() :
+ _call_count(0), _float_count(0), _double_count(0), _java_call_count(0),
+ _visited( Thread::current()->resource_area() ) { }
+
+ void inc_call_count () { _call_count ++; }
+ void inc_float_count () { _float_count ++; }
+ void inc_double_count() { _double_count++; }
+ void inc_java_call_count() { _java_call_count++; }
+
+ int get_call_count () const { return _call_count ; }
+ int get_float_count () const { return _float_count ; }
+ int get_double_count() const { return _double_count; }
+ int get_java_call_count() const { return _java_call_count; }
+};
+
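+// Sanity check used under VerifyOptoOopOffsets below.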
+static bool oop_offset_is_sane(const TypeInstPtr* tp) {
+ ciInstanceKlass *k = tp->klass()->as_instance_klass();
+ // Make sure the offset goes inside the instance layout.
+ return (uint)tp->offset() < (uint)(oopDesc::header_size() + k->nonstatic_field_size())*wordSize;
+ // Note that OffsetBot and OffsetTop are very negative.
+}
+
+//------------------------------final_graph_reshaping_impl----------------------
+// Implement items 1-5 from final_graph_reshaping below.
+static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
+
+ uint nop = n->Opcode();
+
+ // Check for 2-input instruction with "last use" on right input.
+ // Swap to left input. Implements item (2).
+ if( n->req() == 3 && // two-input instruction
+ n->in(1)->outcnt() > 1 && // left use is NOT a last use
+ (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
+ n->in(2)->outcnt() == 1 &&// right use IS a last use
+ !n->in(2)->is_Con() ) { // right use is not a constant
+ // Check for commutative opcode
+ switch( nop ) {
+ case Op_AddI: case Op_AddF: case Op_AddD: case Op_AddL:
+ case Op_MaxI: case Op_MinI:
+ case Op_MulI: case Op_MulF: case Op_MulD: case Op_MulL:
+ case Op_AndL: case Op_XorL: case Op_OrL:
+ case Op_AndI: case Op_XorI: case Op_OrI: {
+ // Move "last use" input to left by swapping inputs
+ n->swap_edges(1, 2);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ // Count FPU ops and common calls, implementing item (3)
+ switch( nop ) {
+ // Count all float operations that may use FPU
+ case Op_AddF:
+ case Op_SubF:
+ case Op_MulF:
+ case Op_DivF:
+ case Op_NegF:
+ case Op_ModF:
+ case Op_ConvI2F:
+ case Op_ConF:
+ case Op_CmpF:
+ case Op_CmpF3:
+ // case Op_ConvL2F: // longs are split into 32-bit halves
+ fpu.inc_float_count();
+ break;
+
+ case Op_ConvF2D:
+ case Op_ConvD2F:
+ fpu.inc_float_count();
+ fpu.inc_double_count();
+ break;
+
+ // Count all double operations that may use FPU
+ case Op_AddD:
+ case Op_SubD:
+ case Op_MulD:
+ case Op_DivD:
+ case Op_NegD:
+ case Op_ModD:
+ case Op_ConvI2D:
+ case Op_ConvD2I:
+ // case Op_ConvL2D: // handled by leaf call
+ // case Op_ConvD2L: // handled by leaf call
+ case Op_ConD:
+ case Op_CmpD:
+ case Op_CmpD3:
+ fpu.inc_double_count();
+ break;
+ case Op_Opaque1: // Remove Opaque Nodes before matching
+ case Op_Opaque2: // Remove Opaque Nodes before matching
+ n->replace_by(n->in(1));
+ break;
+ case Op_CallStaticJava:
+ case Op_CallJava:
+ case Op_CallDynamicJava:
+ fpu.inc_java_call_count(); // Count java call site; fall through to the runtime-call cases
+ case Op_CallRuntime:
+ case Op_CallLeaf:
+ case Op_CallLeafNoFP: {
+ assert( n->is_Call(), "" );
+ CallNode *call = n->as_Call();
+ // Count call sites where the FP mode bit would have to be flipped.
+ // Do not count uncommon runtime calls:
+ // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
+ // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
+ if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
+ fpu.inc_call_count(); // Count the call site
+ } else { // See if uncommon argument is shared
+ Node *n = call->in(TypeFunc::Parms);
+ int nop = n->Opcode();
+ // Clone shared simple arguments to uncommon calls, item (1).
+ if( n->outcnt() > 1 &&
+ !n->is_Proj() &&
+ nop != Op_CreateEx &&
+ nop != Op_CheckCastPP &&
+ !n->is_Mem() ) {
+ Node *x = n->clone();
+ call->set_req( TypeFunc::Parms, x );
+ }
+ }
+ break;
+ }
+
+ case Op_StoreD:
+ case Op_LoadD:
+ case Op_LoadD_unaligned:
+ fpu.inc_double_count();
+ goto handle_mem;
+ case Op_StoreF:
+ case Op_LoadF:
+ fpu.inc_float_count();
+ goto handle_mem;
+
+ case Op_StoreB:
+ case Op_StoreC:
+ case Op_StoreCM:
+ case Op_StorePConditional:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreLConditional:
+ case Op_CompareAndSwapI:
+ case Op_CompareAndSwapL:
+ case Op_CompareAndSwapP:
+ case Op_StoreP:
+ case Op_LoadB:
+ case Op_LoadC:
+ case Op_LoadI:
+ case Op_LoadKlass:
+ case Op_LoadL:
+ case Op_LoadL_unaligned:
+ case Op_LoadPLocked:
+ case Op_LoadLLocked:
+ case Op_LoadP:
+ case Op_LoadRange:
+ case Op_LoadS: {
+ handle_mem:
+#ifdef ASSERT
+ if( VerifyOptoOopOffsets ) {
+ assert( n->is_Mem(), "" );
+ MemNode *mem = (MemNode*)n;
+ // Check to see if address types have grounded out somehow.
+ const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
+ assert( !tp || oop_offset_is_sane(tp), "" );
+ }
+#endif
+ break;
+ }
+ case Op_If:
+ case Op_CountedLoopEnd:
+ fpu._tests.push(n); // Collect CFG split points
+ break;
+
+ case Op_AddP: { // Assert sane base pointers
+ const Node *addp = n->in(AddPNode::Address);
+ assert( !addp->is_AddP() ||
+ addp->in(AddPNode::Base)->is_top() || // Top OK for allocation
+ addp->in(AddPNode::Base) == n->in(AddPNode::Base),
+ "Base pointers must match" );
+ break;
+ }
+
+ case Op_ModI:
+ if (UseDivMod) {
+ // Check if a%b and a/b both exist
+ Node* d = n->find_similar(Op_DivI);
+ if (d) {
+ // Replace them with a fused divmod if supported
+ Compile* C = Compile::current();
+ if (Matcher::has_match_rule(Op_DivModI)) {
+ DivModINode* divmod = DivModINode::make(C, n);
+ d->replace_by(divmod->div_proj());
+ n->replace_by(divmod->mod_proj());
+ } else {
+ // replace a%b with a-((a/b)*b)
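+          // Illustrative arithmetic only: with a = 7 and b = 3 the quotient
+          // a/b truncates to 2, so a - (a/b)*b = 7 - 2*3 = 1 == a%b; the
+          // identity a%b == a - (a/b)*b holds for any b != 0 under truncating
+          // integer division.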
+ Node* mult = new (C, 3) MulINode(d, d->in(2));
+ Node* sub = new (C, 3) SubINode(d->in(1), mult);
+ n->replace_by( sub );
+ }
+ }
+ }
+ break;
+
+ case Op_ModL:
+ if (UseDivMod) {
+ // Check if a%b and a/b both exist
+ Node* d = n->find_similar(Op_DivL);
+ if (d) {
+ // Replace them with a fused divmod if supported
+ Compile* C = Compile::current();
+ if (Matcher::has_match_rule(Op_DivModL)) {
+ DivModLNode* divmod = DivModLNode::make(C, n);
+ d->replace_by(divmod->div_proj());
+ n->replace_by(divmod->mod_proj());
+ } else {
+ // replace a%b with a-((a/b)*b)
+ Node* mult = new (C, 3) MulLNode(d, d->in(2));
+ Node* sub = new (C, 3) SubLNode(d->in(1), mult);
+ n->replace_by( sub );
+ }
+ }
+ }
+ break;
+
+ case Op_Load16B:
+ case Op_Load8B:
+ case Op_Load4B:
+ case Op_Load8S:
+ case Op_Load4S:
+ case Op_Load2S:
+ case Op_Load8C:
+ case Op_Load4C:
+ case Op_Load2C:
+ case Op_Load4I:
+ case Op_Load2I:
+ case Op_Load2L:
+ case Op_Load4F:
+ case Op_Load2F:
+ case Op_Load2D:
+ case Op_Store16B:
+ case Op_Store8B:
+ case Op_Store4B:
+ case Op_Store8C:
+ case Op_Store4C:
+ case Op_Store2C:
+ case Op_Store4I:
+ case Op_Store2I:
+ case Op_Store2L:
+ case Op_Store4F:
+ case Op_Store2F:
+ case Op_Store2D:
+ break;
+
+ case Op_PackB:
+ case Op_PackS:
+ case Op_PackC:
+ case Op_PackI:
+ case Op_PackF:
+ case Op_PackL:
+ case Op_PackD:
+ if (n->req()-1 > 2) {
+ // Replace many operand PackNodes with a binary tree for matching
+ PackNode* p = (PackNode*) n;
+ Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
+ n->replace_by(btp);
+ }
+ break;
+ default:
+ assert( !n->is_Call(), "" );
+ assert( !n->is_Mem(), "" );
+ if( n->is_If() || n->is_PCTable() )
+ fpu._tests.push(n); // Collect CFG split points
+ break;
+ }
+}
+
+//------------------------------final_graph_reshaping_walk---------------------
+// Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
+// requires that the walk visits a node's inputs before visiting the node.
+static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {
+ fpu._visited.set(root->_idx); // first, mark node as visited
+ uint cnt = root->req();
+ Node *n = root;
+ uint i = 0;
+ while (true) {
+ if (i < cnt) {
+ // Place all non-visited non-null inputs onto stack
+ Node* m = n->in(i);
+ ++i;
+ if (m != NULL && !fpu._visited.test_set(m->_idx)) {
+ cnt = m->req();
+ nstack.push(n, i); // put on stack parent and next input's index
+ n = m;
+ i = 0;
+ }
+ } else {
+ // Now do post-visit work
+ final_graph_reshaping_impl( n, fpu );
+ if (nstack.is_empty())
+ break; // finished
+ n = nstack.node(); // Get node from stack
+ cnt = n->req();
+ i = nstack.index();
+ nstack.pop(); // Shift to the next node on stack
+ }
+ }
+}
+
+//------------------------------final_graph_reshaping--------------------------
+// Final Graph Reshaping.
+//
+// (1) Clone simple inputs to uncommon calls, so they can be scheduled late
+// and not commoned up and forced early. Must come after regular
+// optimizations to avoid GVN undoing the cloning. Clone constant
+// inputs to Loop Phis; these will be split by the allocator anyways.
+// Remove Opaque nodes.
+// (2) Move last-uses by commutative operations to the left input to encourage
+// Intel update-in-place two-address operations and better register usage
+// on RISCs. Must come after regular optimizations to avoid GVN Ideal
+// calls canonicalizing them back.
+// (3) Count the number of double-precision FP ops, single-precision FP ops
+// and call sites. On Intel, we can get correct rounding either by
+// forcing singles to memory (requires extra stores and loads after each
+// FP bytecode) or we can set a rounding mode bit (requires setting and
+// clearing the mode bit around call sites). The mode bit is only used
+// if the relative frequency of single FP ops to calls is low enough.
+// This is a key transform for SPEC mpeg_audio.
+// (4) Detect infinite loops; blobs of code reachable from above but not
+// below. Several of the Code_Gen algorithms fail on such code shapes,
+// so we simply bail out. Happens a lot in ZKM.jar, but also happens
+// from time to time in other codes (such as -Xcomp finalizer loops, etc).
+// Detection is by looking for IfNodes where only 1 projection is
+// reachable from below or CatchNodes missing some targets.
+// (5) Assert for insane oop offsets in debug mode.
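+//
+// Illustrative example for item (2), not a change in behavior: for "z = x + y"
+// where y has no other uses but x does, the AddI's two inputs are swapped so
+// the last-use y becomes in(1); a two-address target can then overwrite y's
+// register in place instead of first having to copy the still-live x.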
+
+bool Compile::final_graph_reshaping() {
+ // an infinite loop may have been eliminated by the optimizer,
+ // in which case the graph will be empty.
+ if (root()->req() == 1) {
+ record_method_not_compilable("trivial infinite loop");
+ return true;
+ }
+
+ Final_Reshape_Counts fpu;
+
+ // Visit everybody reachable!
+ // Allocate stack of size C->unique()/2 to avoid frequent realloc
+ Node_Stack nstack(unique() >> 1);
+ final_graph_reshaping_walk(nstack, root(), fpu);
+
+ // Check for unreachable (from below) code (i.e., infinite loops).
+ for( uint i = 0; i < fpu._tests.size(); i++ ) {
+ Node *n = fpu._tests[i];
+ assert( n->is_PCTable() || n->is_If(), "either PCTables or IfNodes" );
+ // Get number of CFG targets; 2 for IfNodes or _size for PCTables.
+ // Note that PCTables include exception targets after calls.
+ uint expected_kids = n->is_PCTable() ? n->as_PCTable()->_size : 2;
+ if (n->outcnt() != expected_kids) {
+ // Check for a few special cases. Rethrow Nodes never take the
+ // 'fall-thru' path, so expected kids is 1 less.
+ if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
+ if (n->in(0)->in(0)->is_Call()) {
+ CallNode *call = n->in(0)->in(0)->as_Call();
+ if (call->entry_point() == OptoRuntime::rethrow_stub()) {
+ expected_kids--; // Rethrow always has 1 less kid
+ } else if (call->req() > TypeFunc::Parms &&
+ call->is_CallDynamicJava()) {
+ // Check for null receiver. In such case, the optimizer has
+ // detected that the virtual call will always result in a null
+ // pointer exception. The fall-through projection of this CatchNode
+ // will not be populated.
+ Node *arg0 = call->in(TypeFunc::Parms);
+ if (arg0->is_Type() &&
+ arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) {
+ expected_kids--;
+ }
+ } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
+ call->req() > TypeFunc::Parms+1 &&
+ call->is_CallStaticJava()) {
+ // Check for negative array length. In such case, the optimizer has
+ // detected that the allocation attempt will always result in an
+        // exception. There is no fall-through projection of this CatchNode.
+ Node *arg1 = call->in(TypeFunc::Parms+1);
+ if (arg1->is_Type() &&
+ arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
+ expected_kids--;
+ }
+ }
+ }
+ }
+ // Recheck with a better notion of 'expected_kids'
+ if (n->outcnt() != expected_kids) {
+ record_method_not_compilable("malformed control flow");
+ return true; // Not all targets reachable!
+ }
+ }
+ // Check that I actually visited all kids. Unreached kids
+ // must be infinite loops.
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
+ if (!fpu._visited.test(n->fast_out(j)->_idx)) {
+ record_method_not_compilable("infinite loop");
+        return true; // Found unvisited kid; must be unreachable
+ }
+ }
+
+  // If the original bytecodes contained a mixture of floats and doubles,
+  // check whether the optimizer has made it homogeneous, item (3).
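+  // Illustrative numbers only: with, say, 40 single-precision ops, no double
+  // ops and 3 call sites, 10*3 = 30 < 40, so the 24-bit rounding mode is
+  // selected; with 4 or more call sites it would not be.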
+ if( Use24BitFPMode && Use24BitFP &&
+ fpu.get_float_count() > 32 &&
+ fpu.get_double_count() == 0 &&
+ (10 * fpu.get_call_count() < fpu.get_float_count()) ) {
+ set_24_bit_selection_and_mode( false, true );
+ }
+
+ set_has_java_calls(fpu.get_java_call_count() > 0);
+
+ // No infinite loops, no reason to bail out.
+ return false;
+}
+
+//-----------------------------too_many_traps----------------------------------
+// Report if there are too many traps at the current method and bci.
+// Return true if there was a trap, and/or PerMethodTrapLimit is exceeded.
+bool Compile::too_many_traps(ciMethod* method,
+ int bci,
+ Deoptimization::DeoptReason reason) {
+ ciMethodData* md = method->method_data();
+ if (md->is_empty()) {
+ // Assume the trap has not occurred, or that it occurred only
+ // because of a transient condition during start-up in the interpreter.
+ return false;
+ }
+ if (md->has_trap_at(bci, reason) != 0) {
+ // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
+ // Also, if there are multiple reasons, or if there is no per-BCI record,
+ // assume the worst.
+ if (log())
+ log()->elem("observe trap='%s' count='%d'",
+ Deoptimization::trap_reason_name(reason),
+ md->trap_count(reason));
+ return true;
+ } else {
+ // Ignore method/bci and see if there have been too many globally.
+ return too_many_traps(reason, md);
+ }
+}
+
+// Less-accurate variant which does not require a method and bci.
+bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
+ ciMethodData* logmd) {
+ if (trap_count(reason) >= (uint)PerMethodTrapLimit) {
+ // Too many traps globally.
+ // Note that we use cumulative trap_count, not just md->trap_count.
+ if (log()) {
+ int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
+ log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
+ Deoptimization::trap_reason_name(reason),
+ mcount, trap_count(reason));
+ }
+ return true;
+ } else {
+ // The coast is clear.
+ return false;
+ }
+}
+
+//--------------------------too_many_recompiles--------------------------------
+// Report if there are too many recompiles at the current method and bci.
+// Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff.
+// Is not eager to return true, since this will cause the compiler to use
+// Action_none for a trap point, to avoid too many recompilations.
+bool Compile::too_many_recompiles(ciMethod* method,
+ int bci,
+ Deoptimization::DeoptReason reason) {
+ ciMethodData* md = method->method_data();
+ if (md->is_empty()) {
+ // Assume the trap has not occurred, or that it occurred only
+ // because of a transient condition during start-up in the interpreter.
+ return false;
+ }
+ // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
+ uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
+ uint m_cutoff = (uint) PerMethodRecompilationCutoff / 2 + 1; // not zero
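+  // Illustrative arithmetic only: if PerBytecodeRecompilationCutoff were 200
+  // and PerMethodRecompilationCutoff were 400, bc_cutoff would be 25 and
+  // m_cutoff would be 201.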
+ Deoptimization::DeoptReason per_bc_reason
+ = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
+ if ((per_bc_reason == Deoptimization::Reason_none
+ || md->has_trap_at(bci, reason) != 0)
+ // The trap frequency measure we care about is the recompile count:
+ && md->trap_recompiled_at(bci)
+ && md->overflow_recompile_count() >= bc_cutoff) {
+ // Do not emit a trap here if it has already caused recompilations.
+ // Also, if there are multiple reasons, or if there is no per-BCI record,
+ // assume the worst.
+ if (log())
+ log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'",
+ Deoptimization::trap_reason_name(reason),
+ md->trap_count(reason),
+ md->overflow_recompile_count());
+ return true;
+ } else if (trap_count(reason) != 0
+ && decompile_count() >= m_cutoff) {
+ // Too many recompiles globally, and we have seen this sort of trap.
+ // Use cumulative decompile_count, not just md->decompile_count.
+ if (log())
+ log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'",
+ Deoptimization::trap_reason_name(reason),
+ md->trap_count(reason), trap_count(reason),
+ md->decompile_count(), decompile_count());
+ return true;
+ } else {
+ // The coast is clear.
+ return false;
+ }
+}
+
+
+#ifndef PRODUCT
+//------------------------------verify_graph_edges---------------------------
+// Walk the Graph and verify that there is a one-to-one correspondence
+// between Use-Def edges and Def-Use edges in the graph.
+void Compile::verify_graph_edges(bool no_dead_code) {
+ if (VerifyGraphEdges) {
+ ResourceArea *area = Thread::current()->resource_area();
+ Unique_Node_List visited(area);
+ // Call recursive graph walk to check edges
+ _root->verify_edges(visited);
+ if (no_dead_code) {
+ // Now make sure that no visited node is used by an unvisited node.
+      int dead_nodes = 0;  // a count, not a flag: incremented and tested below
+ Unique_Node_List checked(area);
+ while (visited.size() > 0) {
+ Node* n = visited.pop();
+ checked.push(n);
+ for (uint i = 0; i < n->outcnt(); i++) {
+ Node* use = n->raw_out(i);
+ if (checked.member(use)) continue; // already checked
+ if (visited.member(use)) continue; // already in the graph
+ if (use->is_Con()) continue; // a dead ConNode is OK
+ // At this point, we have found a dead node which is DU-reachable.
+ if (dead_nodes++ == 0)
+ tty->print_cr("*** Dead nodes reachable via DU edges:");
+ use->dump(2);
+ tty->print_cr("---");
+ checked.push(use); // No repeats; pretend it is now checked.
+ }
+ }
+ assert(dead_nodes == 0, "using nodes must be reachable from root");
+ }
+ }
+}
+#endif
+
+// The Compile object keeps track of failure reasons separately from the ciEnv.
+// This is required because there is not quite a 1-1 relation between the
+// ciEnv (together with its compilation task) and the Compile object.  Note that one
+// ciEnv might use two Compile objects, if C2Compiler::compile_method decides
+// to backtrack and retry without subsuming loads. Other than this backtracking
+// behavior, the Compile's failure reason is quietly copied up to the ciEnv
+// by the logic in C2Compiler.
+void Compile::record_failure(const char* reason) {
+ if (log() != NULL) {
+ log()->elem("failure reason='%s' phase='compile'", reason);
+ }
+ if (_failure_reason == NULL) {
+ // Record the first failure reason.
+ _failure_reason = reason;
+ }
+ _root = NULL; // flush the graph, too
+}
+
+Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
+ : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false)
+{
+ if (dolog) {
+ C = Compile::current();
+ _log = C->log();
+ } else {
+ C = NULL;
+ _log = NULL;
+ }
+ if (_log != NULL) {
+ _log->begin_head("phase name='%s' nodes='%d'", name, C->unique());
+ _log->stamp();
+ _log->end_head();
+ }
+}
+
+Compile::TracePhase::~TracePhase() {
+ if (_log != NULL) {
+ _log->done("phase nodes='%d'", C->unique());
+ }
+}
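+
+// Illustrative usage only (a sketch, not part of this change): a compiler pass
+// brackets its work in a TracePhase so the elapsed time is accumulated and,
+// when logging is enabled, <phase> elements with node counts are emitted.
+// The timer name below is hypothetical.
+//
+//   {
+//     TracePhase t("example_phase", &_t_example_phase, true /*dolog*/);
+//     // ... do the work of the phase ...
+//   }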
diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp
new file mode 100644
index 000000000..fcab3a378
--- /dev/null
+++ b/src/share/vm/opto/compile.hpp
@@ -0,0 +1,720 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Block;
+class Bundle;
+class C2Compiler;
+class CallGenerator;
+class ConnectionGraph;
+class InlineTree;
+class Int_Array;
+class Matcher;
+class MachNode;
+class Node;
+class Node_Array;
+class Node_Notes;
+class OptoReg;
+class PhaseCFG;
+class PhaseGVN;
+class PhaseRegAlloc;
+class PhaseCCP;
+class PhaseCCP_DCE;
+class RootNode;
+class relocInfo;
+class Scope;
+class StartNode;
+class SafePointNode;
+class JVMState;
+class TypeData;
+class TypePtr;
+class TypeFunc;
+class Unique_Node_List;
+class nmethod;
+class WarmCallInfo;
+#ifdef ENABLE_ZAP_DEAD_LOCALS
+class MachSafePointNode;
+#endif
+
+//------------------------------Compile----------------------------------------
+// This class defines a top-level Compiler invocation.
+
+class Compile : public Phase {
+ public:
+ // Fixed alias indexes. (See also MergeMemNode.)
+ enum {
+ AliasIdxTop = 1, // pseudo-index, aliases to nothing (used as sentinel value)
+ AliasIdxBot = 2, // pseudo-index, aliases to everything
+ AliasIdxRaw = 3 // hard-wired index for TypeRawPtr::BOTTOM
+ };
+
+ // Variant of TraceTime(NULL, &_t_accumulator, TimeCompiler);
+ // Integrated with logging. If logging is turned on, and dolog is true,
+ // then brackets are put into the log, with time stamps and node counts.
+ // (The time collection itself is always conditionalized on TimeCompiler.)
+ class TracePhase : public TraceTime {
+ private:
+ Compile* C;
+ CompileLog* _log;
+ public:
+ TracePhase(const char* name, elapsedTimer* accumulator, bool dolog);
+ ~TracePhase();
+ };
+
+ // Information per category of alias (memory slice)
+ class AliasType {
+ private:
+ friend class Compile;
+
+ int _index; // unique index, used with MergeMemNode
+ const TypePtr* _adr_type; // normalized address type
+ ciField* _field; // relevant instance field, or null if none
+ bool _is_rewritable; // false if the memory is write-once only
+    int _general_index; // if this type is an instance, the general
+ // type that this is an instance of
+
+ void Init(int i, const TypePtr* at);
+
+ public:
+ int index() const { return _index; }
+ const TypePtr* adr_type() const { return _adr_type; }
+ ciField* field() const { return _field; }
+ bool is_rewritable() const { return _is_rewritable; }
+ bool is_volatile() const { return (_field ? _field->is_volatile() : false); }
+ int general_index() const { return (_general_index != 0) ? _general_index : _index; }
+
+ void set_rewritable(bool z) { _is_rewritable = z; }
+ void set_field(ciField* f) {
+ assert(!_field,"");
+ _field = f;
+ if (f->is_final()) _is_rewritable = false;
+ }
+
+ void print_on(outputStream* st) PRODUCT_RETURN;
+ };
+
+ enum {
+ logAliasCacheSize = 6,
+ AliasCacheSize = (1<<logAliasCacheSize)
+ };
+ struct AliasCacheEntry { const TypePtr* _adr_type; int _index; }; // simple duple type
+ enum {
+ trapHistLength = methodDataOopDesc::_trap_hist_limit
+ };
+
+ private:
+ // Fixed parameters to this compilation.
+ const int _compile_id;
+ const bool _save_argument_registers; // save/restore arg regs for trampolines
+ const bool _subsume_loads; // Load can be matched as part of a larger op.
+ ciMethod* _method; // The method being compiled.
+ int _entry_bci; // entry bci for osr methods.
+ const TypeFunc* _tf; // My kind of signature
+ InlineTree* _ilt; // Ditto (temporary).
+ address _stub_function; // VM entry for stub being compiled, or NULL
+ const char* _stub_name; // Name of stub or adapter being compiled, or NULL
+ address _stub_entry_point; // Compile code entry for generated stub, or NULL
+
+ // Control of this compilation.
+  int _num_loop_opts; // Number of iterations for doing loop optimizations
+ int _max_inline_size; // Max inline size for this compilation
+ int _freq_inline_size; // Max hot method inline size for this compilation
+ int _fixed_slots; // count of frame slots not allocated by the register
+ // allocator i.e. locks, original deopt pc, etc.
+ // For deopt
+ int _orig_pc_slot;
+ int _orig_pc_slot_offset_in_bytes;
+
+ int _major_progress; // Count of something big happening
+ bool _deopt_happens; // TRUE if de-optimization CAN happen
+ bool _has_loops; // True if the method _may_ have some loops
+ bool _has_split_ifs; // True if the method _may_ have some split-if
+ bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores.
+ uint _trap_hist[trapHistLength]; // Cumulative traps
+ bool _trap_can_recompile; // Have we emitted a recompiling trap?
+ uint _decompile_count; // Cumulative decompilation counts.
+ bool _do_inlining; // True if we intend to do inlining
+ bool _do_scheduling; // True if we intend to do scheduling
+ bool _do_count_invocations; // True if we generate code to count invocations
+ bool _do_method_data_update; // True if we generate code to update methodDataOops
+ int _AliasLevel; // Locally-adjusted version of AliasLevel flag.
+ bool _print_assembly; // True if we should dump assembly code for this compilation
+#ifndef PRODUCT
+ bool _trace_opto_output;
+#endif
+
+ // Compilation environment.
+ Arena _comp_arena; // Arena with lifetime equivalent to Compile
+ ciEnv* _env; // CI interface
+ CompileLog* _log; // from CompilerThread
+ const char* _failure_reason; // for record_failure/failing pattern
+ GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics.
+ GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching.
+ ConnectionGraph* _congraph;
+#ifndef PRODUCT
+ IdealGraphPrinter* _printer;
+#endif
+
+ // Node management
+ uint _unique; // Counter for unique Node indices
+ debug_only(static int _debug_idx;) // Monotonic counter (not reset), use -XX:BreakAtNode=<idx>
+ Arena _node_arena; // Arena for new-space Nodes
+ Arena _old_arena; // Arena for old-space Nodes, lifetime during xform
+ RootNode* _root; // Unique root of compilation, or NULL after bail-out.
+ Node* _top; // Unique top node. (Reset by various phases.)
+
+ Node* _immutable_memory; // Initial memory state
+
+ Node* _recent_alloc_obj;
+ Node* _recent_alloc_ctl;
+
+ // Blocked array of debugging and profiling information,
+ // tracked per node.
+ enum { _log2_node_notes_block_size = 8,
+ _node_notes_block_size = (1<<_log2_node_notes_block_size)
+ };
+ GrowableArray<Node_Notes*>* _node_note_array;
+ Node_Notes* _default_node_notes; // default notes for new nodes
+
+ // After parsing and every bulk phase we hang onto the Root instruction.
+ // The RootNode instruction is where the whole program begins. It produces
+ // the initial Control and BOTTOM for everybody else.
+
+ // Type management
+ Arena _Compile_types; // Arena for all types
+ Arena* _type_arena; // Alias for _Compile_types except in Initialize_shared()
+ Dict* _type_dict; // Intern table
+ void* _type_hwm; // Last allocation (see Type::operator new/delete)
+ size_t _type_last_size; // Last allocation size (see Type::operator new/delete)
+ ciMethod* _last_tf_m; // Cache for
+ const TypeFunc* _last_tf; // TypeFunc::make
+ AliasType** _alias_types; // List of alias types seen so far.
+ int _num_alias_types; // Logical length of _alias_types
+ int _max_alias_types; // Physical length of _alias_types
+ AliasCacheEntry _alias_cache[AliasCacheSize]; // Gets aliases w/o data structure walking
+
+ // Parsing, optimization
+ PhaseGVN* _initial_gvn; // Results of parse-time PhaseGVN
+ Unique_Node_List* _for_igvn; // Initial work-list for next round of Iterative GVN
+ WarmCallInfo* _warm_calls; // Sorted work-list for heat-based inlining.
+
+ // Matching, CFG layout, allocation, code generation
+ PhaseCFG* _cfg; // Results of CFG finding
+ bool _select_24_bit_instr; // We selected an instruction with a 24-bit result
+ bool _in_24_bit_fp_mode; // We are emitting instructions with 24-bit results
+ bool _has_java_calls; // True if the method has java calls
+ Matcher* _matcher; // Engine to map ideal to machine instructions
+ PhaseRegAlloc* _regalloc; // Results of register allocation.
+ int _frame_slots; // Size of total frame in stack slots
+ CodeOffsets _code_offsets; // Offsets into the code for various interesting entries
+ RegMask _FIRST_STACK_mask; // All stack slots usable for spills (depends on frame layout)
+ Arena* _indexSet_arena; // control IndexSet allocation within PhaseChaitin
+ void* _indexSet_free_block_list; // free list of IndexSet bit blocks
+
+ uint _node_bundling_limit;
+ Bundle* _node_bundling_base; // Information for instruction bundling
+
+ // Instruction bits passed off to the VM
+ int _method_size; // Size of nmethod code segment in bytes
+ CodeBuffer _code_buffer; // Where the code is assembled
+ int _first_block_size; // Size of unvalidated entry point code / OSR poison code
+ ExceptionHandlerTable _handler_table; // Table of native-code exception handlers
+ ImplicitExceptionTable _inc_table; // Table of implicit null checks in native code
+ OopMapSet* _oop_map_set; // Table of oop maps (one for each safepoint location)
+ static int _CompiledZap_count; // counter compared against CompileZap[First/Last]
+ BufferBlob* _scratch_buffer_blob; // For temporary code buffers.
+ relocInfo* _scratch_locs_memory; // For temporary code buffers.
+
+ public:
+ // Accessors
+
+ // The Compile instance currently active in this (compiler) thread.
+ static Compile* current() {
+ return (Compile*) ciEnv::current()->compiler_data();
+ }
+
+ // ID for this compilation. Useful for setting breakpoints in the debugger.
+ int compile_id() const { return _compile_id; }
+
+ // Does this compilation allow instructions to subsume loads? User
+ // instructions that subsume a load may result in an unschedulable
+ // instruction sequence.
+ bool subsume_loads() const { return _subsume_loads; }
+ bool save_argument_registers() const { return _save_argument_registers; }
+
+
+ // Other fixed compilation parameters.
+ ciMethod* method() const { return _method; }
+ int entry_bci() const { return _entry_bci; }
+ bool is_osr_compilation() const { return _entry_bci != InvocationEntryBci; }
+ bool is_method_compilation() const { return (_method != NULL && !_method->flags().is_native()); }
+ const TypeFunc* tf() const { assert(_tf!=NULL, ""); return _tf; }
+ void init_tf(const TypeFunc* tf) { assert(_tf==NULL, ""); _tf = tf; }
+ InlineTree* ilt() const { return _ilt; }
+ address stub_function() const { return _stub_function; }
+ const char* stub_name() const { return _stub_name; }
+ address stub_entry_point() const { return _stub_entry_point; }
+
+ // Control of this compilation.
+ int fixed_slots() const { assert(_fixed_slots >= 0, ""); return _fixed_slots; }
+ void set_fixed_slots(int n) { _fixed_slots = n; }
+ int major_progress() const { return _major_progress; }
+ void set_major_progress() { _major_progress++; }
+ void clear_major_progress() { _major_progress = 0; }
+ int num_loop_opts() const { return _num_loop_opts; }
+ void set_num_loop_opts(int n) { _num_loop_opts = n; }
+ int max_inline_size() const { return _max_inline_size; }
+ void set_freq_inline_size(int n) { _freq_inline_size = n; }
+ int freq_inline_size() const { return _freq_inline_size; }
+ void set_max_inline_size(int n) { _max_inline_size = n; }
+ bool deopt_happens() const { return _deopt_happens; }
+ bool has_loops() const { return _has_loops; }
+ void set_has_loops(bool z) { _has_loops = z; }
+ bool has_split_ifs() const { return _has_split_ifs; }
+ void set_has_split_ifs(bool z) { _has_split_ifs = z; }
+ bool has_unsafe_access() const { return _has_unsafe_access; }
+ void set_has_unsafe_access(bool z) { _has_unsafe_access = z; }
+ void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
+ uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
+ bool trap_can_recompile() const { return _trap_can_recompile; }
+ void set_trap_can_recompile(bool z) { _trap_can_recompile = z; }
+ uint decompile_count() const { return _decompile_count; }
+ void set_decompile_count(uint c) { _decompile_count = c; }
+ bool allow_range_check_smearing() const;
+ bool do_inlining() const { return _do_inlining; }
+ void set_do_inlining(bool z) { _do_inlining = z; }
+ bool do_scheduling() const { return _do_scheduling; }
+ void set_do_scheduling(bool z) { _do_scheduling = z; }
+ bool do_count_invocations() const{ return _do_count_invocations; }
+ void set_do_count_invocations(bool z){ _do_count_invocations = z; }
+ bool do_method_data_update() const { return _do_method_data_update; }
+ void set_do_method_data_update(bool z) { _do_method_data_update = z; }
+ int AliasLevel() const { return _AliasLevel; }
+ bool print_assembly() const { return _print_assembly; }
+ void set_print_assembly(bool z) { _print_assembly = z; }
+ // check the CompilerOracle for special behaviours for this compile
+ bool method_has_option(const char * option) {
+ return method() != NULL && method()->has_option(option);
+ }
+#ifndef PRODUCT
+ bool trace_opto_output() const { return _trace_opto_output; }
+#endif
+
+ void begin_method() {
+#ifndef PRODUCT
+ if (_printer) _printer->begin_method(this);
+#endif
+ }
+ void print_method(const char * name, int level = 1) {
+#ifndef PRODUCT
+ if (_printer) _printer->print_method(this, name, level);
+#endif
+ }
+ void end_method() {
+#ifndef PRODUCT
+ if (_printer) _printer->end_method();
+#endif
+ }
+
+ int macro_count() { return _macro_nodes->length(); }
+ Node* macro_node(int idx) { return _macro_nodes->at(idx); }
+ ConnectionGraph* congraph() { return _congraph;}
+ void add_macro_node(Node * n) {
+ //assert(n->is_macro(), "must be a macro node");
+ assert(!_macro_nodes->contains(n), " duplicate entry in expand list");
+ _macro_nodes->append(n);
+ }
+ void remove_macro_node(Node * n) {
+ // this function may be called twice for a node so check
+ // that the node is in the array before attempting to remove it
+ if (_macro_nodes->contains(n))
+ _macro_nodes->remove(n);
+ }
+
+ // Compilation environment.
+ Arena* comp_arena() { return &_comp_arena; }
+ ciEnv* env() const { return _env; }
+ CompileLog* log() const { return _log; }
+ bool failing() const { return _env->failing() || _failure_reason != NULL; }
+ const char* failure_reason() { return _failure_reason; }
+ bool failure_reason_is(const char* r) { return (r==_failure_reason) || (r!=NULL && _failure_reason!=NULL && strcmp(r, _failure_reason)==0); }
+
+ void record_failure(const char* reason);
+ void record_method_not_compilable(const char* reason, bool all_tiers = false) {
+ // All bailouts cover "all_tiers" when TieredCompilation is off.
+ if (!TieredCompilation) all_tiers = true;
+ env()->record_method_not_compilable(reason, all_tiers);
+ // Record failure reason.
+ record_failure(reason);
+ }
+ void record_method_not_compilable_all_tiers(const char* reason) {
+ record_method_not_compilable(reason, true);
+ }
+ bool check_node_count(uint margin, const char* reason) {
+ if (unique() + margin > (uint)MaxNodeLimit) {
+ record_method_not_compilable(reason);
+ return true;
+ } else {
+ return false;
+ }
+ }
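+  // Illustrative arithmetic only: if MaxNodeLimit were 80000 and unique()
+  // were 79990, check_node_count(20, ...) would exceed the limit, record the
+  // failure and return true, while a margin of 5 would not.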
+
+ // Node management
+ uint unique() const { return _unique; }
+ uint next_unique() { return _unique++; }
+ void set_unique(uint i) { _unique = i; }
+ static int debug_idx() { return debug_only(_debug_idx)+0; }
+ static void set_debug_idx(int i) { debug_only(_debug_idx = i); }
+ Arena* node_arena() { return &_node_arena; }
+ Arena* old_arena() { return &_old_arena; }
+ RootNode* root() const { return _root; }
+ void set_root(RootNode* r) { _root = r; }
+ StartNode* start() const; // (Derived from root.)
+ void init_start(StartNode* s);
+ Node* immutable_memory();
+
+ Node* recent_alloc_ctl() const { return _recent_alloc_ctl; }
+ Node* recent_alloc_obj() const { return _recent_alloc_obj; }
+ void set_recent_alloc(Node* ctl, Node* obj) {
+ _recent_alloc_ctl = ctl;
+ _recent_alloc_obj = obj;
+ }
+
+ // Handy undefined Node
+ Node* top() const { return _top; }
+
+  // these are used by callers that need to know about creation and transformation of top:
+ Node* cached_top_node() { return _top; }
+ void set_cached_top_node(Node* tn);
+
+ GrowableArray<Node_Notes*>* node_note_array() const { return _node_note_array; }
+ void set_node_note_array(GrowableArray<Node_Notes*>* arr) { _node_note_array = arr; }
+ Node_Notes* default_node_notes() const { return _default_node_notes; }
+ void set_default_node_notes(Node_Notes* n) { _default_node_notes = n; }
+
+ Node_Notes* node_notes_at(int idx) {
+ return locate_node_notes(_node_note_array, idx, false);
+ }
+ inline bool set_node_notes_at(int idx, Node_Notes* value);
+
+ // Copy notes from source to dest, if they exist.
+ // Overwrite dest only if source provides something.
+ // Return true if information was moved.
+ bool copy_node_notes_to(Node* dest, Node* source);
+
+ // Workhorse function to sort out the blocked Node_Notes array:
+ inline Node_Notes* locate_node_notes(GrowableArray<Node_Notes*>* arr,
+ int idx, bool can_grow = false);
+
+ void grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by);
+
+ // Type management
+ Arena* type_arena() { return _type_arena; }
+ Dict* type_dict() { return _type_dict; }
+ void* type_hwm() { return _type_hwm; }
+ size_t type_last_size() { return _type_last_size; }
+ int num_alias_types() { return _num_alias_types; }
+
+ void init_type_arena() { _type_arena = &_Compile_types; }
+ void set_type_arena(Arena* a) { _type_arena = a; }
+ void set_type_dict(Dict* d) { _type_dict = d; }
+ void set_type_hwm(void* p) { _type_hwm = p; }
+ void set_type_last_size(size_t sz) { _type_last_size = sz; }
+
+ const TypeFunc* last_tf(ciMethod* m) {
+ return (m == _last_tf_m) ? _last_tf : NULL;
+ }
+ void set_last_tf(ciMethod* m, const TypeFunc* tf) {
+ assert(m != NULL || tf == NULL, "");
+ _last_tf_m = m;
+ _last_tf = tf;
+ }
+
+ AliasType* alias_type(int idx) { assert(idx < num_alias_types(), "oob"); return _alias_types[idx]; }
+ AliasType* alias_type(const TypePtr* adr_type) { return find_alias_type(adr_type, false); }
+ bool have_alias_type(const TypePtr* adr_type);
+ AliasType* alias_type(ciField* field);
+
+ int get_alias_index(const TypePtr* at) { return alias_type(at)->index(); }
+ const TypePtr* get_adr_type(uint aidx) { return alias_type(aidx)->adr_type(); }
+ int get_general_index(uint aidx) { return alias_type(aidx)->general_index(); }
+
+ // Building nodes
+ void rethrow_exceptions(JVMState* jvms);
+ void return_values(JVMState* jvms);
+ JVMState* build_start_state(StartNode* start, const TypeFunc* tf);
+
+ // Decide how to build a call.
+ // The profile factor is a discount to apply to this site's interp. profile.
+ CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor);
+
+  // Report if there were too many traps at the given method and bci.
+ // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
+ // If there is no MDO at all, report no trap unless told to assume it.
+ bool too_many_traps(ciMethod* method, int bci, Deoptimization::DeoptReason reason);
+ // This version, unspecific to a particular bci, asks if
+ // PerMethodTrapLimit was exceeded for all inlined methods seen so far.
+ bool too_many_traps(Deoptimization::DeoptReason reason,
+ // Privately used parameter for logging:
+ ciMethodData* logmd = NULL);
+ // Report if there were too many recompiles at a method and bci.
+ bool too_many_recompiles(ciMethod* method, int bci, Deoptimization::DeoptReason reason);
+
+ // Parsing, optimization
+ PhaseGVN* initial_gvn() { return _initial_gvn; }
+ Unique_Node_List* for_igvn() { return _for_igvn; }
+ inline void record_for_igvn(Node* n); // Body is after class Unique_Node_List.
+ void record_for_escape_analysis(Node* n);
+ void set_initial_gvn(PhaseGVN *gvn) { _initial_gvn = gvn; }
+ void set_for_igvn(Unique_Node_List *for_igvn) { _for_igvn = for_igvn; }
+
+ void identify_useful_nodes(Unique_Node_List &useful);
+ void remove_useless_nodes (Unique_Node_List &useful);
+
+ WarmCallInfo* warm_calls() const { return _warm_calls; }
+ void set_warm_calls(WarmCallInfo* l) { _warm_calls = l; }
+ WarmCallInfo* pop_warm_call();
+
+ // Matching, CFG layout, allocation, code generation
+ PhaseCFG* cfg() { return _cfg; }
+ bool select_24_bit_instr() const { return _select_24_bit_instr; }
+ bool in_24_bit_fp_mode() const { return _in_24_bit_fp_mode; }
+ bool has_java_calls() const { return _has_java_calls; }
+ Matcher* matcher() { return _matcher; }
+ PhaseRegAlloc* regalloc() { return _regalloc; }
+ int frame_slots() const { return _frame_slots; }
+ int frame_size_in_words() const; // frame_slots in units of the polymorphic 'words'
+ RegMask& FIRST_STACK_mask() { return _FIRST_STACK_mask; }
+ Arena* indexSet_arena() { return _indexSet_arena; }
+ void* indexSet_free_block_list() { return _indexSet_free_block_list; }
+ uint node_bundling_limit() { return _node_bundling_limit; }
+ Bundle* node_bundling_base() { return _node_bundling_base; }
+ void set_node_bundling_limit(uint n) { _node_bundling_limit = n; }
+ void set_node_bundling_base(Bundle* b) { _node_bundling_base = b; }
+ bool starts_bundle(const Node *n) const;
+ bool need_stack_bang(int frame_size_in_bytes) const;
+ bool need_register_stack_bang() const;
+
+ void set_matcher(Matcher* m) { _matcher = m; }
+//void set_regalloc(PhaseRegAlloc* ra) { _regalloc = ra; }
+ void set_indexSet_arena(Arena* a) { _indexSet_arena = a; }
+ void set_indexSet_free_block_list(void* p) { _indexSet_free_block_list = p; }
+
+ // Remember if this compilation changes hardware mode to 24-bit precision
+ void set_24_bit_selection_and_mode(bool selection, bool mode) {
+ _select_24_bit_instr = selection;
+ _in_24_bit_fp_mode = mode;
+ }
+
+ void set_has_java_calls(bool z) { _has_java_calls = z; }
+
+ // Instruction bits passed off to the VM
+ int code_size() { return _method_size; }
+ CodeBuffer* code_buffer() { return &_code_buffer; }
+ int first_block_size() { return _first_block_size; }
+ void set_frame_complete(int off) { _code_offsets.set_value(CodeOffsets::Frame_Complete, off); }
+ ExceptionHandlerTable* handler_table() { return &_handler_table; }
+ ImplicitExceptionTable* inc_table() { return &_inc_table; }
+ OopMapSet* oop_map_set() { return _oop_map_set; }
+ DebugInformationRecorder* debug_info() { return env()->debug_info(); }
+ Dependencies* dependencies() { return env()->dependencies(); }
+ static int CompiledZap_count() { return _CompiledZap_count; }
+ BufferBlob* scratch_buffer_blob() { return _scratch_buffer_blob; }
+ void init_scratch_buffer_blob();
+ void set_scratch_buffer_blob(BufferBlob* b) { _scratch_buffer_blob = b; }
+ relocInfo* scratch_locs_memory() { return _scratch_locs_memory; }
+ void set_scratch_locs_memory(relocInfo* b) { _scratch_locs_memory = b; }
+
+ // emit to scratch blob, report resulting size
+ uint scratch_emit_size(const Node* n);
+
+ enum ScratchBufferBlob {
+ MAX_inst_size = 1024,
+ MAX_locs_size = 128, // number of relocInfo elements
+ MAX_const_size = 128,
+ MAX_stubs_size = 128
+ };
+
+ // Major entry point. Given a Scope, compile the associated method.
+ // For normal compilations, entry_bci is InvocationEntryBci. For on stack
+ // replacement, entry_bci indicates the bytecode for which to compile a
+ // continuation.
+ Compile(ciEnv* ci_env, C2Compiler* compiler, ciMethod* target,
+ int entry_bci, bool subsume_loads);
+
+ // Second major entry point. From the TypeFunc signature, generate code
+ // to pass arguments from the Java calling convention to the C calling
+ // convention.
+ Compile(ciEnv* ci_env, const TypeFunc *(*gen)(),
+ address stub_function, const char *stub_name,
+ int is_fancy_jump, bool pass_tls,
+ bool save_arg_registers, bool return_pc);
+
+ // From the TypeFunc signature, generate code to pass arguments
+ // from Compiled calling convention to Interpreter's calling convention
+ void Generate_Compiled_To_Interpreter_Graph(const TypeFunc *tf, address interpreter_entry);
+
+ // From the TypeFunc signature, generate code to pass arguments
+ // from Interpreter's calling convention to Compiler's calling convention
+ void Generate_Interpreter_To_Compiled_Graph(const TypeFunc *tf);
+
+ // Are we compiling a method?
+ bool has_method() { return method() != NULL; }
+
+ // Maybe print some information about this compile.
+ void print_compile_messages();
+
+ // Final graph reshaping, a post-pass after the regular optimizer is done.
+ bool final_graph_reshaping();
+
+ // returns true if adr is completely contained in the given alias category
+ bool must_alias(const TypePtr* adr, int alias_idx);
+
+ // returns true if adr overlaps with the given alias category
+ bool can_alias(const TypePtr* adr, int alias_idx);
+
+ // Driver for converting compiler's IR into machine code bits
+ void Output();
+
+ // Accessors for node bundling info.
+ Bundle* node_bundling(const Node *n);
+ bool valid_bundle_info(const Node *n);
+
+ // Schedule and Bundle the instructions
+ void ScheduleAndBundle();
+
+ // Build OopMaps for each GC point
+ void BuildOopMaps();
+ // Append debug info for the node to the array
+ void FillLocArray( int idx, Node *local, GrowableArray<ScopeValue*> *array );
+
+ // Process an OopMap Element while emitting nodes
+ void Process_OopMap_Node(MachNode *mach, int code_offset);
+
+ // Write out basic block data to code buffer
+ void Fill_buffer();
+
+ // Determine which variable sized branches can be shortened
+ void Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size);
+
+ // Compute the size of first NumberOfLoopInstrToAlign instructions
+ // at the head of a loop.
+ void compute_loop_first_inst_sizes();
+
+ // Compute the information for the exception tables
+ void FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels);
+
+ // Stack slots that may be unused by the calling convention but must
+ // otherwise be preserved. On Intel this includes the return address.
+ // On PowerPC it includes the 4 words holding the old TOC & LR glue.
+ uint in_preserve_stack_slots();
+
+ // "Top of Stack" slots that may be unused by the calling convention but must
+ // otherwise be preserved.
+ // On Intel these are not necessary and the value can be zero.
+ // On Sparc this describes the words reserved for storing a register window
+ // when an interrupt occurs.
+ static uint out_preserve_stack_slots();
+
+ // Number of outgoing stack slots killed above the out_preserve_stack_slots
+ // for calls to C. Supports the var-args backing area for register parms.
+ uint varargs_C_out_slots_killed() const;
+
+ // Number of Stack Slots consumed by a synchronization entry
+ int sync_stack_slots() const;
+
+ // Compute the name of old_SP. See <arch>.ad for frame layout.
+ OptoReg::Name compute_old_SP();
+
+#ifdef ENABLE_ZAP_DEAD_LOCALS
+ static bool is_node_getting_a_safepoint(Node*);
+ void Insert_zap_nodes();
+ Node* call_zap_node(MachSafePointNode* n, int block_no);
+#endif
+
+ private:
+ // Phase control:
+ void Init(int aliaslevel); // Prepare for a single compilation
+ int Inline_Warm(); // Find more inlining work.
+ void Finish_Warm(); // Give up on further inlines.
+ void Optimize(); // Given a graph, optimize it
+ void Code_Gen(); // Generate code from a graph
+
+ // Management of the AliasType table.
+ void grow_alias_types();
+ AliasCacheEntry* probe_alias_cache(const TypePtr* adr_type);
+ const TypePtr *flatten_alias_type(const TypePtr* adr_type) const;
+ AliasType* find_alias_type(const TypePtr* adr_type, bool no_create);
+
+ void verify_top(Node*) const PRODUCT_RETURN;
+
+ // Intrinsic setup.
+ void register_library_intrinsics(); // initializer
+ CallGenerator* make_vm_intrinsic(ciMethod* m, bool is_virtual); // constructor
+ int intrinsic_insertion_index(ciMethod* m, bool is_virtual); // helper
+ CallGenerator* find_intrinsic(ciMethod* m, bool is_virtual); // query fn
+ void register_intrinsic(CallGenerator* cg); // update fn
+
+#ifndef PRODUCT
+ static juint _intrinsic_hist_count[vmIntrinsics::ID_LIMIT];
+ static jubyte _intrinsic_hist_flags[vmIntrinsics::ID_LIMIT];
+#endif
+
+ public:
+
+ // Note: Histogram array size is about 1 Kb.
+ enum { // flag bits:
+ _intrinsic_worked = 1, // succeeded at least once
+ _intrinsic_failed = 2, // tried it but it failed
+ _intrinsic_disabled = 4, // was requested but disabled (e.g., -XX:-InlineUnsafeOps)
+ _intrinsic_virtual = 8, // was seen in the virtual form (rare)
+ _intrinsic_both = 16 // was seen in the non-virtual form (usual)
+ };
+ // Update histogram. Return boolean if this is a first-time occurrence.
+ static bool gather_intrinsic_statistics(vmIntrinsics::ID id,
+ bool is_virtual, int flags) PRODUCT_RETURN0;
+ static void print_intrinsic_statistics() PRODUCT_RETURN;
+
+ // Graph verification code
+ // Walk the node list, verifying that there is a one-to-one
+ // correspondence between Use-Def edges and Def-Use edges
+ // The option no_dead_code enables stronger checks that the
+ // graph is strongly connected from root in both directions.
+ void verify_graph_edges(bool no_dead_code = false) PRODUCT_RETURN;
+
+ // Print bytecodes, including the scope inlining tree
+ void print_codes();
+
+ // End-of-run dumps.
+ static void print_statistics() PRODUCT_RETURN;
+
+ // Dump formatted assembly
+ void dump_asm(int *pcs = NULL, uint pc_limit = 0) PRODUCT_RETURN;
+ void dump_pc(int *pcs, int pc_limit, Node *n);
+
+ // Verify ADLC assumptions during startup
+ static void adlc_verification() PRODUCT_RETURN;
+
+ // Definitions of pd methods
+ static void pd_compiler2_init();
+};
diff --git a/src/share/vm/opto/connode.cpp b/src/share/vm/opto/connode.cpp
new file mode 100644
index 000000000..6896f3471
--- /dev/null
+++ b/src/share/vm/opto/connode.cpp
@@ -0,0 +1,1227 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_connode.cpp.incl"
+
+//=============================================================================
+//------------------------------hash-------------------------------------------
+uint ConNode::hash() const {
+ return (uintptr_t)in(TypeFunc::Control) + _type->hash();
+}
+
+//------------------------------make-------------------------------------------
+ConNode *ConNode::make( Compile* C, const Type *t ) {
+ switch( t->basic_type() ) {
+ case T_INT: return new (C, 1) ConINode( t->is_int() );
+ case T_ARRAY: return new (C, 1) ConPNode( t->is_aryptr() );
+ case T_LONG: return new (C, 1) ConLNode( t->is_long() );
+ case T_FLOAT: return new (C, 1) ConFNode( t->is_float_constant() );
+ case T_DOUBLE: return new (C, 1) ConDNode( t->is_double_constant() );
+ case T_VOID: return new (C, 1) ConNode ( Type::TOP );
+ case T_OBJECT: return new (C, 1) ConPNode( t->is_oopptr() );
+ case T_ADDRESS: return new (C, 1) ConPNode( t->is_ptr() );
+ // Expected cases: TypePtr::NULL_PTR, any is_rawptr()
+ // Also seen: AnyPtr(TopPTR *+top); from command line:
+ // r -XX:+PrintOpto -XX:CIStart=285 -XX:+CompileTheWorld -XX:CompileTheWorldStartAt=660
+ // %%%% Stop using TypePtr::NULL_PTR to represent nulls: use either TypeRawPtr::NULL_PTR
+ // or else TypeOopPtr::NULL_PTR. Then set Type::_basic_type[AnyPtr] = T_ILLEGAL
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//=============================================================================
+/*
+The major change is for CMoveP and StrComp. They have related but slightly
+different problems. They both take in TWO oops which are both null-checked
+independently before the using Node. After CCP removes the CastPP's they need
+to pick up the guarding test edge - in this case TWO control edges. I tried
+various solutions, all have problems:
+
+(1) Do nothing. This leads to a bug where we hoist a Load from a CMoveP or a
+StrComp above a guarding null check. I've seen both cases in normal -Xcomp
+testing.
+
+(2) Plug the control edge from 1 of the 2 oops in. Apparent problem here is
+to figure out which test post-dominates. The real problem is that it doesn't
+matter which one you pick. After you pick one, the dominating-test elider in
+IGVN can remove the test and allow you to hoist up to the dominating test on
+the chosen oop, bypassing the test on the not-chosen oop. Seen in testing.
+Oops.
+
+(3) Leave the CastPP's in. This makes the graph more accurate in some sense;
+we get to keep around the knowledge that an oop is not-null after some test.
+Alas, the CastPP's interfere with GVN (some values are the regular oop, some
+are the CastPP of the oop, all merge at Phi's which cannot collapse, etc).
+This cost us 10% on SpecJVM, even when I removed some of the more trivial
+cases in the optimizer. Removing more useless Phi's started allowing Loads to
+illegally float above null checks. I gave up on this approach.
+
+(4) Add BOTH control edges to both tests. Alas, too much code knows that
+control edges are in slot-zero ONLY. Many quick asserts fail; no way to do
+this one. Note that I really want to allow the CMoveP to float and add both
+control edges to the dependent Load op - meaning I can select early but I
+cannot Load until I pass both tests.
+
+(5) Do not hoist CMoveP and StrComp. To this end I added the v-call
+depends_only_on_test(). No obvious performance loss on Spec, but we are
+clearly conservative on CMoveP (also so on StrComp but that's unlikely to
+matter ever).
+
+*/
+
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Move constants to the right.
+Node *CMoveNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(0) && remove_dead_region(phase, can_reshape) ) return this;
+ assert( !phase->eqv(in(Condition), this) &&
+ !phase->eqv(in(IfFalse), this) &&
+ !phase->eqv(in(IfTrue), this), "dead loop in CMoveNode::Ideal" );
+ if( phase->type(in(Condition)) == Type::TOP )
+ return NULL; // return NULL when Condition is dead
+
+ if( in(IfFalse)->is_Con() && !in(IfTrue)->is_Con() ) {
+ if( in(Condition)->is_Bool() ) {
+ BoolNode* b = in(Condition)->as_Bool();
+ BoolNode* b2 = b->negate(phase);
+ return make( phase->C, in(Control), phase->transform(b2), in(IfTrue), in(IfFalse), _type );
+ }
+ }
+ return NULL;
+}
+
+//------------------------------is_cmove_id------------------------------------
+// Helper function to check for CMOVE identity. Shared with PhiNode::Identity
+Node *CMoveNode::is_cmove_id( PhaseTransform *phase, Node *cmp, Node *t, Node *f, BoolNode *b ) {
+ // Check for Cmp'ing and CMove'ing same values
+ if( (phase->eqv(cmp->in(1),f) &&
+ phase->eqv(cmp->in(2),t)) ||
+ // Swapped Cmp is OK
+ (phase->eqv(cmp->in(2),f) &&
+ phase->eqv(cmp->in(1),t)) ) {
+ // Check for "(t==f)?t:f;" and replace with "f"
+ if( b->_test._test == BoolTest::eq )
+ return f;
+ // Allow the inverted case as well
+ // Check for "(t!=f)?t:f;" and replace with "t"
+ if( b->_test._test == BoolTest::ne )
+ return t;
+ }
+ return NULL;
+}
+
+//------------------------------Identity---------------------------------------
+// Conditional-move is an identity if both inputs are the same, or the test
+// true or false.
+Node *CMoveNode::Identity( PhaseTransform *phase ) {
+ if( phase->eqv(in(IfFalse),in(IfTrue)) ) // C-moving identical inputs?
+ return in(IfFalse); // Then it doesn't matter
+ if( phase->type(in(Condition)) == TypeInt::ZERO )
+ return in(IfFalse); // Always pick left(false) input
+ if( phase->type(in(Condition)) == TypeInt::ONE )
+ return in(IfTrue); // Always pick right(true) input
+
+ // Check for CMove'ing a constant after comparing against the constant.
+ // Happens all the time now, since if we compare equality vs a constant in
+ // the parser, we "know" the variable is constant on one path and we force
+ // it. Thus code like "if( x==0 ) {/*EMPTY*/}" ends up inserting a
+ // conditional move: "x = (x==0)?0:x;". Yucko. This fix is slightly more
+ // general in that we don't need constants.
+ if( in(Condition)->is_Bool() ) {
+ BoolNode *b = in(Condition)->as_Bool();
+ Node *cmp = b->in(1);
+ if( cmp->is_Cmp() ) {
+ Node *id = is_cmove_id( phase, cmp, in(IfTrue), in(IfFalse), b );
+ if( id ) return id;
+ }
+ }
+
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+// Result is the meet of inputs
+const Type *CMoveNode::Value( PhaseTransform *phase ) const {
+ if( phase->type(in(Condition)) == Type::TOP )
+ return Type::TOP;
+ return phase->type(in(IfFalse))->meet(phase->type(in(IfTrue)));
+}
+
+//------------------------------make-------------------------------------------
+// Make a correctly-flavored CMove. Since _type is directly determined
+// from the inputs we do not need to specify it here.
+CMoveNode *CMoveNode::make( Compile *C, Node *c, Node *bol, Node *left, Node *right, const Type *t ) {
+ switch( t->basic_type() ) {
+ case T_INT: return new (C, 4) CMoveINode( bol, left, right, t->is_int() );
+ case T_FLOAT: return new (C, 4) CMoveFNode( bol, left, right, t );
+ case T_DOUBLE: return new (C, 4) CMoveDNode( bol, left, right, t );
+ case T_LONG: return new (C, 4) CMoveLNode( bol, left, right, t->is_long() );
+ case T_OBJECT: return new (C, 4) CMovePNode( c, bol, left, right, t->is_oopptr() );
+ case T_ADDRESS: return new (C, 4) CMovePNode( c, bol, left, right, t->is_ptr() );
+ default:
+ ShouldNotReachHere();
+ return NULL;
+ }
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Check for conversions to boolean
+Node *CMoveINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Try generic ideal's first
+ Node *x = CMoveNode::Ideal(phase, can_reshape);
+ if( x ) return x;
+
+ // If zero is on the left (false-case, no-move-case) it must mean another
+ // constant is on the right (otherwise the shared CMove::Ideal code would
+ // have moved the constant to the right). This situation is bad for Intel
+ // and a don't-care for Sparc. It's bad for Intel because the zero has to
+ // be manifested in a register with a XOR which kills flags, which are live
+ // on input to the CMoveI, leading to a situation which causes excessive
+  // spilling on Intel. For Sparc, if the zero is on the left the Sparc will
+ // zero a register via G0 and conditionally-move the other constant. If the
+ // zero is on the right, the Sparc will load the first constant with a
+ // 13-bit set-lo and conditionally move G0. See bug 4677505.
+ if( phase->type(in(IfFalse)) == TypeInt::ZERO && !(phase->type(in(IfTrue)) == TypeInt::ZERO) ) {
+ if( in(Condition)->is_Bool() ) {
+ BoolNode* b = in(Condition)->as_Bool();
+ BoolNode* b2 = b->negate(phase);
+ return make( phase->C, in(Control), phase->transform(b2), in(IfTrue), in(IfFalse), _type );
+ }
+ }
+
+ // Now check for booleans
+ int flip = 0;
+
+ // Check for picking from zero/one
+ if( phase->type(in(IfFalse)) == TypeInt::ZERO && phase->type(in(IfTrue)) == TypeInt::ONE ) {
+ flip = 1 - flip;
+ } else if( phase->type(in(IfFalse)) == TypeInt::ONE && phase->type(in(IfTrue)) == TypeInt::ZERO ) {
+ } else return NULL;
+
+ // Check for eq/ne test
+ if( !in(1)->is_Bool() ) return NULL;
+ BoolNode *bol = in(1)->as_Bool();
+ if( bol->_test._test == BoolTest::eq ) {
+ } else if( bol->_test._test == BoolTest::ne ) {
+ flip = 1-flip;
+ } else return NULL;
+
+ // Check for vs 0 or 1
+ if( !bol->in(1)->is_Cmp() ) return NULL;
+ const CmpNode *cmp = bol->in(1)->as_Cmp();
+ if( phase->type(cmp->in(2)) == TypeInt::ZERO ) {
+ } else if( phase->type(cmp->in(2)) == TypeInt::ONE ) {
+ // Allow cmp-vs-1 if the other input is bounded by 0-1
+ if( phase->type(cmp->in(1)) != TypeInt::BOOL )
+ return NULL;
+ flip = 1 - flip;
+ } else return NULL;
+
+ // Convert to a bool (flipped)
+ // Build int->bool conversion
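+ // e.g.  CMoveI( Bool(CmpI(x,0),ne), IfFalse: 0, IfTrue: 1 )  ==>  Conv2B(x)
+ //       CMoveI( Bool(CmpI(x,0),eq), IfFalse: 0, IfTrue: 1 )  ==>  XorI( Conv2B(x), 1 )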
+#ifndef PRODUCT
+ if( PrintOpto ) tty->print_cr("CMOV to I2B");
+#endif
+ Node *n = new (phase->C, 2) Conv2BNode( cmp->in(1) );
+ if( flip )
+ n = new (phase->C, 3) XorINode( phase->transform(n), phase->intcon(1) );
+
+ return n;
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Check for absolute value
+Node *CMoveFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Try generic ideal's first
+ Node *x = CMoveNode::Ideal(phase, can_reshape);
+ if( x ) return x;
+
+ int cmp_zero_idx = 0; // Index of compare input where to look for zero
+ int phi_x_idx = 0; // Index of phi input where to find naked x
+
+ // Find the Bool
+ if( !in(1)->is_Bool() ) return NULL;
+ BoolNode *bol = in(1)->as_Bool();
+ // Check bool sense
+ switch( bol->_test._test ) {
+ case BoolTest::lt: cmp_zero_idx = 1; phi_x_idx = IfTrue; break;
+ case BoolTest::le: cmp_zero_idx = 2; phi_x_idx = IfFalse; break;
+ case BoolTest::gt: cmp_zero_idx = 2; phi_x_idx = IfTrue; break;
+ case BoolTest::ge: cmp_zero_idx = 1; phi_x_idx = IfFalse; break;
+ default: return NULL; break;
+ }
+
+ // Find zero input of CmpF; the other input is being abs'd
+ Node *cmpf = bol->in(1);
+ if( cmpf->Opcode() != Op_CmpF ) return NULL;
+ Node *X = NULL;
+ bool flip = false;
+ if( phase->type(cmpf->in(cmp_zero_idx)) == TypeF::ZERO ) {
+ X = cmpf->in(3 - cmp_zero_idx);
+ } else if (phase->type(cmpf->in(3 - cmp_zero_idx)) == TypeF::ZERO) {
+ // The test is inverted, we should invert the result...
+ X = cmpf->in(cmp_zero_idx);
+ flip = true;
+ } else {
+ return NULL;
+ }
+
+ // If X is found on the appropriate phi input, find the subtract on the other
+ if( X != in(phi_x_idx) ) return NULL;
+ int phi_sub_idx = phi_x_idx == IfTrue ? IfFalse : IfTrue;
+ Node *sub = in(phi_sub_idx);
+
+ // Allow only SubF(0,X) and fail out for all others; NegF is not OK
+ if( sub->Opcode() != Op_SubF ||
+ sub->in(2) != X ||
+ phase->type(sub->in(1)) != TypeF::ZERO ) return NULL;
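+ // e.g. for "x > 0.0f ? x : 0.0f - x" the graph shape is
+ //   CMoveF( Bool(CmpF(x,0),gt), IfFalse: SubF(0,x), IfTrue: x )
+ // which collapses to AbsF(x); when the zero sits on the other compare
+ // input the sense is inverted and the result is negated below (flip).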
+
+ Node *abs = new (phase->C, 2) AbsFNode( X );
+ if( flip )
+ abs = new (phase->C, 3) SubFNode(sub->in(1), phase->transform(abs));
+
+ return abs;
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Check for absolute value
+Node *CMoveDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Try generic ideal's first
+ Node *x = CMoveNode::Ideal(phase, can_reshape);
+ if( x ) return x;
+
+ int cmp_zero_idx = 0; // Index of compare input where to look for zero
+ int phi_x_idx = 0; // Index of phi input where to find naked x
+
+ // Find the Bool
+ if( !in(1)->is_Bool() ) return NULL;
+ BoolNode *bol = in(1)->as_Bool();
+ // Check bool sense
+ switch( bol->_test._test ) {
+ case BoolTest::lt: cmp_zero_idx = 1; phi_x_idx = IfTrue; break;
+ case BoolTest::le: cmp_zero_idx = 2; phi_x_idx = IfFalse; break;
+ case BoolTest::gt: cmp_zero_idx = 2; phi_x_idx = IfTrue; break;
+ case BoolTest::ge: cmp_zero_idx = 1; phi_x_idx = IfFalse; break;
+ default: return NULL; break;
+ }
+
+ // Find zero input of CmpD; the other input is being abs'd
+ Node *cmpd = bol->in(1);
+ if( cmpd->Opcode() != Op_CmpD ) return NULL;
+ Node *X = NULL;
+ bool flip = false;
+ if( phase->type(cmpd->in(cmp_zero_idx)) == TypeD::ZERO ) {
+ X = cmpd->in(3 - cmp_zero_idx);
+ } else if (phase->type(cmpd->in(3 - cmp_zero_idx)) == TypeD::ZERO) {
+ // The test is inverted, we should invert the result...
+ X = cmpd->in(cmp_zero_idx);
+ flip = true;
+ } else {
+ return NULL;
+ }
+
+ // If X is found on the appropriate phi input, find the subtract on the other
+ if( X != in(phi_x_idx) ) return NULL;
+ int phi_sub_idx = phi_x_idx == IfTrue ? IfFalse : IfTrue;
+ Node *sub = in(phi_sub_idx);
+
+ // Allow only SubD(0,X) and fail out for all others; NegD is not OK
+ if( sub->Opcode() != Op_SubD ||
+ sub->in(2) != X ||
+ phase->type(sub->in(1)) != TypeD::ZERO ) return NULL;
+
+ Node *abs = new (phase->C, 2) AbsDNode( X );
+ if( flip )
+ abs = new (phase->C, 3) SubDNode(sub->in(1), phase->transform(abs));
+
+ return abs;
+}
+
+
+//=============================================================================
+// If input is already higher or equal to cast type, then this is an identity.
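+// For example, a CastII to int:[0..10] whose input is already typed
+// int:[3..5] adds no information and can be replaced by its input.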
+Node *ConstraintCastNode::Identity( PhaseTransform *phase ) {
+ return phase->type(in(1))->higher_equal(_type) ? in(1) : this;
+}
+
+//------------------------------Value------------------------------------------
+// Take 'join' of input and cast-up type
+const Type *ConstraintCastNode::Value( PhaseTransform *phase ) const {
+ if( in(0) && phase->type(in(0)) == Type::TOP ) return Type::TOP;
+ const Type* ft = phase->type(in(1))->filter(_type);
+
+#ifdef ASSERT
+ // Previous versions of this function had some special case logic,
+ // which is no longer necessary. Make sure of the required effects.
+ switch (Opcode()) {
+ case Op_CastII:
+ {
+ const Type* t1 = phase->type(in(1));
+ if( t1 == Type::TOP ) assert(ft == Type::TOP, "special case #1");
+ const Type* rt = t1->join(_type);
+ if (rt->empty()) assert(ft == Type::TOP, "special case #2");
+ break;
+ }
+ case Op_CastPP:
+ if (phase->type(in(1)) == TypePtr::NULL_PTR &&
+ _type->isa_ptr() && _type->is_ptr()->_ptr == TypePtr::NotNull)
+ assert(ft == Type::TOP, "special case #3");
+ break;
+ }
+#endif //ASSERT
+
+ return ft;
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *ConstraintCastNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return (in(0) && remove_dead_region(phase, can_reshape)) ? this : NULL;
+}
+
+//------------------------------Ideal_DU_postCCP-------------------------------
+// Throw away cast after constant propagation
+Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
+ const Type *t = ccp->type(in(1));
+ ccp->hash_delete(this);
+ set_type(t); // Turn into ID function
+ ccp->hash_insert(this);
+ return this;
+}
+
+
+//=============================================================================
+
+//------------------------------Ideal_DU_postCCP-------------------------------
+// If not converting int->oop, throw away cast after constant propagation
+Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
+ const Type *t = ccp->type(in(1));
+ if (!t->isa_oop_ptr()) {
+ return NULL; // do not transform raw pointers
+ }
+ return ConstraintCastNode::Ideal_DU_postCCP(ccp);
+}
+
+
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If input is already higher or equal to cast type, then this is an identity.
+Node *CheckCastPPNode::Identity( PhaseTransform *phase ) {
+ // Toned down to rescue meeting at a Phi 3 different oops all implementing
+ // the same interface. CompileTheWorld starting at 502, kd12rc1.zip.
+ return (phase->type(in(1)) == phase->type(this)) ? in(1) : this;
+}
+
+// Determine whether "n" is a node which can cause an alias of one of its inputs. Node types
+// which can create aliases are: CheckCastPP, Phi, and any store (if there is also a load from
+// the location).
+// Note: this checks for aliases created in this compilation, not ones which
+// may be created at call sites.
+static bool can_cause_alias(Node *n, PhaseTransform *phase) {
+ bool possible_alias = false;
+
+ if (n->is_Store()) {
+ possible_alias = !n->as_Store()->value_never_loaded(phase);
+ } else {
+ int opc = n->Opcode();
+ possible_alias = n->is_Phi() ||
+ opc == Op_CheckCastPP ||
+ opc == Op_StorePConditional ||
+ opc == Op_CompareAndSwapP;
+ }
+ return possible_alias;
+}
+
+//------------------------------Value------------------------------------------
+// Take 'join' of input and cast-up type, unless working with an Interface
+const Type *CheckCastPPNode::Value( PhaseTransform *phase ) const {
+ if( in(0) && phase->type(in(0)) == Type::TOP ) return Type::TOP;
+
+ const Type *inn = phase->type(in(1));
+ if( inn == Type::TOP ) return Type::TOP; // No information yet
+
+ const TypePtr *in_type = inn->isa_ptr();
+ const TypePtr *my_type = _type->isa_ptr();
+ const Type *result = _type;
+ if( in_type != NULL && my_type != NULL ) {
+ TypePtr::PTR in_ptr = in_type->ptr();
+ if( in_ptr == TypePtr::Null ) {
+ result = in_type;
+ } else if( in_ptr == TypePtr::Constant ) {
+ // Casting a constant oop to an interface?
+ // (i.e., a String to a Comparable?)
+ // Then return the interface.
+ const TypeOopPtr *jptr = my_type->isa_oopptr();
+ assert( jptr, "" );
+ result = (jptr->klass()->is_interface() || !in_type->higher_equal(_type))
+ ? my_type->cast_to_ptr_type( TypePtr::NotNull )
+ : in_type;
+ } else {
+ result = my_type->cast_to_ptr_type( my_type->join_ptr(in_ptr) );
+ }
+ }
+ return result;
+
+ // JOIN NOT DONE HERE BECAUSE OF INTERFACE ISSUES.
+ // FIX THIS (DO THE JOIN) WHEN UNION TYPES APPEAR!
+
+ //
+ // Remove this code after overnight run indicates no performance
+ // loss from not performing JOIN at CheckCastPPNode
+ //
+ // const TypeInstPtr *in_oop = in->isa_instptr();
+ // const TypeInstPtr *my_oop = _type->isa_instptr();
+ // // If either input is an 'interface', return destination type
+ // assert (in_oop == NULL || in_oop->klass() != NULL, "");
+ // assert (my_oop == NULL || my_oop->klass() != NULL, "");
+ // if( (in_oop && in_oop->klass()->klass_part()->is_interface())
+ // ||(my_oop && my_oop->klass()->klass_part()->is_interface()) ) {
+ // TypePtr::PTR in_ptr = in->isa_ptr() ? in->is_ptr()->_ptr : TypePtr::BotPTR;
+ // // Preserve cast away nullness for interfaces
+ // if( in_ptr == TypePtr::NotNull && my_oop && my_oop->_ptr == TypePtr::BotPTR ) {
+ // return my_oop->cast_to_ptr_type(TypePtr::NotNull);
+ // }
+ // return _type;
+ // }
+ //
+ // // Neither the input nor the destination type is an interface,
+ //
+ // // history: JOIN used to cause weird corner case bugs
+ // // return (in == TypeOopPtr::NULL_PTR) ? in : _type;
+ // // JOIN picks up NotNull in common instance-of/check-cast idioms, both oops.
+ // // JOIN does not preserve NotNull in other cases, e.g. RawPtr vs InstPtr
+ // const Type *join = in->join(_type);
+ // // Check if join preserved NotNull'ness for pointers
+ // if( join->isa_ptr() && _type->isa_ptr() ) {
+ // TypePtr::PTR join_ptr = join->is_ptr()->_ptr;
+ // TypePtr::PTR type_ptr = _type->is_ptr()->_ptr;
+ // // If there isn't any NotNull'ness to preserve
+ // // OR if join preserved NotNull'ness then return it
+ // if( type_ptr == TypePtr::BotPTR || type_ptr == TypePtr::Null ||
+ // join_ptr == TypePtr::NotNull || join_ptr == TypePtr::Constant ) {
+ // return join;
+ // }
+ // // ELSE return same old type as before
+ // return _type;
+ // }
+ // // Not joining two pointers
+ // return join;
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *CheckCastPPNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return (in(0) && remove_dead_region(phase, can_reshape)) ? this : NULL;
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *Conv2BNode::Identity( PhaseTransform *phase ) {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return in(1);
+ if( t == TypeInt::ZERO ) return in(1);
+ if( t == TypeInt::ONE ) return in(1);
+ if( t == TypeInt::BOOL ) return in(1);
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *Conv2BNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == TypeInt::ZERO ) return TypeInt::ZERO;
+ if( t == TypePtr::NULL_PTR ) return TypeInt::ZERO;
+ const TypePtr *tp = t->isa_ptr();
+ if( tp != NULL ) {
+ if( tp->ptr() == TypePtr::AnyNull ) return Type::TOP;
+ if( tp->ptr() == TypePtr::Constant) return TypeInt::ONE;
+ if (tp->ptr() == TypePtr::NotNull) return TypeInt::ONE;
+ return TypeInt::BOOL;
+ }
+ if (t->base() != Type::Int) return TypeInt::BOOL;
+ const TypeInt *ti = t->is_int();
+ if( ti->_hi < 0 || ti->_lo > 0 ) return TypeInt::ONE;
+ return TypeInt::BOOL;
+}
+
+
+// The conversion operations are all Alpha sorted. Please keep it that way!
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvD2FNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::DOUBLE ) return Type::FLOAT;
+ const TypeD *td = t->is_double_constant();
+ return TypeF::make( (float)td->getd() );
+}
+
+//------------------------------Identity---------------------------------------
+// Floats can be converted to doubles with no loss of bits. Hence
+// converting a float to a double and back to a float is a NOP.
+Node *ConvD2FNode::Identity(PhaseTransform *phase) {
+ return (in(1)->Opcode() == Op_ConvF2D) ? in(1)->in(1) : this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvD2INode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::DOUBLE ) return TypeInt::INT;
+ const TypeD *td = t->is_double_constant();
+ return TypeInt::make( SharedRuntime::d2i( td->getd() ) );
+}
+
+//------------------------------Ideal------------------------------------------
+// If converting to an int type, skip any rounding nodes
+Node *ConvD2INode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_RoundDouble )
+ set_req(1,in(1)->in(1));
+ return NULL;
+}
+
+//------------------------------Identity---------------------------------------
+// Ints can be converted to doubles with no loss of bits. Hence
+// converting an integer to a double and back to an integer is a NOP.
+Node *ConvD2INode::Identity(PhaseTransform *phase) {
+ return (in(1)->Opcode() == Op_ConvI2D) ? in(1)->in(1) : this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvD2LNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::DOUBLE ) return TypeLong::LONG;
+ const TypeD *td = t->is_double_constant();
+ return TypeLong::make( SharedRuntime::d2l( td->getd() ) );
+}
+
+//------------------------------Identity---------------------------------------
+Node *ConvD2LNode::Identity(PhaseTransform *phase) {
+ // Remove ConvD2L->ConvL2D->ConvD2L sequences.
+ if( in(1) ->Opcode() == Op_ConvL2D &&
+ in(1)->in(1)->Opcode() == Op_ConvD2L )
+ return in(1)->in(1);
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If converting to an int type, skip any rounding nodes
+Node *ConvD2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_RoundDouble )
+ set_req(1,in(1)->in(1));
+ return NULL;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvF2DNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::FLOAT ) return Type::DOUBLE;
+ const TypeF *tf = t->is_float_constant();
+#ifndef IA64
+ return TypeD::make( (double)tf->getf() );
+#else
+ float x = tf->getf();
+ return TypeD::make( (x == 0.0f) ? (double)x : (double)x + ia64_double_zero );
+#endif
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvF2INode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::FLOAT ) return TypeInt::INT;
+ const TypeF *tf = t->is_float_constant();
+ return TypeInt::make( SharedRuntime::f2i( tf->getf() ) );
+}
+
+//------------------------------Identity---------------------------------------
+Node *ConvF2INode::Identity(PhaseTransform *phase) {
+ // Remove ConvF2I->ConvI2F->ConvF2I sequences.
+ if( in(1) ->Opcode() == Op_ConvI2F &&
+ in(1)->in(1)->Opcode() == Op_ConvF2I )
+ return in(1)->in(1);
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If converting to an int type, skip any rounding nodes
+Node *ConvF2INode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_RoundFloat )
+ set_req(1,in(1)->in(1));
+ return NULL;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvF2LNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::FLOAT ) return TypeLong::LONG;
+ const TypeF *tf = t->is_float_constant();
+ return TypeLong::make( SharedRuntime::f2l( tf->getf() ) );
+}
+
+//------------------------------Identity---------------------------------------
+Node *ConvF2LNode::Identity(PhaseTransform *phase) {
+ // Remove ConvF2L->ConvL2F->ConvF2L sequences.
+ if( in(1) ->Opcode() == Op_ConvL2F &&
+ in(1)->in(1)->Opcode() == Op_ConvF2L )
+ return in(1)->in(1);
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If converting to an int type, skip any rounding nodes
+Node *ConvF2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_RoundFloat )
+ set_req(1,in(1)->in(1));
+ return NULL;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvI2DNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeInt *ti = t->is_int();
+ if( ti->is_con() ) return TypeD::make( (double)ti->get_con() );
+ return bottom_type();
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvI2FNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeInt *ti = t->is_int();
+ if( ti->is_con() ) return TypeF::make( (float)ti->get_con() );
+ return bottom_type();
+}
+
+//------------------------------Identity---------------------------------------
+Node *ConvI2FNode::Identity(PhaseTransform *phase) {
+ // Remove ConvI2F->ConvF2I->ConvI2F sequences.
+ if( in(1) ->Opcode() == Op_ConvF2I &&
+ in(1)->in(1)->Opcode() == Op_ConvI2F )
+ return in(1)->in(1);
+ return this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvI2LNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeInt *ti = t->is_int();
+ const Type* tl = TypeLong::make(ti->_lo, ti->_hi, ti->_widen);
+ // Join my declared type against my incoming type.
+ tl = tl->filter(_type);
+ return tl;
+}
+
+#ifdef _LP64
+static inline bool long_ranges_overlap(jlong lo1, jlong hi1,
+ jlong lo2, jlong hi2) {
+ // Two ranges overlap iff one range's low point falls in the other range.
+ return (lo2 <= lo1 && lo1 <= hi2) || (lo1 <= lo2 && lo2 <= hi1);
+}
+#endif
+
+//------------------------------Ideal------------------------------------------
+Node *ConvI2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const TypeLong* this_type = this->type()->is_long();
+ Node* this_changed = NULL;
+
+ // If _major_progress, then more loop optimizations follow. Do NOT
+ // remove this node's type assertion until no more loop ops can happen.
+ // The progress bit is set in the major loop optimizations; THEN comes the
+ // call to IterGVN and any chance of hitting this code. Cf. Opaque1Node.
+ if (can_reshape && !phase->C->major_progress()) {
+ const TypeInt* in_type = phase->type(in(1))->isa_int();
+ if (in_type != NULL && this_type != NULL &&
+ (in_type->_lo != this_type->_lo ||
+ in_type->_hi != this_type->_hi)) {
+ // Although this WORSENS the type, it increases GVN opportunities,
+ // because I2L nodes with the same input will common up, regardless
+ // of slightly differing type assertions. Such slight differences
+ // arise routinely as a result of loop unrolling, so this is a
+ // post-unrolling graph cleanup. Choose a type which depends only
+ // on my input. (Exception: Keep a range assertion of >=0 or <0.)
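+ // For example, if this node is typed long:[3..10] but its input is typed
+ // int:[0..100], the new type becomes long:[0..100]: only the input range
+ // and the >=0 assertion survive, so it can common up with other I2L's
+ // of the same input.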
+ jlong lo1 = this_type->_lo;
+ jlong hi1 = this_type->_hi;
+ int w1 = this_type->_widen;
+ if (lo1 != (jint)lo1 ||
+ hi1 != (jint)hi1 ||
+ lo1 > hi1) {
+ // Overflow leads to wraparound, wraparound leads to range saturation.
+ lo1 = min_jint; hi1 = max_jint;
+ } else if (lo1 >= 0) {
+ // Keep a range assertion of >=0.
+ lo1 = 0; hi1 = max_jint;
+ } else if (hi1 < 0) {
+ // Keep a range assertion of <0.
+ lo1 = min_jint; hi1 = -1;
+ } else {
+ lo1 = min_jint; hi1 = max_jint;
+ }
+ const TypeLong* wtype = TypeLong::make(MAX2((jlong)in_type->_lo, lo1),
+ MIN2((jlong)in_type->_hi, hi1),
+ MAX2((int)in_type->_widen, w1));
+ if (wtype != type()) {
+ set_type(wtype);
+ // Note: this_type still has old type value, for the logic below.
+ this_changed = this;
+ }
+ }
+ }
+
+#ifdef _LP64
+ // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)),
+ // but only if x and y have subranges that cannot cause 32-bit overflow,
+ // under the assumption that x+y is in my own subrange this->type().
+
+ // This assumption is based on a constraint (i.e., type assertion)
+ // established in Parse::array_addressing or perhaps elsewhere.
+ // This constraint has been adjoined to the "natural" type of
+ // the incoming argument in(1). We know (because of runtime
+ // checks) that the result value I2L(x+y) is in the joined range.
+ // Hence we can restrict the incoming terms (x, y) to values such
+ // that their sum also lands in that range.
+
+ // This optimization is useful only on 64-bit systems, where we hope
+ // the addition will end up subsumed in an addressing mode.
+ // It is necessary to do this when optimizing an unrolled array
+ // copy loop such as x[i++] = y[i++].
+
+ // On 32-bit systems, it's better to perform as much 32-bit math as
+ // possible before the I2L conversion, because 32-bit math is cheaper.
+ // There's no common reason to "leak" a constant offset through the I2L.
+ // Addressing arithmetic will not absorb it as part of a 64-bit AddL.
+
+ Node* z = in(1);
+ int op = z->Opcode();
+ if (op == Op_AddI || op == Op_SubI) {
+ Node* x = z->in(1);
+ Node* y = z->in(2);
+ assert (x != z && y != z, "dead loop in ConvI2LNode::Ideal");
+ if (phase->type(x) == Type::TOP) return this_changed;
+ if (phase->type(y) == Type::TOP) return this_changed;
+ const TypeInt* tx = phase->type(x)->is_int();
+ const TypeInt* ty = phase->type(y)->is_int();
+ const TypeLong* tz = this_type;
+ jlong xlo = tx->_lo;
+ jlong xhi = tx->_hi;
+ jlong ylo = ty->_lo;
+ jlong yhi = ty->_hi;
+ jlong zlo = tz->_lo;
+ jlong zhi = tz->_hi;
+ jlong vbit = CONST64(1) << BitsPerInt;
+ int widen = MAX2(tx->_widen, ty->_widen);
+ if (op == Op_SubI) {
+ jlong ylo0 = ylo;
+ ylo = -yhi;
+ yhi = -ylo0;
+ }
+ // See if x+y can cause positive overflow into z+2**32
+ if (long_ranges_overlap(xlo+ylo, xhi+yhi, zlo+vbit, zhi+vbit)) {
+ return this_changed;
+ }
+ // See if x+y can cause negative overflow into z-2**32
+ if (long_ranges_overlap(xlo+ylo, xhi+yhi, zlo-vbit, zhi-vbit)) {
+ return this_changed;
+ }
+ // Now it's always safe to assume x+y does not overflow.
+ // This is true even if some pairs x,y might cause overflow, as long
+ // as that overflow value cannot fall into [zlo,zhi].
+
+ // Confident that the arithmetic is "as if infinite precision",
+ // we can now use z's range to put constraints on those of x and y.
+ // The "natural" range of x [xlo,xhi] can perhaps be narrowed to a
+ // more "restricted" range by intersecting [xlo,xhi] with the
+ // range obtained by subtracting y's range from the asserted range
+ // of the I2L conversion. Here's the interval arithmetic algebra:
+ // x == z-y == [zlo,zhi]-[ylo,yhi] == [zlo,zhi]+[-yhi,-ylo]
+ // => x in [zlo-yhi, zhi-ylo]
+ // => x in [zlo-yhi, zhi-ylo] INTERSECT [xlo,xhi]
+ // => x in [xlo MAX zlo-yhi, xhi MIN zhi-ylo]
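+ // For example, with x and y in int:[0..100] and z asserted to long:[0..50]:
+ //   rxlo = MAX2(0, 0-100) = 0    rxhi = MIN2(100, 50-0) = 50
+ // so both converted operands get the narrower type [0..50] on the 64-bit add.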
+ jlong rxlo = MAX2(xlo, zlo - yhi);
+ jlong rxhi = MIN2(xhi, zhi - ylo);
+ // And similarly, x changing place with y:
+ jlong rylo = MAX2(ylo, zlo - xhi);
+ jlong ryhi = MIN2(yhi, zhi - xlo);
+ if (rxlo > rxhi || rylo > ryhi) {
+ return this_changed; // x or y is dying; don't mess w/ it
+ }
+ if (op == Op_SubI) {
+ jlong rylo0 = rylo;
+ rylo = -ryhi;
+ ryhi = -rylo0;
+ }
+
+ Node* cx = phase->transform( new (phase->C, 2) ConvI2LNode(x, TypeLong::make(rxlo, rxhi, widen)) );
+ Node* cy = phase->transform( new (phase->C, 2) ConvI2LNode(y, TypeLong::make(rylo, ryhi, widen)) );
+ switch (op) {
+ case Op_AddI: return new (phase->C, 3) AddLNode(cx, cy);
+ case Op_SubI: return new (phase->C, 3) SubLNode(cx, cy);
+ default: ShouldNotReachHere();
+ }
+ }
+#endif //_LP64
+
+ return this_changed;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvL2DNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeLong *tl = t->is_long();
+ if( tl->is_con() ) return TypeD::make( (double)tl->get_con() );
+ return bottom_type();
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvL2FNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeLong *tl = t->is_long();
+ if( tl->is_con() ) return TypeF::make( (float)tl->get_con() );
+ return bottom_type();
+}
+
+//=============================================================================
+//----------------------------Identity-----------------------------------------
+Node *ConvL2INode::Identity( PhaseTransform *phase ) {
+ // Convert L2I(I2L(x)) => x
+ if (in(1)->Opcode() == Op_ConvI2L) return in(1)->in(1);
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *ConvL2INode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeLong *tl = t->is_long();
+ if (tl->is_con())
+ // Easy case.
+ return TypeInt::make((jint)tl->get_con());
+ return bottom_type();
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Blow off prior masking to int
+Node *ConvL2INode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node *andl = in(1);
+ uint andl_op = andl->Opcode();
+ if( andl_op == Op_AndL ) {
+ // Blow off prior masking to int
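+ // e.g.  ConvL2I( AndL(x, 0xFFFFFFFF) )  ==>  ConvL2I(x)
+ // since the conversion keeps only the low 32 bits anyway.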
+ if( phase->type(andl->in(2)) == TypeLong::make( 0xFFFFFFFF ) ) {
+ set_req(1,andl->in(1));
+ return this;
+ }
+ }
+
+ // Swap with a prior add: convL2I(addL(x,y)) ==> addI(convL2I(x),convL2I(y))
+ // This replaces an 'AddL' with an 'AddI'.
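+ // This is legal because the low 32 bits of a long sum depend only on the
+ // low 32 bits of its operands, so narrowing the add cannot change the result.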
+ if( andl_op == Op_AddL ) {
+ // Don't do this for nodes which have more than one user since
+ // we'll end up computing the long add anyway.
+ if (andl->outcnt() > 1) return NULL;
+
+ Node* x = andl->in(1);
+ Node* y = andl->in(2);
+ assert( x != andl && y != andl, "dead loop in ConvL2INode::Ideal" );
+ if (phase->type(x) == Type::TOP) return NULL;
+ if (phase->type(y) == Type::TOP) return NULL;
+ Node *add1 = phase->transform(new (phase->C, 2) ConvL2INode(x));
+ Node *add2 = phase->transform(new (phase->C, 2) ConvL2INode(y));
+ return new (phase->C, 3) AddINode(add1,add2);
+ }
+
+ // Fold up with a prior LoadL: LoadL->ConvL2I ==> LoadI
+ // Requires that we understand the 'endianness' of Longs.
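+ // e.g. on a little-endian machine  ConvL2I( LoadL(mem,adr) )  ==>  LoadI(mem,adr)
+ // since the low word of the long lives at the same address as the long itself.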
+ if( andl_op == Op_LoadL ) {
+ Node *adr = andl->in(MemNode::Address);
+ // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
+#ifndef VM_LITTLE_ENDIAN
+ // The transformation can cause problems on BIG_ENDIAN architectures
+ // where the jint is not at the same address as the jlong. Specifically, we
+ // will fail to insert an anti-dependence in GCM between the LoadI and a
+ // subsequent StoreL because different memory offsets provoke
+ // flatten_alias_type() into indicating two different types. See bug
+ // 4755222.
+
+ // Node *base = adr->is_AddP() ? adr->in(AddPNode::Base) : adr;
+ // adr = phase->transform( new (phase->C, 4) AddPNode(base,adr,phase->MakeConX(sizeof(jint))));
+ return NULL;
+#else
+ if (phase->C->alias_type(andl->adr_type())->is_volatile()) {
+ // Picking up the low half by itself bypasses the atomic load and we could
+ // end up with more than one non-atomic load. See bugs 4432655 and 4526490.
+ // We could go to the trouble of iterating over andl's output edges and
+ // punting only if there's more than one real use, but we don't bother.
+ return NULL;
+ }
+ return new (phase->C, 3) LoadINode(andl->in(MemNode::Control),andl->in(MemNode::Memory),adr,((LoadLNode*)andl)->raw_adr_type());
+#endif
+ }
+
+ return NULL;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *CastX2PNode::Value( PhaseTransform *phase ) const {
+ const Type* t = phase->type(in(1));
+ if (t->base() == Type_X && t->singleton()) {
+ uintptr_t bits = (uintptr_t) t->is_intptr_t()->get_con();
+ if (bits == 0) return TypePtr::NULL_PTR;
+ return TypeRawPtr::make((address) bits);
+ }
+ return CastX2PNode::bottom_type();
+}
+
+//------------------------------Idealize---------------------------------------
+static inline bool fits_in_int(const Type* t, bool but_not_min_int = false) {
+ if (t == Type::TOP) return false;
+ const TypeX* tl = t->is_intptr_t();
+ jint lo = min_jint;
+ jint hi = max_jint;
+ if (but_not_min_int) ++lo; // caller wants to negate the value w/o overflow
+ return (tl->_lo >= lo) && (tl->_hi <= hi);
+}
+
+static inline Node* addP_of_X2P(PhaseGVN *phase,
+ Node* base,
+ Node* dispX,
+ bool negate = false) {
+ if (negate) {
+ dispX = new (phase->C, 3) SubXNode(phase->MakeConX(0), phase->transform(dispX));
+ }
+ return new (phase->C, 4) AddPNode(phase->C->top(),
+ phase->transform(new (phase->C, 2) CastX2PNode(base)),
+ phase->transform(dispX));
+}
+
+Node *CastX2PNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // convert CastX2P(AddX(x, y)) to AddP(CastX2P(x), y) if y fits in an int
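+ // Similarly, CastX2P(SubX(x, y)) becomes AddP(CastX2P(x), SubX(0, y))
+ // when y's value fits in an int and cannot be min_jint, so the negation
+ // cannot overflow an int.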
+ int op = in(1)->Opcode();
+ Node* x;
+ Node* y;
+ switch (op) {
+ case Op_SubX:
+ x = in(1)->in(1);
+ y = in(1)->in(2);
+ if (fits_in_int(phase->type(y), true)) {
+ return addP_of_X2P(phase, x, y, true);
+ }
+ break;
+ case Op_AddX:
+ x = in(1)->in(1);
+ y = in(1)->in(2);
+ if (fits_in_int(phase->type(y))) {
+ return addP_of_X2P(phase, x, y);
+ }
+ if (fits_in_int(phase->type(x))) {
+ return addP_of_X2P(phase, y, x);
+ }
+ break;
+ }
+ return NULL;
+}
+
+//------------------------------Identity---------------------------------------
+Node *CastX2PNode::Identity( PhaseTransform *phase ) {
+ if (in(1)->Opcode() == Op_CastP2X) return in(1)->in(1);
+ return this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *CastP2XNode::Value( PhaseTransform *phase ) const {
+ const Type* t = phase->type(in(1));
+ if (t->base() == Type::RawPtr && t->singleton()) {
+ uintptr_t bits = (uintptr_t) t->is_rawptr()->get_con();
+ return TypeX::make(bits);
+ }
+ return CastP2XNode::bottom_type();
+}
+
+Node *CastP2XNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return (in(0) && remove_dead_region(phase, can_reshape)) ? this : NULL;
+}
+
+//------------------------------Identity---------------------------------------
+Node *CastP2XNode::Identity( PhaseTransform *phase ) {
+ if (in(1)->Opcode() == Op_CastX2P) return in(1)->in(1);
+ return this;
+}
+
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// Remove redundant roundings
+Node *RoundFloatNode::Identity( PhaseTransform *phase ) {
+ assert(Matcher::strict_fp_requires_explicit_rounding, "should only generate for Intel");
+ // Do not round constants
+ if (phase->type(in(1))->base() == Type::FloatCon) return in(1);
+ int op = in(1)->Opcode();
+ // Redundant rounding
+ if( op == Op_RoundFloat ) return in(1);
+ // Already rounded
+ if( op == Op_Parm ) return in(1);
+ if( op == Op_LoadF ) return in(1);
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *RoundFloatNode::Value( PhaseTransform *phase ) const {
+ return phase->type( in(1) );
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// Remove redundant roundings. Incoming arguments are already rounded.
+Node *RoundDoubleNode::Identity( PhaseTransform *phase ) {
+ assert(Matcher::strict_fp_requires_explicit_rounding, "should only generate for Intel");
+ // Do not round constants
+ if (phase->type(in(1))->base() == Type::DoubleCon) return in(1);
+ int op = in(1)->Opcode();
+ // Redundant rounding
+ if( op == Op_RoundDouble ) return in(1);
+ // Already rounded
+ if( op == Op_Parm ) return in(1);
+ if( op == Op_LoadD ) return in(1);
+ if( op == Op_ConvF2D ) return in(1);
+ if( op == Op_ConvI2D ) return in(1);
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *RoundDoubleNode::Value( PhaseTransform *phase ) const {
+ return phase->type( in(1) );
+}
+
+
+//=============================================================================
+// Do not allow value-numbering
+uint Opaque1Node::hash() const { return NO_HASH; }
+uint Opaque1Node::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//------------------------------Identity---------------------------------------
+// If _major_progress, then more loop optimizations follow. Do NOT remove
+// the opaque Node until no more loop ops can happen. Note the timing of
+// _major_progress; it's set in the major loop optimizations; THEN comes the
+// call to IterGVN and any chance of hitting this code. Hence there's no
+// phase-ordering problem with stripping Opaque1 in IGVN followed by some
+// more loop optimizations that require it.
+Node *Opaque1Node::Identity( PhaseTransform *phase ) {
+ return phase->C->major_progress() ? this : in(1);
+}
+
+//=============================================================================
+// A node to prevent unwanted optimizations. Allows constant folding. Stops
+// value-numbering, most Ideal calls or Identity functions. This Node is
+// specifically designed to prevent the pre-increment value of a loop trip
+// counter from being live out of the bottom of the loop (hence causing the
+// pre- and post-increment values both being live and thus requiring an extra
+// temp register and an extra move). If we "accidentally" optimize through
+// this kind of a Node, we'll get slightly pessimal, but correct, code. Thus
+// it's OK to be slightly sloppy on optimizations here.
+
+// Do not allow value-numbering
+uint Opaque2Node::hash() const { return NO_HASH; }
+uint Opaque2Node::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+
+//------------------------------Value------------------------------------------
+const Type *MoveL2DNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeLong *tl = t->is_long();
+ if( !tl->is_con() ) return bottom_type();
+ JavaValue v;
+ v.set_jlong(tl->get_con());
+ return TypeD::make( v.get_jdouble() );
+}
+
+//------------------------------Value------------------------------------------
+const Type *MoveI2FNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeInt *ti = t->is_int();
+ if( !ti->is_con() ) return bottom_type();
+ JavaValue v;
+ v.set_jint(ti->get_con());
+ return TypeF::make( v.get_jfloat() );
+}
+
+//------------------------------Value------------------------------------------
+const Type *MoveF2INode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::FLOAT ) return TypeInt::INT;
+ const TypeF *tf = t->is_float_constant();
+ JavaValue v;
+ v.set_jfloat(tf->getf());
+ return TypeInt::make( v.get_jint() );
+}
+
+//------------------------------Value------------------------------------------
+const Type *MoveD2LNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::DOUBLE ) return TypeLong::LONG;
+ const TypeD *td = t->is_double_constant();
+ JavaValue v;
+ v.set_jdouble(td->getd());
+ return TypeLong::make( v.get_jlong() );
+}
diff --git a/src/share/vm/opto/connode.hpp b/src/share/vm/opto/connode.hpp
new file mode 100644
index 000000000..1c1b96a19
--- /dev/null
+++ b/src/share/vm/opto/connode.hpp
@@ -0,0 +1,578 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class PhaseTransform;
+class MachNode;
+
+//------------------------------ConNode----------------------------------------
+// Simple constants
+class ConNode : public TypeNode {
+public:
+ ConNode( const Type *t ) : TypeNode(t,1) {
+ init_req(0, (Node*)Compile::current()->root());
+ init_flags(Flag_is_Con);
+ }
+ virtual int Opcode() const;
+ virtual uint hash() const;
+ virtual const RegMask &out_RegMask() const { return RegMask::Empty; }
+ virtual const RegMask &in_RegMask(uint) const { return RegMask::Empty; }
+
+ // Polymorphic factory method:
+ static ConNode* make( Compile* C, const Type *t );
+};
+
+//------------------------------ConINode---------------------------------------
+// Simple integer constants
+class ConINode : public ConNode {
+public:
+ ConINode( const TypeInt *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory method:
+ static ConINode* make( Compile* C, int con ) {
+ return new (C, 1) ConINode( TypeInt::make(con) );
+ }
+
+};
+
+//------------------------------ConPNode---------------------------------------
+// Simple pointer constants
+class ConPNode : public ConNode {
+public:
+ ConPNode( const TypePtr *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory methods:
+ static ConPNode* make( Compile *C, address con ) {
+ if (con == NULL)
+ return new (C, 1) ConPNode( TypePtr::NULL_PTR ) ;
+ else
+ return new (C, 1) ConPNode( TypeRawPtr::make(con) );
+ }
+
+ static ConPNode* make( Compile *C, ciObject* con ) {
+ return new (C, 1) ConPNode( TypeOopPtr::make_from_constant(con) );
+ }
+
+};
+
+
+//------------------------------ConLNode---------------------------------------
+// Simple long constants
+class ConLNode : public ConNode {
+public:
+ ConLNode( const TypeLong *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory method:
+ static ConLNode* make( Compile *C, jlong con ) {
+ return new (C, 1) ConLNode( TypeLong::make(con) );
+ }
+
+};
+
+//------------------------------ConFNode---------------------------------------
+// Simple float constants
+class ConFNode : public ConNode {
+public:
+ ConFNode( const TypeF *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory method:
+ static ConFNode* make( Compile *C, float con ) {
+ return new (C, 1) ConFNode( TypeF::make(con) );
+ }
+
+};
+
+//------------------------------ConDNode---------------------------------------
+// Simple double constants
+class ConDNode : public ConNode {
+public:
+ ConDNode( const TypeD *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory method:
+ static ConDNode* make( Compile *C, double con ) {
+ return new (C, 1) ConDNode( TypeD::make(con) );
+ }
+
+};
+
+//------------------------------BinaryNode-------------------------------------
+// Placeholder for the 2 conditional inputs to a CMove. CMove needs 4
+// inputs: the Bool (for the lt/gt/eq/ne bits), the flags (result of some
+// compare), and the 2 values to select between. The Matcher requires a
+// binary tree so we break it down like this:
+// (CMove (Binary bol cmp) (Binary src1 src2))
+class BinaryNode : public Node {
+public:
+ BinaryNode( Node *n1, Node *n2 ) : Node(0,n1,n2) { }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return 0; }
+};
+
+//------------------------------CMoveNode--------------------------------------
+// Conditional move
+class CMoveNode : public TypeNode {
+public:
+ enum { Control, // When is it safe to do this cmove?
+ Condition, // Condition controlling the cmove
+ IfFalse, // Value if condition is false
+ IfTrue }; // Value if condition is true
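+ // Computes (Condition ? IfTrue : IfFalse) as a data value, with no branch.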
+ CMoveNode( Node *bol, Node *left, Node *right, const Type *t ) : TypeNode(t,4)
+ {
+ init_class_id(Class_CMove);
+ // all inputs are nullified in Node::Node(int)
+ // init_req(Control,NULL);
+ init_req(Condition,bol);
+ init_req(IfFalse,left);
+ init_req(IfTrue,right);
+ }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ static CMoveNode *make( Compile *C, Node *c, Node *bol, Node *left, Node *right, const Type *t );
+ // Helper function to spot cmove graph shapes
+ static Node *is_cmove_id( PhaseTransform *phase, Node *cmp, Node *t, Node *f, BoolNode *b );
+};
+
+//------------------------------CMoveDNode-------------------------------------
+class CMoveDNode : public CMoveNode {
+public:
+ CMoveDNode( Node *bol, Node *left, Node *right, const Type* t) : CMoveNode(bol,left,right,t){}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------CMoveFNode-------------------------------------
+class CMoveFNode : public CMoveNode {
+public:
+ CMoveFNode( Node *bol, Node *left, Node *right, const Type* t ) : CMoveNode(bol,left,right,t) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------CMoveINode-------------------------------------
+class CMoveINode : public CMoveNode {
+public:
+ CMoveINode( Node *bol, Node *left, Node *right, const TypeInt *ti ) : CMoveNode(bol,left,right,ti){}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------CMoveLNode-------------------------------------
+class CMoveLNode : public CMoveNode {
+public:
+ CMoveLNode(Node *bol, Node *left, Node *right, const TypeLong *tl ) : CMoveNode(bol,left,right,tl){}
+ virtual int Opcode() const;
+};
+
+//------------------------------CMovePNode-------------------------------------
+class CMovePNode : public CMoveNode {
+public:
+ CMovePNode( Node *c, Node *bol, Node *left, Node *right, const TypePtr* t ) : CMoveNode(bol,left,right,t) { init_req(Control,c); }
+ virtual int Opcode() const;
+};
+
+//------------------------------ConstraintCastNode-------------------------------------
+// cast to a different range
+class ConstraintCastNode: public TypeNode {
+public:
+ ConstraintCastNode (Node *n, const Type *t ): TypeNode(t,2) {
+ init_class_id(Class_ConstraintCast);
+ init_req(1, n);
+ }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const = 0;
+ virtual Node *Ideal_DU_postCCP( PhaseCCP * );
+};
+
+//------------------------------CastIINode-------------------------------------
+// cast integer to integer (different range)
+class CastIINode: public ConstraintCastNode {
+public:
+ CastIINode (Node *n, const Type *t ): ConstraintCastNode(n,t) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------CastPPNode-------------------------------------
+// cast pointer to pointer (different type)
+class CastPPNode: public ConstraintCastNode {
+public:
+ CastPPNode (Node *n, const Type *t ): ConstraintCastNode(n, t) {
+ // Only CastPP is safe. CastII can cause optimizer loops.
+ init_flags(Flag_is_dead_loop_safe);
+ }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ virtual Node *Ideal_DU_postCCP( PhaseCCP * );
+};
+
+//------------------------------CheckCastPPNode--------------------------------
+// for _checkcast, cast pointer to pointer (different type), without JOIN.
+class CheckCastPPNode: public TypeNode {
+public:
+ CheckCastPPNode( Node *c, Node *n, const Type *t ) : TypeNode(t,2) {
+ init_class_id(Class_CheckCastPP);
+ init_flags(Flag_is_dead_loop_safe);
+ init_req(0, c);
+ init_req(1, n);
+ }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ // No longer remove CheckCast after CCP as it gives me a place to hang
+ // the proper address type - which is required to compute anti-deps.
+ //virtual Node *Ideal_DU_postCCP( PhaseCCP * );
+};
+
+//------------------------------Conv2BNode-------------------------------------
+// Convert int/pointer to a Boolean. Map zero to zero, all else to 1.
+class Conv2BNode : public Node {
+public:
+ Conv2BNode( Node *i ) : Node(0,i) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::BOOL; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+// The conversion operations are all Alpha sorted. Please keep it that way!
+//------------------------------ConvD2FNode------------------------------------
+// Convert double to float
+class ConvD2FNode : public Node {
+public:
+ ConvD2FNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ConvD2INode------------------------------------
+// Convert Double to Integer
+class ConvD2INode : public Node {
+public:
+ ConvD2INode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ConvD2LNode------------------------------------
+// Convert Double to Long
+class ConvD2LNode : public Node {
+public:
+ ConvD2LNode( Node *dbl ) : Node(0,dbl) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ConvF2DNode------------------------------------
+// Convert Float to a Double.
+class ConvF2DNode : public Node {
+public:
+ ConvF2DNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------ConvF2INode------------------------------------
+// Convert float to integer
+class ConvF2INode : public Node {
+public:
+ ConvF2INode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ConvF2LNode------------------------------------
+// Convert float to long
+class ConvF2LNode : public Node {
+public:
+ ConvF2LNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ConvI2DNode------------------------------------
+// Convert Integer to Double
+class ConvI2DNode : public Node {
+public:
+ ConvI2DNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------ConvI2FNode------------------------------------
+// Convert Integer to Float
+class ConvI2FNode : public Node {
+public:
+ ConvI2FNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ConvI2LNode------------------------------------
+// Convert integer to long
+class ConvI2LNode : public TypeNode {
+public:
+ ConvI2LNode(Node *in1, const TypeLong* t = TypeLong::INT)
+ : TypeNode(t, 2)
+ { init_req(1, in1); }
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ConvL2DNode------------------------------------
+// Convert Long to Double
+class ConvL2DNode : public Node {
+public:
+ ConvL2DNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------ConvL2FNode------------------------------------
+// Convert Long to Float
+class ConvL2FNode : public Node {
+public:
+ ConvL2FNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ConvL2INode------------------------------------
+// Convert long to integer
+class ConvL2INode : public Node {
+public:
+ ConvL2INode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------CastX2PNode-------------------------------------
+// convert a machine-pointer-sized integer to a raw pointer
+class CastX2PNode : public Node {
+public:
+ CastX2PNode( Node *n ) : Node(NULL, n) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegP; }
+ virtual const Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
+};
+
+//------------------------------CastP2XNode-------------------------------------
+// Used in both 32-bit and 64-bit land.
+// Used for card-marks and unsafe pointer math.
+class CastP2XNode : public Node {
+public:
+ CastP2XNode( Node *ctrl, Node *n ) : Node(ctrl, n) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegX; }
+ virtual const Type *bottom_type() const { return TypeX_X; }
+ // Return false to keep node from moving away from an associated card mark.
+ virtual bool depends_only_on_test() const { return false; }
+};
+
+//------------------------------MemMoveNode------------------------------------
+// Memory to memory move. Inserted very late, after allocation.
+class MemMoveNode : public Node {
+public:
+ MemMoveNode( Node *dst, Node *src ) : Node(0,dst,src) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------ThreadLocalNode--------------------------------
+// Ideal Node which returns the base of ThreadLocalStorage.
+class ThreadLocalNode : public Node {
+public:
+ ThreadLocalNode( ) : Node((Node*)Compile::current()->root()) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeRawPtr::BOTTOM;}
+ virtual uint ideal_reg() const { return Op_RegP; }
+};
+
+//------------------------------LoadReturnPCNode-------------------------------
+class LoadReturnPCNode: public Node {
+public:
+ LoadReturnPCNode(Node *c) : Node(c) { }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+};
+
+
+//-----------------------------RoundFloatNode----------------------------------
+class RoundFloatNode: public Node {
+public:
+ RoundFloatNode(Node* c, Node *in1): Node(c, in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+
+//-----------------------------RoundDoubleNode---------------------------------
+class RoundDoubleNode: public Node {
+public:
+ RoundDoubleNode(Node* c, Node *in1): Node(c, in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------Opaque1Node------------------------------------
+// A node to prevent unwanted optimizations. Allows constant folding.
+// Stops value-numbering, Ideal calls or Identity functions.
+class Opaque1Node : public Node {
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const;
+public:
+ Opaque1Node( Node *n ) : Node(0,n) {}
+ // Special version for the pre-loop to hold the original loop limit
+ // which is consumed by range check elimination.
+ Opaque1Node( Node *n, Node* orig_limit ) : Node(0,n,orig_limit) {}
+ Node* original_loop_limit() { return req()==3 ? in(2) : NULL; }
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual Node *Identity( PhaseTransform *phase );
+};
+
+//------------------------------Opaque2Node------------------------------------
+// A node to prevent unwanted optimizations. Allows constant folding. Stops
+// value-numbering, most Ideal calls or Identity functions. This Node is
+// specifically designed to prevent the pre-increment value of a loop trip
+// counter from being live out of the bottom of the loop (which would cause the
+// pre- and post-increment values both to be live, requiring an extra
+// temp register and an extra move). If we "accidentally" optimize through
+// this kind of Node, we'll get slightly pessimal, but correct, code. Thus
+// it's OK to be slightly sloppy on optimizations here.
+class Opaque2Node : public Node {
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const;
+public:
+ Opaque2Node( Node *n ) : Node(0,n) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+};
+
+//----------------------PartialSubtypeCheckNode--------------------------------
+// The second, slow half of a subtype check. Scans the subklass's secondary
+// superklass array for an instance of the superklass. Sets a hidden internal
+// cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
+// Returns non-zero for a miss and zero for a hit.
+class PartialSubtypeCheckNode : public Node {
+public:
+ PartialSubtypeCheckNode(Node* c, Node* sub, Node* super) : Node(c,sub,super) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
+ virtual uint ideal_reg() const { return Op_RegP; }
+};
+
+//------------------------------MoveI2FNode------------------------------------
+class MoveI2FNode : public Node {
+ public:
+ MoveI2FNode( Node *value ) : Node(0,value) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+ virtual const Type* Value( PhaseTransform *phase ) const;
+};
+
+class MoveL2DNode : public Node {
+ public:
+ MoveL2DNode( Node *value ) : Node(0,value) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type* Value( PhaseTransform *phase ) const;
+};
+
+class MoveF2INode : public Node {
+ public:
+ MoveF2INode( Node *value ) : Node(0,value) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual const Type* Value( PhaseTransform *phase ) const;
+};
+
+class MoveD2LNode : public Node {
+ public:
+ MoveD2LNode( Node *value ) : Node(0,value) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+ virtual const Type* Value( PhaseTransform *phase ) const;
+};
diff --git a/src/share/vm/opto/divnode.cpp b/src/share/vm/opto/divnode.cpp
new file mode 100644
index 000000000..5443495ff
--- /dev/null
+++ b/src/share/vm/opto/divnode.cpp
@@ -0,0 +1,1031 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_divnode.cpp.incl"
+#include <math.h>
+
+// Implement the integer constant divide -> long multiply transform found in
+// "Division by Invariant Integers using Multiplication"
+// by Granlund and Montgomery
+static Node *transform_int_divide_to_long_multiply( PhaseGVN *phase, Node *dividend, int divisor ) {
+
+ // Check for invalid divisors
+ assert( divisor != 0 && divisor != min_jint && divisor != 1,
+ "bad divisor for transforming to long multiply" );
+
+ // Compute l = ceiling(log2(d))
+ // presumes d is more likely small
+ bool d_pos = divisor >= 0;
+ int d = d_pos ? divisor : -divisor;
+ unsigned ud = (unsigned)d;
+ const int N = 32;
+ int l = log2_intptr(d-1)+1;
+ int sh_post = l;
+
+ const uint64_t U1 = (uint64_t)1;
+
+ // Cliff pointed out how to prevent overflow (from the paper)
+ uint64_t m_low = (((U1 << l) - ud) << N) / ud + (U1 << N);
+ uint64_t m_high = ((((U1 << l) - ud) << N) + (U1 << (l+1))) / ud + (U1 << N);
+
+ // Reduce to lowest terms
+ for ( ; sh_post > 0; sh_post-- ) {
+ uint64_t m_low_1 = m_low >> 1;
+ uint64_t m_high_1 = m_high >> 1;
+ if ( m_low_1 >= m_high_1 )
+ break;
+ m_low = m_low_1;
+ m_high = m_high_1;
+ }
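+ // Illustrative example (not in the original source): for divisor d = 7,
+ // l = 3, so m_low = (2^32)/7 + 2^32 and m_high = (2^32 + 16)/7 + 2^32.
+ // The loop above halves both until they would collide, leaving
+ // m_high = 0x92492493 and sh_post = 2 -- the familiar signed divide-by-7
+ // magic constant. Since this m_high is >= 2^31, the overflow-corrected
+ // branch at the bottom of this function is the one taken.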
+
+ // Result
+ Node *q;
+
+ // division by +/- 1
+ if (d == 1) {
+ // Filtered out as identity above
+ if (d_pos)
+ return NULL;
+
+ // Just negate the value
+ else {
+ q = new (phase->C, 3) SubINode(phase->intcon(0), dividend);
+ }
+ }
+
+ // division by +/- a power of 2
+ else if ( is_power_of_2(d) ) {
+
+ // See if we can simply do a shift without rounding
+ bool needs_rounding = true;
+ const Type *dt = phase->type(dividend);
+ const TypeInt *dti = dt->isa_int();
+
+ // we don't need to round a positive dividend
+ if (dti && dti->_lo >= 0)
+ needs_rounding = false;
+
+ // An AND mask of sufficient size clears the low bits and
+ // I can avoid rounding.
+ else if( dividend->Opcode() == Op_AndI ) {
+ const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int();
+ if( andconi && andconi->is_con(-d) ) {
+ dividend = dividend->in(1);
+ needs_rounding = false;
+ }
+ }
+
+ // Add rounding to the shift to handle the sign bit
+ if( needs_rounding ) {
+ Node *t1 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(l - 1)));
+ Node *t2 = phase->transform(new (phase->C, 3) URShiftINode(t1, phase->intcon(N - l)));
+ dividend = phase->transform(new (phase->C, 3) AddINode(dividend, t2));
+ }
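+ // Illustrative (not in the original source): for d = 4 (l = 2) and a
+ // dividend of -7, t1 = -7 >> 1 = -4, t2 = (unsigned)-4 >> 30 = 3, so the
+ // adjusted dividend is -4 and the shift below yields -4 >> 2 = -1,
+ // matching Java's truncate-toward-zero result for -7 / 4.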
+
+ q = new (phase->C, 3) RShiftINode(dividend, phase->intcon(l));
+
+ if (!d_pos)
+ q = new (phase->C, 3) SubINode(phase->intcon(0), phase->transform(q));
+ }
+
+ // division by something else
+ else if (m_high < (U1 << (N-1))) {
+ Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend));
+ Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high)));
+ Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(sh_post+N)));
+ Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3));
+ Node *t5 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1)));
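+ // Added commentary (not in the original): t5 is 0 for a non-negative
+ // dividend and -1 otherwise, so for a positive divisor the SubI below
+ // computes t4 - t5, adding back the +1 sign correction the multiply-and-
+ // shift scheme needs for negative dividends; the operands are swapped to
+ // negate the result when the divisor is negative.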
+
+ q = new (phase->C, 3) SubINode(d_pos ? t4 : t5, d_pos ? t5 : t4);
+ }
+
+ // This handles the case where m_high is >= 2**(N-1). In that case,
+ // we subtract out 2**N from the multiply and add it back in later as
+ // "dividend" in the equation (t5). This case computes the same result
+ // as the immediately preceding case, except that rounding and overflow
+ // are accounted for.
+ else {
+ Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend));
+ Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high - (U1 << N))));
+ Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(N)));
+ Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3));
+ Node *t5 = phase->transform(new (phase->C, 3) AddINode(dividend, t4));
+ Node *t6 = phase->transform(new (phase->C, 3) RShiftINode(t5, phase->intcon(sh_post)));
+ Node *t7 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1)));
+
+ q = new (phase->C, 3) SubINode(d_pos ? t6 : t7, d_pos ? t7 : t6);
+ }
+
+ return (q);
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If the divisor is 1, we are an identity on the dividend.
+Node *DivINode::Identity( PhaseTransform *phase ) {
+ return (phase->type( in(2) )->higher_equal(TypeInt::ONE)) ? in(1) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+// Divides can be changed to multiplies and/or shifts
+Node *DivINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (in(0) && remove_dead_region(phase, can_reshape)) return this;
+
+ const Type *t = phase->type( in(2) );
+ if( t == TypeInt::ONE ) // Identity?
+ return NULL; // Skip it
+
+ const TypeInt *ti = t->isa_int();
+ if( !ti ) return NULL;
+ if( !ti->is_con() ) return NULL;
+ int i = ti->get_con(); // Get divisor
+
+ if (i == 0) return NULL; // Dividing by zero constant does not idealize
+
+ set_req(0,NULL); // Dividing by a not-zero constant; no faulting
+
+ // Dividing by MININT does not optimize as a power-of-2 shift.
+ if( i == min_jint ) return NULL;
+
+ return transform_int_divide_to_long_multiply( phase, in(1), i );
+}
+
+//------------------------------Value------------------------------------------
+// A DivINode divides its inputs. The third input is a Control input, used to
+// prevent hoisting the divide above an unsafe test.
+const Type *DivINode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // x/x == 1 since we always generate the dynamic divisor check for 0.
+ if( phase->eqv( in(1), in(2) ) )
+ return TypeInt::ONE;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // Divide the two numbers. We approximate.
+ // If divisor is a constant and not zero
+ const TypeInt *i1 = t1->is_int();
+ const TypeInt *i2 = t2->is_int();
+ int widen = MAX2(i1->_widen, i2->_widen);
+
+ if( i2->is_con() && i2->get_con() != 0 ) {
+ int32 d = i2->get_con(); // Divisor
+ jint lo, hi;
+ if( d >= 0 ) {
+ lo = i1->_lo/d;
+ hi = i1->_hi/d;
+ } else {
+ if( d == -1 && i1->_lo == min_jint ) {
+ // 'min_jint/-1' throws arithmetic exception during compilation
+ lo = min_jint;
+ // do not support holes, 'hi' must go to either min_jint or max_jint:
+ // [min_jint, -10]/[-1,-1] ==> [min_jint] UNION [10,max_jint]
+ hi = i1->_hi == min_jint ? min_jint : max_jint;
+ } else {
+ lo = i1->_hi/d;
+ hi = i1->_lo/d;
+ }
+ }
+ return TypeInt::make(lo, hi, widen);
+ }
+
+ // If the dividend is a constant
+ if( i1->is_con() ) {
+ int32 d = i1->get_con();
+ if( d < 0 ) {
+ if( d == min_jint ) {
+ // (-min_jint) == min_jint == (min_jint / -1)
+ return TypeInt::make(min_jint, max_jint/2 + 1, widen);
+ } else {
+ return TypeInt::make(d, -d, widen);
+ }
+ }
+ return TypeInt::make(-d, d, widen);
+ }
+
+ // Otherwise we give up all hope
+ return TypeInt::INT;
+}
+
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If the divisor is 1, we are an identity on the dividend.
+Node *DivLNode::Identity( PhaseTransform *phase ) {
+ return (phase->type( in(2) )->higher_equal(TypeLong::ONE)) ? in(1) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+// Dividing by a power of 2 is a shift.
+Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) {
+ if (in(0) && remove_dead_region(phase, can_reshape)) return this;
+
+ const Type *t = phase->type( in(2) );
+ if( t == TypeLong::ONE ) // Identity?
+ return NULL; // Skip it
+
+ const TypeLong *ti = t->isa_long();
+ if( !ti ) return NULL;
+ if( !ti->is_con() ) return NULL;
+ jlong i = ti->get_con(); // Get divisor
+ if( i ) set_req(0, NULL); // Dividing by a not-zero constant; no faulting
+
+ // Dividing by MININT does not optimize as a power-of-2 shift.
+ if( i == min_jlong ) return NULL;
+
+ // Check for negative power of 2 divisor, if so, negate it and set a flag
+ // to indicate result needs to be negated. Note that negating the dividend
+ // here does not work when it has the value MININT
+ Node *dividend = in(1);
+ bool negate_res = false;
+ if (is_power_of_2_long(-i)) {
+ i = -i; // Flip divisor
+ negate_res = true;
+ }
+
+ // Check for power of 2
+ if (!is_power_of_2_long(i)) // Is divisor a power of 2?
+ return NULL; // Not a power of 2
+
+ // Compute number of bits to shift
+ int log_i = log2_long(i);
+
+ // See if we can simply do a shift without rounding
+ bool needs_rounding = true;
+ const Type *dt = phase->type(dividend);
+ const TypeLong *dtl = dt->isa_long();
+
+ if (dtl && dtl->_lo > 0) {
+ // we don't need to round a positive dividend
+ needs_rounding = false;
+ } else if( dividend->Opcode() == Op_AndL ) {
+ // An AND mask of sufficient size clears the low bits and
+ // I can avoid rounding.
+ const TypeLong *andconi = phase->type( dividend->in(2) )->isa_long();
+ if( andconi &&
+ andconi->is_con() &&
+ andconi->get_con() == -i ) {
+ dividend = dividend->in(1);
+ needs_rounding = false;
+ }
+ }
+
+ if (!needs_rounding) {
+ Node *result = new (phase->C, 3) RShiftLNode(dividend, phase->intcon(log_i));
+ if (negate_res) {
+ result = phase->transform(result);
+ result = new (phase->C, 3) SubLNode(phase->longcon(0), result);
+ }
+ return result;
+ }
+
+ // Divide-by-power-of-2 can be made into a shift, but you have to do
+ // more math for the rounding. You need to add 0 for positive
+ // numbers, and "i-1" for negative numbers. Example: i=4, so the
+ // shift is by 2. You need to add 3 to negative dividends and 0 to
+ // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1,
+ // (-2+3)>>2 becomes 0, etc.
+
+ // Compute 0 or -1, based on sign bit
+ Node *sign = phase->transform(new (phase->C, 3) RShiftLNode(dividend,phase->intcon(63)));
+ // Mask the sign down to the rounding constant: 0 for positive, i-1 for negative
+ Node *round = phase->transform(new (phase->C, 3) AndLNode(sign,phase->longcon(i-1)));
+ // Round up before shifting
+ Node *sum = phase->transform(new (phase->C, 3) AddLNode(dividend,round));
+ // Shift for division
+ Node *result = new (phase->C, 3) RShiftLNode(sum, phase->intcon(log_i));
+ if (negate_res) {
+ result = phase->transform(result);
+ result = new (phase->C, 3) SubLNode(phase->longcon(0), result);
+ }
+
+ return result;
+}
+
+//------------------------------Value------------------------------------------
+// A DivLNode divides its inputs. The third input is a Control input, used to
+// prevent hoisting the divide above an unsafe test.
+const Type *DivLNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // x/x == 1 since we always generate the dynamic divisor check for 0.
+ if( phase->eqv( in(1), in(2) ) )
+ return TypeLong::ONE;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // Divide the two numbers. We approximate.
+ // If divisor is a constant and not zero
+ const TypeLong *i1 = t1->is_long();
+ const TypeLong *i2 = t2->is_long();
+ int widen = MAX2(i1->_widen, i2->_widen);
+
+ if( i2->is_con() && i2->get_con() != 0 ) {
+ jlong d = i2->get_con(); // Divisor
+ jlong lo, hi;
+ if( d >= 0 ) {
+ lo = i1->_lo/d;
+ hi = i1->_hi/d;
+ } else {
+ if( d == CONST64(-1) && i1->_lo == min_jlong ) {
+ // 'min_jlong/-1' throws arithmetic exception during compilation
+ lo = min_jlong;
+ // do not support holes, 'hi' must go to either min_jlong or max_jlong:
+ // [min_jlong, -10]/[-1,-1] ==> [min_jlong] UNION [10,max_jlong]
+ hi = i1->_hi == min_jlong ? min_jlong : max_jlong;
+ } else {
+ lo = i1->_hi/d;
+ hi = i1->_lo/d;
+ }
+ }
+ return TypeLong::make(lo, hi, widen);
+ }
+
+ // If the dividend is a constant
+ if( i1->is_con() ) {
+ jlong d = i1->get_con();
+ if( d < 0 ) {
+ if( d == min_jlong ) {
+ // (-min_jlong) == min_jlong == (min_jlong / -1)
+ return TypeLong::make(min_jlong, max_jlong/2 + 1, widen);
+ } else {
+ return TypeLong::make(d, -d, widen);
+ }
+ }
+ return TypeLong::make(-d, d, widen);
+ }
+
+ // Otherwise we give up all hope
+ return TypeLong::LONG;
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// A DivFNode divides its inputs. The third input is a Control input, used to
+// prevent hoisting the divide above an unsafe test.
+const Type *DivFNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // x/x == 1, we ignore 0/0.
+ // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
+ // does not work for variables because of NaN's
+ if( phase->eqv( in(1), in(2) ) && t1->base() == Type::FloatCon)
+ if (!g_isnan(t1->getf()) && g_isfinite(t1->getf()) && t1->getf() != 0.0) // could be negative ZERO or NaN
+ return TypeF::ONE;
+
+ if( t2 == TypeF::ONE )
+ return t1;
+
+ // If the divisor is a constant and not zero, divide the numbers
+ if( t1->base() == Type::FloatCon &&
+ t2->base() == Type::FloatCon &&
+ t2->getf() != 0.0 ) // could be negative zero
+ return TypeF::make( t1->getf()/t2->getf() );
+
+ // If the dividend is a constant zero
+ // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
+ // Test TypeF::ZERO is not sufficient as it could be negative zero
+
+ if( t1 == TypeF::ZERO && !g_isnan(t2->getf()) && t2->getf() != 0.0 )
+ return TypeF::ZERO;
+
+ // Otherwise we give up all hope
+ return Type::FLOAT;
+}
+
+//------------------------------Identity---------------------------------------
+// Dividing by self is 1.
+// If the divisor is 1, we are an identity on the dividend.
+Node *DivFNode::Identity( PhaseTransform *phase ) {
+ return (phase->type( in(2) ) == TypeF::ONE) ? in(1) : this;
+}
+
+
+//------------------------------Idealize---------------------------------------
+Node *DivFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (in(0) && remove_dead_region(phase, can_reshape)) return this;
+
+ const Type *t2 = phase->type( in(2) );
+ if( t2 == TypeF::ONE ) // Identity?
+ return NULL; // Skip it
+
+ const TypeF *tf = t2->isa_float_constant();
+ if( !tf ) return NULL;
+ if( tf->base() != Type::FloatCon ) return NULL;
+
+ // Check for out of range values
+ if( tf->is_nan() || !tf->is_finite() ) return NULL;
+
+ // Get the value
+ float f = tf->getf();
+ int exp;
+
+ // Only for special case of dividing by a power of 2
+ if( frexp((double)f, &exp) != 0.5 ) return NULL;
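+ // Illustrative (not in the original source): f = 8.0f gives frexp(8.0) == 0.5
+ // with exp == 4, so the divide is rewritten below as a multiply by the
+ // exactly representable reciprocal 0.125f.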
+
+ // Limit the range of acceptable exponents
+ if( exp < -126 || exp > 126 ) return NULL;
+
+ // Compute the reciprocal
+ float reciprocal = ((float)1.0) / f;
+
+ assert( frexp((double)reciprocal, &exp) == 0.5, "reciprocal should be power of 2" );
+
+ // return multiplication by the reciprocal
+ return (new (phase->C, 3) MulFNode(in(1), phase->makecon(TypeF::make(reciprocal))));
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// A DivDNode divides its inputs. The third input is a Control input, used to
+// prevent hoisting the divide above an unsafe test.
+const Type *DivDNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // x/x == 1, we ignore 0/0.
+ // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
+ // Does not work for variables because of NaN's
+ if( phase->eqv( in(1), in(2) ) && t1->base() == Type::DoubleCon)
+ if (!g_isnan(t1->getd()) && g_isfinite(t1->getd()) && t1->getd() != 0.0) // could be negative ZERO or NaN
+ return TypeD::ONE;
+
+ if( t2 == TypeD::ONE )
+ return t1;
+
+ // If the divisor is a constant and not zero, divide the numbers
+ if( t1->base() == Type::DoubleCon &&
+ t2->base() == Type::DoubleCon &&
+ t2->getd() != 0.0 ) // could be negative zero
+ return TypeD::make( t1->getd()/t2->getd() );
+
+ // If the dividend is a constant zero
+ // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
+ // Test TypeD::ZERO is not sufficient as it could be negative zero
+ if( t1 == TypeD::ZERO && !g_isnan(t2->getd()) && t2->getd() != 0.0 )
+ return TypeD::ZERO;
+
+ // Otherwise we give up all hope
+ return Type::DOUBLE;
+}
+
+
+//------------------------------Identity---------------------------------------
+// Dividing by self is 1.
+// If the divisor is 1, we are an identity on the dividend.
+Node *DivDNode::Identity( PhaseTransform *phase ) {
+ return (phase->type( in(2) ) == TypeD::ONE) ? in(1) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+Node *DivDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (in(0) && remove_dead_region(phase, can_reshape)) return this;
+
+ const Type *t2 = phase->type( in(2) );
+ if( t2 == TypeD::ONE ) // Identity?
+ return NULL; // Skip it
+
+ const TypeD *td = t2->isa_double_constant();
+ if( !td ) return NULL;
+ if( td->base() != Type::DoubleCon ) return NULL;
+
+ // Check for out of range values
+ if( td->is_nan() || !td->is_finite() ) return NULL;
+
+ // Get the value
+ double d = td->getd();
+ int exp;
+
+ // Only for special case of dividing by a power of 2
+ if( frexp(d, &exp) != 0.5 ) return NULL;
+
+ // Limit the range of acceptable exponents
+ if( exp < -1021 || exp > 1022 ) return NULL;
+
+ // Compute the reciprocal
+ double reciprocal = 1.0 / d;
+
+ assert( frexp(reciprocal, &exp) == 0.5, "reciprocal should be power of 2" );
+
+ // return multiplication by the reciprocal
+ return (new (phase->C, 3) MulDNode(in(1), phase->makecon(TypeD::make(reciprocal))));
+}
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+Node *ModINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Check for dead control input
+ if( remove_dead_region(phase, can_reshape) ) return this;
+
+ // Get the modulus
+ const Type *t = phase->type( in(2) );
+ if( t == Type::TOP ) return NULL;
+ const TypeInt *ti = t->is_int();
+
+ // Check for useless control input
+ // Check for excluding mod-zero case
+ if( in(0) && (ti->_hi < 0 || ti->_lo > 0) ) {
+ set_req(0, NULL); // Yank control input
+ return this;
+ }
+
+ // See if we are MOD'ing by 2^k or 2^k-1.
+ if( !ti->is_con() ) return NULL;
+ jint con = ti->get_con();
+
+ Node *hook = new (phase->C, 1) Node(1);
+
+ // First, special check for modulo 2^k-1
+ if( con >= 0 && con < max_jint && is_power_of_2(con+1) ) {
+ uint k = exact_log2(con+1); // Extract k
+
+ // Basic algorithm by David Detlefs. See fastmod_int.java for gory details.
+ static int unroll_factor[] = { 999, 999, 29, 14, 9, 7, 5, 4, 4, 3, 3, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/};
+ int trip_count = 1;
+ if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k];
+
+ // If the unroll factor is not too large, and if conditional moves are
+ // ok, then use this case
+ if( trip_count <= 5 && ConditionalMoveLimit != 0 ) {
+ Node *x = in(1); // Value being mod'd
+ Node *divisor = in(2); // Also is mask
+
+ hook->init_req(0, x); // Add a use to x to prevent him from dying
+ // Generate code to reduce X rapidly to nearly 2^k-1.
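+ // The reduction relies on 2^k == 1 (mod 2^k-1), so replacing x with
+ // (x >> k) + (x & (2^k-1)) preserves x mod (2^k-1) while shrinking x.
+ // Illustrative (not in the original source): for divisor 63 (k = 6) and
+ // x = 1000, one pass gives (1000 >> 6) + (1000 & 63) = 15 + 40 = 55,
+ // which already equals 1000 % 63.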
+ for( int i = 0; i < trip_count; i++ ) {
+ Node *xl = phase->transform( new (phase->C, 3) AndINode(x,divisor) );
+ Node *xh = phase->transform( new (phase->C, 3) RShiftINode(x,phase->intcon(k)) ); // Must be signed
+ x = phase->transform( new (phase->C, 3) AddINode(xh,xl) );
+ hook->set_req(0, x);
+ }
+
+ // Generate sign-fixup code. Was original value positive?
+ // int hack_res = (i >= 0) ? divisor : 1;
+ Node *cmp1 = phase->transform( new (phase->C, 3) CmpINode( in(1), phase->intcon(0) ) );
+ Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) );
+ Node *cmov1= phase->transform( new (phase->C, 4) CMoveINode(bol1, phase->intcon(1), divisor, TypeInt::POS) );
+ // if( x >= hack_res ) x -= divisor;
+ Node *sub = phase->transform( new (phase->C, 3) SubINode( x, divisor ) );
+ Node *cmp2 = phase->transform( new (phase->C, 3) CmpINode( x, cmov1 ) );
+ Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) );
+ // Convention is to not transform the return value of an Ideal
+ // since Ideal is expected to return a modified 'this' or a new node.
+ Node *cmov2= new (phase->C, 4) CMoveINode(bol2, x, sub, TypeInt::INT);
+ // cmov2 is now the mod
+
+ // Now remove the bogus extra edges used to keep things alive
+ if (can_reshape) {
+ phase->is_IterGVN()->remove_dead_node(hook);
+ } else {
+ hook->set_req(0, NULL); // Just yank bogus edge during Parse phase
+ }
+ return cmov2;
+ }
+ }
+
+ // Fell through: the unroll case is not appropriate. Transform the modulo
+ // into a long-multiply / int-multiply / subtract sequence
+
+ // Cannot handle mod 0, and min_jint isn't handled by the transform
+ if( con == 0 || con == min_jint ) return NULL;
+
+ // Get the absolute value of the constant; at this point, we can use this
+ jint pos_con = (con >= 0) ? con : -con;
+
+ // integer Mod 1 is always 0
+ if( pos_con == 1 ) return new (phase->C, 1) ConINode(TypeInt::ZERO);
+
+ int log2_con = -1;
+
+ // If this is a power of two, then maybe we can mask it
+ if( is_power_of_2(pos_con) ) {
+ log2_con = log2_intptr((intptr_t)pos_con);
+
+ const Type *dt = phase->type(in(1));
+ const TypeInt *dti = dt->isa_int();
+
+ // See if this can be masked, if the dividend is non-negative
+ if( dti && dti->_lo >= 0 )
+ return ( new (phase->C, 3) AndINode( in(1), phase->intcon( pos_con-1 ) ) );
+ }
+
+ // Save in(1) so that it cannot be changed or deleted
+ hook->init_req(0, in(1));
+
+ // Divide using the transform from DivI to MulL
+ Node *divide = phase->transform( transform_int_divide_to_long_multiply( phase, in(1), pos_con ) );
+
+ // Re-multiply, using a shift if this is a power of two
+ Node *mult = NULL;
+
+ if( log2_con >= 0 )
+ mult = phase->transform( new (phase->C, 3) LShiftINode( divide, phase->intcon( log2_con ) ) );
+ else
+ mult = phase->transform( new (phase->C, 3) MulINode( divide, phase->intcon( pos_con ) ) );
+
+ // Finally, subtract the re-multiplied quotient from the original dividend
+ Node *result = new (phase->C, 3) SubINode( in(1), mult );
+
+ // Now remove the bogus extra edges used to keep things alive
+ if (can_reshape) {
+ phase->is_IterGVN()->remove_dead_node(hook);
+ } else {
+ hook->set_req(0, NULL); // Just yank bogus edge during Parse phase
+ }
+
+ // return the value
+ return result;
+}
+
+//------------------------------Value------------------------------------------
+const Type *ModINode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // We always generate the dynamic check for 0.
+ // 0 MOD X is 0
+ if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ // X MOD X is 0
+ if( phase->eqv( in(1), in(2) ) ) return TypeInt::ZERO;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ const TypeInt *i1 = t1->is_int();
+ const TypeInt *i2 = t2->is_int();
+ if( !i1->is_con() || !i2->is_con() ) {
+ if( i1->_lo >= 0 && i2->_lo >= 0 )
+ return TypeInt::POS;
+ // If both numbers are not constants, we know little.
+ return TypeInt::INT;
+ }
+ // Mod by zero? Throw exception at runtime!
+ if( !i2->get_con() ) return TypeInt::POS;
+
+ // We must be modulo'ing 2 int constants.
+ // Check for min_jint % '-1', result is defined to be '0'.
+ if( i1->get_con() == min_jint && i2->get_con() == -1 )
+ return TypeInt::ZERO;
+
+ return TypeInt::make( i1->get_con() % i2->get_con() );
+}
+
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Check for dead control input
+ if( remove_dead_region(phase, can_reshape) ) return this;
+
+ // Get the modulus
+ const Type *t = phase->type( in(2) );
+ if( t == Type::TOP ) return NULL;
+ const TypeLong *ti = t->is_long();
+
+ // Check for useless control input
+ // Check for excluding mod-zero case
+ if( in(0) && (ti->_hi < 0 || ti->_lo > 0) ) {
+ set_req(0, NULL); // Yank control input
+ return this;
+ }
+
+ // See if we are MOD'ing by 2^k or 2^k-1.
+ if( !ti->is_con() ) return NULL;
+ jlong con = ti->get_con();
+ bool m1 = false;
+ if( !is_power_of_2_long(con) ) { // Not 2^k
+ if( !is_power_of_2_long(con+1) ) // Not 2^k-1?
+ return NULL; // No interesting mod hacks
+ m1 = true; // Found 2^k-1
+ con++; // Convert to 2^k form
+ }
+ uint k = log2_long(con); // Extract k
+
+ // Expand mod
+ if( !m1 ) { // Case 2^k
+ } else { // Case 2^k-1
+ // Basic algorithm by David Detlefs. See fastmod_long.java for gory details.
+ // Used to help a popular random number generator which does a long-mod
+ // of 2^31-1 and shows up in SpecJBB and SciMark.
+ static int unroll_factor[] = { 999, 999, 61, 30, 20, 15, 12, 10, 8, 7, 6, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/};
+ int trip_count = 1;
+ if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k];
+ if( trip_count > 4 ) return NULL; // Too much unrolling
+ if (ConditionalMoveLimit == 0) return NULL; // cmov is required
+
+ Node *x = in(1); // Value being mod'd
+ Node *divisor = in(2); // Also is mask
+
+ Node *hook = new (phase->C, 1) Node(x);
+ // Generate code to reduce X rapidly to nearly 2^k-1.
+ for( int i = 0; i < trip_count; i++ ) {
+ Node *xl = phase->transform( new (phase->C, 3) AndLNode(x,divisor) );
+ Node *xh = phase->transform( new (phase->C, 3) RShiftLNode(x,phase->intcon(k)) ); // Must be signed
+ x = phase->transform( new (phase->C, 3) AddLNode(xh,xl) );
+ hook->set_req(0, x); // Add a use to x to prevent him from dying
+ }
+ // Generate sign-fixup code. Was original value positive?
+ // long hack_res = (i >= 0) ? divisor : CONST64(1);
+ Node *cmp1 = phase->transform( new (phase->C, 3) CmpLNode( in(1), phase->longcon(0) ) );
+ Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) );
+ Node *cmov1= phase->transform( new (phase->C, 4) CMoveLNode(bol1, phase->longcon(1), divisor, TypeLong::LONG) );
+ // if( x >= hack_res ) x -= divisor;
+ Node *sub = phase->transform( new (phase->C, 3) SubLNode( x, divisor ) );
+ Node *cmp2 = phase->transform( new (phase->C, 3) CmpLNode( x, cmov1 ) );
+ Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) );
+ // Convention is to not transform the return value of an Ideal
+ // since Ideal is expected to return a modified 'this' or a new node.
+ Node *cmov2= new (phase->C, 4) CMoveLNode(bol2, x, sub, TypeLong::LONG);
+ // cmov2 is now the mod
+
+ // Now remove the bogus extra edges used to keep things alive
+ if (can_reshape) {
+ phase->is_IterGVN()->remove_dead_node(hook);
+ } else {
+ hook->set_req(0, NULL); // Just yank bogus edge during Parse phase
+ }
+ return cmov2;
+ }
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+const Type *ModLNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // We always generate the dynamic check for 0.
+ // 0 MOD X is 0
+ if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
+ // X MOD X is 0
+ if( phase->eqv( in(1), in(2) ) ) return TypeLong::ZERO;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ const TypeLong *i1 = t1->is_long();
+ const TypeLong *i2 = t2->is_long();
+ if( !i1->is_con() || !i2->is_con() ) {
+ if( i1->_lo >= CONST64(0) && i2->_lo >= CONST64(0) )
+ return TypeLong::POS;
+ // If both numbers are not constants, we know little.
+ return TypeLong::LONG;
+ }
+ // Mod by zero? Throw exception at runtime!
+ if( !i2->get_con() ) return TypeLong::POS;
+
+ // We must be modulo'ing 2 long constants.
+ // Check for min_jlong % '-1', result is defined to be '0'.
+ if( i1->get_con() == min_jlong && i2->get_con() == -1 )
+ return TypeLong::ZERO;
+
+ return TypeLong::make( i1->get_con() % i2->get_con() );
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ModFNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // If either is a NaN, return an input NaN
+ if( g_isnan(t1->getf()) ) return t1;
+ if( g_isnan(t2->getf()) ) return t2;
+
+ // It is not worth trying to constant fold this stuff!
+ return Type::FLOAT;
+
+ /*
+ // If dividend is infinity or divisor is zero, or both, the result is NaN
+ if( !g_isfinite(t1->getf()) || ((t2->getf() == 0.0) || (jint_cast(t2->getf()) == 0x80000000)) )
+
+ // X MOD infinity = X
+ if( !g_isfinite(t2->getf()) && !g_isnan(t2->getf()) ) return t1;
+ // 0 MOD finite = dividend (positive or negative zero)
+ // Not valid for: NaN MOD any; any MOD nan; 0 MOD 0; or for 0 MOD NaN
+ // NaNs are handled previously.
+ if( !(t2->getf() == 0.0) && !((int)t2->getf() == 0x80000000)) {
+ if (((t1->getf() == 0.0) || ((int)t1->getf() == 0x80000000)) && g_isfinite(t2->getf()) ) {
+ return t1;
+ }
+ }
+ // X MOD X is 0
+ // Does not work for variables because of NaN's
+ if( phase->eqv( in(1), in(2) ) && t1->base() == Type::FloatCon)
+ if (!g_isnan(t1->getf()) && (t1->getf() != 0.0) && ((int)t1->getf() != 0x80000000)) {
+ if(t1->getf() < 0.0) {
+ float result = jfloat_cast(0x80000000);
+ return TypeF::make( result );
+ }
+ else
+ return TypeF::ZERO;
+ }
+
+ // If both numbers are not constants, we know nothing.
+ if( (t1->base() != Type::FloatCon) || (t2->base() != Type::FloatCon) )
+ return Type::FLOAT;
+
+ // We must be modulo'ing 2 float constants.
+ // Make sure that the sign of the fmod is equal to the sign of the dividend
+ float result = (float)fmod( t1->getf(), t2->getf() );
+ float dividend = t1->getf();
+ if( (dividend < 0.0) || ((int)dividend == 0x80000000) ) {
+ if( result > 0.0 )
+ result = 0.0 - result;
+ else if( result == 0.0 ) {
+ result = jfloat_cast(0x80000000);
+ }
+ }
+ return TypeF::make( result );
+ */
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ModDNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // If either is a NaN, return an input NaN
+ if( g_isnan(t1->getd()) ) return t1;
+ if( g_isnan(t2->getd()) ) return t2;
+ // X MOD infinity = X
+ if( !g_isfinite(t2->getd())) return t1;
+ // 0 MOD finite = dividend (positive or negative zero)
+ // Not valid for: NaN MOD any; any MOD nan; 0 MOD 0; or for 0 MOD NaN
+ // NaNs are handled previously.
+ if( !(t2->getd() == 0.0) ) {
+ if( t1->getd() == 0.0 && g_isfinite(t2->getd()) ) {
+ return t1;
+ }
+ }
+
+ // X MOD X is 0
+ // does not work for variables because of NaN's
+ if( phase->eqv( in(1), in(2) ) && t1->base() == Type::DoubleCon )
+ if (!g_isnan(t1->getd()) && t1->getd() != 0.0)
+ return TypeD::ZERO;
+
+
+ // If both numbers are not constants, we know nothing.
+ if( (t1->base() != Type::DoubleCon) || (t2->base() != Type::DoubleCon) )
+ return Type::DOUBLE;
+
+ // We must be modulo'ing 2 double constants.
+ return TypeD::make( fmod( t1->getd(), t2->getd() ) );
+}
+
+//=============================================================================
+
+DivModNode::DivModNode( Node *c, Node *dividend, Node *divisor ) : MultiNode(3) {
+ init_req(0, c);
+ init_req(1, dividend);
+ init_req(2, divisor);
+}
+
+//------------------------------make------------------------------------------
+DivModINode* DivModINode::make(Compile* C, Node* div_or_mod) {
+ Node* n = div_or_mod;
+ assert(n->Opcode() == Op_DivI || n->Opcode() == Op_ModI,
+ "only div or mod input pattern accepted");
+
+ DivModINode* divmod = new (C, 3) DivModINode(n->in(0), n->in(1), n->in(2));
+ Node* dproj = new (C, 1) ProjNode(divmod, DivModNode::div_proj_num);
+ Node* mproj = new (C, 1) ProjNode(divmod, DivModNode::mod_proj_num);
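+ // Added commentary (not in the original): dproj and mproj are not returned;
+ // constructing the two ProjNodes registers them as outputs of divmod, and
+ // callers retrieve them later through div_proj()/mod_proj().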
+ return divmod;
+}
+
+//------------------------------make------------------------------------------
+DivModLNode* DivModLNode::make(Compile* C, Node* div_or_mod) {
+ Node* n = div_or_mod;
+ assert(n->Opcode() == Op_DivL || n->Opcode() == Op_ModL,
+ "only div or mod input pattern accepted");
+
+ DivModLNode* divmod = new (C, 3) DivModLNode(n->in(0), n->in(1), n->in(2));
+ Node* dproj = new (C, 1) ProjNode(divmod, DivModNode::div_proj_num);
+ Node* mproj = new (C, 1) ProjNode(divmod, DivModNode::mod_proj_num);
+ return divmod;
+}
+
+//------------------------------match------------------------------------------
+// return result(s) along with their RegMask info
+Node *DivModINode::match( const ProjNode *proj, const Matcher *match ) {
+ uint ideal_reg = proj->ideal_reg();
+ RegMask rm;
+ if (proj->_con == div_proj_num) {
+ rm = match->divI_proj_mask();
+ } else {
+ assert(proj->_con == mod_proj_num, "must be div or mod projection");
+ rm = match->modI_proj_mask();
+ }
+ return new (match->C, 1)MachProjNode(this, proj->_con, rm, ideal_reg);
+}
+
+
+//------------------------------match------------------------------------------
+// return result(s) along with their RegMask info
+Node *DivModLNode::match( const ProjNode *proj, const Matcher *match ) {
+ uint ideal_reg = proj->ideal_reg();
+ RegMask rm;
+ if (proj->_con == div_proj_num) {
+ rm = match->divL_proj_mask();
+ } else {
+ assert(proj->_con == mod_proj_num, "must be div or mod projection");
+ rm = match->modL_proj_mask();
+ }
+ return new (match->C, 1)MachProjNode(this, proj->_con, rm, ideal_reg);
+}
diff --git a/src/share/vm/opto/divnode.hpp b/src/share/vm/opto/divnode.hpp
new file mode 100644
index 000000000..797d2cf41
--- /dev/null
+++ b/src/share/vm/opto/divnode.hpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+
+//------------------------------DivINode---------------------------------------
+// Integer division
+// Note: this is division as defined by JVMS, i.e., MinInt/-1 == MinInt.
+// On processors which don't naturally support this special case (e.g., x86),
+// the matcher or runtime system must take care of this.
+class DivINode : public Node {
+public:
+ DivINode( Node *c, Node *dividend, Node *divisor ) : Node(c, dividend, divisor ) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------DivLNode---------------------------------------
+// Long division
+class DivLNode : public Node {
+public:
+ DivLNode( Node *c, Node *dividend, Node *divisor ) : Node(c, dividend, divisor ) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------DivFNode---------------------------------------
+// Float division
+class DivFNode : public Node {
+public:
+ DivFNode( Node *c, Node *dividend, Node *divisor ) : Node(c, dividend, divisor) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------DivDNode---------------------------------------
+// Double division
+class DivDNode : public Node {
+public:
+ DivDNode( Node *c, Node *dividend, Node *divisor ) : Node(c,dividend, divisor) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------ModINode---------------------------------------
+// Integer modulus
+class ModINode : public Node {
+public:
+ ModINode( Node *c, Node *in1, Node *in2 ) : Node(c,in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ModLNode---------------------------------------
+// Long modulus
+class ModLNode : public Node {
+public:
+ ModLNode( Node *c, Node *in1, Node *in2 ) : Node(c,in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ModFNode---------------------------------------
+// Float Modulus
+class ModFNode : public Node {
+public:
+ ModFNode( Node *c, Node *in1, Node *in2 ) : Node(c,in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ModDNode---------------------------------------
+// Double Modulus
+class ModDNode : public Node {
+public:
+ ModDNode( Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------DivModNode---------------------------------------
+// Division with remainder result.
+class DivModNode : public MultiNode {
+protected:
+ DivModNode( Node *c, Node *dividend, Node *divisor );
+public:
+ enum {
+ div_proj_num = 0, // quotient
+ mod_proj_num = 1 // remainder
+ };
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { return NULL; }
+ virtual const Type *Value( PhaseTransform *phase ) const { return bottom_type(); }
+ virtual uint hash() const { return Node::hash(); }
+ virtual bool is_CFG() const { return false; }
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+
+ ProjNode* div_proj() { return proj_out(div_proj_num); }
+ ProjNode* mod_proj() { return proj_out(mod_proj_num); }
+};
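+// Added commentary (not in the original header): on targets whose hardware
+// divide produces both quotient and remainder, the compiler can replace a
+// DivI/ModI (or DivL/ModL) pair sharing the same inputs with a single DivMod
+// node and read both results through the div_proj()/mod_proj() projections.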
+
+//------------------------------DivModINode---------------------------------------
+// Integer division with remainder result.
+class DivModINode : public DivModNode {
+public:
+ DivModINode( Node *c, Node *dividend, Node *divisor ) : DivModNode(c, dividend, divisor) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeTuple::INT_PAIR; }
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+
+ // Make a divmod and associated projections from a div or mod.
+ static DivModINode* make(Compile* C, Node* div_or_mod);
+};
+
+//------------------------------DivModLNode---------------------------------------
+// Long division with remainder result.
+class DivModLNode : public DivModNode {
+public:
+ DivModLNode( Node *c, Node *dividend, Node *divisor ) : DivModNode(c, dividend, divisor) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeTuple::LONG_PAIR; }
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+
+ // Make a divmod and associated projections from a div or mod.
+ static DivModLNode* make(Compile* C, Node* div_or_mod);
+};
diff --git a/src/share/vm/opto/doCall.cpp b/src/share/vm/opto/doCall.cpp
new file mode 100644
index 000000000..ff85fb643
--- /dev/null
+++ b/src/share/vm/opto/doCall.cpp
@@ -0,0 +1,862 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_doCall.cpp.incl"
+
+#ifndef PRODUCT
+void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
+ if (TraceTypeProfile || PrintInlining || PrintOptoInlining) {
+ tty->print(" ");
+ for( int i = 0; i < depth; i++ ) tty->print(" ");
+ if (!PrintOpto) {
+ method->print_short_name();
+ tty->print(" ->");
+ }
+ tty->print(" @ %d ", bci);
+ prof_method->print_short_name();
+ tty->print(" >>TypeProfile (%d/%d counts) = ", receiver_count, site_count);
+ prof_klass->name()->print_symbol();
+ tty->print_cr(" (%d bytes)", prof_method->code_size());
+ }
+}
+#endif
+
+CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float prof_factor) {
+ CallGenerator* cg;
+
+ // Dtrace currently doesn't work unless all calls are vanilla
+ if (DTraceMethodProbes) {
+ allow_inline = false;
+ }
+
+ // Note: When we get profiling during stage-1 compiles, we want to pull
+ // from more specific profile data which pertains to this inlining.
+ // Right now, ignore the information in jvms->caller(), and do method[bci].
+ ciCallProfile profile = jvms->method()->call_profile_at_bci(jvms->bci());
+
+ // See how many times this site has been invoked.
+ int site_count = profile.count();
+ int receiver_count = -1;
+ if (call_is_virtual && UseTypeProfile && profile.has_receiver(0)) {
+ // Receivers in the profile structure are ordered by call counts
+ // so that the most called (major) receiver is profile.receiver(0).
+ receiver_count = profile.receiver_count(0);
+ }
+
+ CompileLog* log = this->log();
+ if (log != NULL) {
+ int rid = (receiver_count >= 0)? log->identify(profile.receiver(0)): -1;
+ int r2id = (profile.morphism() == 2)? log->identify(profile.receiver(1)):-1;
+ log->begin_elem("call method='%d' count='%d' prof_factor='%g'",
+ log->identify(call_method), site_count, prof_factor);
+ if (call_is_virtual) log->print(" virtual='1'");
+ if (allow_inline) log->print(" inline='1'");
+ if (receiver_count >= 0) {
+ log->print(" receiver='%d' receiver_count='%d'", rid, receiver_count);
+ if (profile.has_receiver(1)) {
+ log->print(" receiver2='%d' receiver2_count='%d'", r2id, profile.receiver_count(1));
+ }
+ }
+ log->end_elem();
+ }
+
+ // Special case the handling of certain common, profitable library
+ // methods. If these methods are replaced with specialized code,
+ // then we return it as the inlined version of the call.
+ // We do this before the strict f.p. check below because the
+ // intrinsics handle strict f.p. correctly.
+ if (allow_inline) {
+ cg = find_intrinsic(call_method, call_is_virtual);
+ if (cg != NULL) return cg;
+ }
+
+ // Do not inline strict fp into non-strict code, or the reverse
+ bool caller_method_is_strict = jvms->method()->is_strict();
+ if( caller_method_is_strict ^ call_method->is_strict() ) {
+ allow_inline = false;
+ }
+
+ // Attempt to inline...
+ if (allow_inline) {
+ // The profile data is only partly attributable to this caller,
+ // scale back the call site information.
+ float past_uses = jvms->method()->scale_count(site_count, prof_factor);
+ // This is the number of times we expect the call code to be used.
+ float expected_uses = past_uses;
+
+ // Try inlining a bytecoded method:
+ if (!call_is_virtual) {
+ InlineTree* ilt;
+ if (UseOldInlining) {
+ ilt = InlineTree::find_subtree_from_root(this->ilt(), jvms->caller(), jvms->method());
+ } else {
+ // Make a disembodied, stateless ILT.
+ // TO DO: When UseOldInlining is removed, copy the ILT code elsewhere.
+ float site_invoke_ratio = prof_factor;
+ // Note: ilt is for the root of this parse, not the present call site.
+ ilt = new InlineTree(this, jvms->method(), jvms->caller(), site_invoke_ratio);
+ }
+ WarmCallInfo scratch_ci;
+ if (!UseOldInlining)
+ scratch_ci.init(jvms, call_method, profile, prof_factor);
+ WarmCallInfo* ci = ilt->ok_to_inline(call_method, jvms, profile, &scratch_ci);
+ assert(ci != &scratch_ci, "do not let this pointer escape");
+ bool allow_inline = (ci != NULL && !ci->is_cold());
+ bool require_inline = (allow_inline && ci->is_hot());
+
+ if (allow_inline) {
+ CallGenerator* cg = CallGenerator::for_inline(call_method, expected_uses);
+ if (cg == NULL) {
+ // Fall through.
+ } else if (require_inline || !InlineWarmCalls) {
+ return cg;
+ } else {
+ CallGenerator* cold_cg = call_generator(call_method, vtable_index, call_is_virtual, jvms, false, prof_factor);
+ return CallGenerator::for_warm_call(ci, cold_cg, cg);
+ }
+ }
+ }
+
+ // Try using the type profile.
+ if (call_is_virtual && site_count > 0 && receiver_count > 0) {
+ // The major receiver's count >= TypeProfileMajorReceiverPercent of site_count.
+ bool have_major_receiver = (100.*profile.receiver_prob(0) >= (float)TypeProfileMajorReceiverPercent);
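+ // Illustration (threshold value hypothetical): with
+ // TypeProfileMajorReceiverPercent at 90, a site whose profile shows 95% of
+ // calls going to one receiver class sets have_major_receiver, while an
+ // 80/20 bimorphic site does not.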
+ ciMethod* receiver_method = NULL;
+ if (have_major_receiver || profile.morphism() == 1 ||
+ (profile.morphism() == 2 && UseBimorphicInlining)) {
+ // receiver_method = profile.method();
+ // Profiles do not suggest methods now. Look it up in the major receiver.
+ receiver_method = call_method->resolve_invoke(jvms->method()->holder(),
+ profile.receiver(0));
+ }
+ if (receiver_method != NULL) {
+ // The single majority receiver sufficiently outweighs the minority.
+ CallGenerator* hit_cg = this->call_generator(receiver_method,
+ vtable_index, !call_is_virtual, jvms, allow_inline, prof_factor);
+ if (hit_cg != NULL) {
+ // Look up second receiver.
+ CallGenerator* next_hit_cg = NULL;
+ ciMethod* next_receiver_method = NULL;
+ if (profile.morphism() == 2 && UseBimorphicInlining) {
+ next_receiver_method = call_method->resolve_invoke(jvms->method()->holder(),
+ profile.receiver(1));
+ if (next_receiver_method != NULL) {
+ next_hit_cg = this->call_generator(next_receiver_method,
+ vtable_index, !call_is_virtual, jvms,
+ allow_inline, prof_factor);
+ if (next_hit_cg != NULL && !next_hit_cg->is_inline() &&
+ have_major_receiver && UseOnlyInlinedBimorphic) {
+ // Skip if we can't inline second receiver's method
+ next_hit_cg = NULL;
+ }
+ }
+ }
+ CallGenerator* miss_cg;
+ if (( profile.morphism() == 1 ||
+ (profile.morphism() == 2 && next_hit_cg != NULL) ) &&
+ !too_many_traps(Deoptimization::Reason_class_check)
+ // Check only the total number of traps per method, to allow
+ // the transition from the monomorphic to the bimorphic case between
+ // compilations without falling back to a virtual call.
+ // A monomorphic case may have the class_check trap flag set
+ // due to the time gap between the uncommon trap processing
+ // (when flags are set in the MDO) and the call site bytecode execution
+ // in the interpreter (when MDO counters are updated).
+ // There was also a class_check trap in the monomorphic case due to
+ // bug 6225440.
+
+ ) {
+ // Generate uncommon trap for class check failure path
+ // in case of monomorphic or bimorphic virtual call site.
+ miss_cg = CallGenerator::for_uncommon_trap(call_method,
+ Deoptimization::Reason_class_check,
+ Deoptimization::Action_maybe_recompile);
+ } else {
+ // Generate virtual call for class check failure path
+ // in case of polymorphic virtual call site.
+ miss_cg = CallGenerator::for_virtual_call(call_method, vtable_index);
+ }
+ if (miss_cg != NULL) {
+ if (next_hit_cg != NULL) {
+ NOT_PRODUCT(trace_type_profile(jvms->method(), jvms->depth(), jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1)));
+ // We don't need to record dependency on a receiver here and below.
+ // Whenever we inline, the dependency is added by Parse::Parse().
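+ // Nest the predicted calls: this inner test checks receiver(1) and falls
+ // back to miss_cg (trap or virtual call); the outer test built below checks
+ // receiver(0) first and drops into this one on a miss.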
+ miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX);
+ }
+ if (miss_cg != NULL) {
+ NOT_PRODUCT(trace_type_profile(jvms->method(), jvms->depth(), jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count));
+ cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
+ if (cg != NULL) return cg;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // There was no special inlining tactic, or it bailed out.
+ // Use a more generic tactic, like a simple call.
+ if (call_is_virtual) {
+ return CallGenerator::for_virtual_call(call_method, vtable_index);
+ } else {
+ // Class Hierarchy Analysis or Type Profile reveals a unique target,
+ // or it is a static or special call.
+ return CallGenerator::for_direct_call(call_method);
+ }
+}
+
+
+// uncommon-trap call-sites where callee is unloaded, uninitialized or will not link
+bool Parse::can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass* klass) {
+ // Additional inputs to consider...
+ // bc = bc()
+ // caller = method()
+ // iter().get_method_holder_index()
+ assert( dest_method->is_loaded(), "ciTypeFlow should not let us get here" );
+ // Interface classes can be loaded & linked and never get around to
+ // being initialized. Uncommon-trap for not-initialized static or
+ // v-calls. Let interface calls happen.
+ ciInstanceKlass* holder_klass = dest_method->holder();
+ if (!holder_klass->is_initialized() &&
+ !holder_klass->is_interface()) {
+ uncommon_trap(Deoptimization::Reason_uninitialized,
+ Deoptimization::Action_reinterpret,
+ holder_klass);
+ return true;
+ }
+
+ assert(dest_method->will_link(method()->holder(), klass, bc()), "dest_method: typeflow responsibility");
+ return false;
+}
+
+
+//------------------------------do_call----------------------------------------
+// Handle your basic call. Inline if we can & want to, else just setup call.
+void Parse::do_call() {
+ // It's likely we are going to add debug info soon.
+ // Also, if we inline a guy who eventually needs debug info for this JVMS,
+ // our contribution to it is cleaned up right here.
+ kill_dead_locals();
+
+ // Set frequently used booleans
+ bool is_virtual = bc() == Bytecodes::_invokevirtual;
+ bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface;
+ bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial;
+
+ // Find target being called
+ bool will_link;
+ ciMethod* dest_method = iter().get_method(will_link);
+ ciInstanceKlass* holder_klass = dest_method->holder();
+ ciKlass* holder = iter().get_declared_method_holder();
+ ciInstanceKlass* klass = ciEnv::get_instance_klass_for_declared_method_holder(holder);
+
+ int nargs = dest_method->arg_size();
+
+ // uncommon-trap when callee is unloaded, uninitialized or will not link
+ // bailout when too many arguments for register representation
+ if (!will_link || can_not_compile_call_site(dest_method, klass)) {
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ method()->print_name(); tty->print_cr(" can not compile call at bci %d to:", bci());
+ dest_method->print_name(); tty->cr();
+ }
+#endif
+ return;
+ }
+ assert(holder_klass->is_loaded(), "");
+ assert(dest_method->is_static() == !has_receiver, "must match bc");
+ // Note: this takes into account invokeinterface of methods declared in java/lang/Object,
+ // which should be invokevirtuals but according to the VM spec may be invokeinterfaces
+ assert(holder_klass->is_interface() || holder_klass->super() == NULL || (bc() != Bytecodes::_invokeinterface), "must match bc");
+ // Note: In the absence of miranda methods, an abstract class K can perform
+ // an invokevirtual directly on an interface method I.m if K implements I.
+
+ // ---------------------
+ // Does Class Hierarchy Analysis reveal only a single target of a v-call?
+ // Then we may inline or make a static call, but become dependent on there being only 1 target.
+ // Does the call-site type profile reveal only one receiver?
+ // Then we may introduce a run-time check and inline on the path where it succeeds.
+ // The other path may uncommon_trap, check for another receiver, or do a v-call.
+
+ // Choose call strategy.
+ bool call_is_virtual = is_virtual_or_interface;
+ int vtable_index = methodOopDesc::invalid_vtable_index;
+ ciMethod* call_method = dest_method;
+
+ // Try to get the most accurate receiver type
+ if (is_virtual_or_interface) {
+ Node* receiver_node = stack(sp() - nargs);
+ const TypeOopPtr* receiver_type = _gvn.type(receiver_node)->isa_oopptr();
+ ciMethod* optimized_virtual_method = optimize_inlining(method(), bci(), klass, dest_method, receiver_type);
+
+ // Has the call been sufficiently improved that it is no longer virtual?
+ if (optimized_virtual_method != NULL) {
+ call_method = optimized_virtual_method;
+ call_is_virtual = false;
+ } else if (!UseInlineCaches && is_virtual && call_method->is_loaded()) {
+ // We can make a vtable call at this site
+ vtable_index = call_method->resolve_vtable_index(method()->holder(), klass);
+ }
+ }
+
+ // Note: It's OK to try to inline a virtual call.
+ // The call generator will not attempt to inline a polymorphic call
+ // unless it knows how to optimize the receiver dispatch.
+ bool try_inline = (C->do_inlining() || InlineAccessors);
+
+ // ---------------------
+ inc_sp(- nargs); // Temporarily pop args for JVM state of call
+ JVMState* jvms = sync_jvms();
+
+ // ---------------------
+ // Decide call tactic.
+ // This call checks with CHA, the interpreter profile, intrinsics table, etc.
+ // It decides whether inlining is desirable or not.
+ CallGenerator* cg = C->call_generator(call_method, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+
+ // ---------------------
+ // Round double arguments before call
+ round_double_arguments(dest_method);
+
+#ifndef PRODUCT
+ // bump global counters for calls
+ count_compiled_calls(false/*at_method_entry*/, cg->is_inline());
+
+ // Record first part of parsing work for this call
+ parse_histogram()->record_change();
+#endif // not PRODUCT
+
+ assert(jvms == this->jvms(), "still operating on the right JVMS");
+ assert(jvms_in_sync(), "jvms must carry full info into CG");
+
+ // save across call, for a subsequent cast_not_null.
+ Node* receiver = has_receiver ? argument(0) : NULL;
+
+ // Bump method data counters (We profile *before* the call is made
+ // because exceptions don't return to the call site.)
+ profile_call(receiver);
+
+ JVMState* new_jvms;
+ if ((new_jvms = cg->generate(jvms)) == NULL) {
+ // When inlining attempt fails (e.g., too many arguments),
+ // it may contaminate the current compile state, making it
+ // impossible to pull back and try again. Once we call
+ // cg->generate(), we are committed. If it fails, the whole
+ // compilation task is compromised.
+ if (failing()) return;
+#ifndef PRODUCT
+ if (PrintOpto || PrintOptoInlining || PrintInlining) {
+ // Only one fall-back, so if an intrinsic fails, ignore any bytecodes.
+ if (cg->is_intrinsic() && call_method->code_size() > 0) {
+ tty->print("Bailed out of intrinsic, will not inline: ");
+ call_method->print_name(); tty->cr();
+ }
+ }
+#endif
+ // This can happen if a library intrinsic is available, but refuses
+ // the call site, perhaps because it did not match a pattern the
+ // intrinsic was expecting to optimize. The fallback position is
+ // to call out-of-line.
+ try_inline = false; // Inline tactic bailed out.
+ cg = C->call_generator(call_method, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+ if ((new_jvms = cg->generate(jvms)) == NULL) {
+ guarantee(failing(), "call failed to generate: calls should work");
+ return;
+ }
+ }
+
+ if (cg->is_inline()) {
+ C->env()->notice_inlined_method(call_method);
+ }
+
+ // Reset parser state from [new_]jvms, which now carries results of the call.
+ // Return value (if any) is already pushed on the stack by the cg.
+ add_exception_states_from(new_jvms);
+ if (new_jvms->map()->control() == top()) {
+ stop_and_kill_map();
+ } else {
+ assert(new_jvms->same_calls_as(jvms), "method/bci left unchanged");
+ set_jvms(new_jvms);
+ }
+
+ if (!stopped()) {
+ // This was some sort of virtual call, which did a null check for us.
+ // Now we can assert receiver-not-null, on the normal return path.
+ if (receiver != NULL && cg->is_virtual()) {
+ Node* cast = cast_not_null(receiver);
+ // %%% assert(receiver == cast, "should already have cast the receiver");
+ }
+
+ // Round double result after a call from strict to non-strict code
+ round_double_result(dest_method);
+
+ // If the return type of the method is not loaded, assert that the
+ // value we got is a null. Otherwise, we need to recompile.
+ if (!dest_method->return_type()->is_loaded()) {
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ method()->print_name(); tty->print_cr(" asserting nullness of result at bci: %d", bci());
+ dest_method->print_name(); tty->cr();
+ }
+#endif
+ if (C->log() != NULL) {
+ C->log()->elem("assert_null reason='return' klass='%d'",
+ C->log()->identify(dest_method->return_type()));
+ }
+ // If there is going to be a trap, put it at the next bytecode:
+ set_bci(iter().next_bci());
+ do_null_assert(peek(), T_OBJECT);
+ set_bci(iter().cur_bci()); // put it back
+ }
+ }
+
+ // Restart record of parsing work after possible inlining of call
+#ifndef PRODUCT
+ parse_histogram()->set_initial_state(bc());
+#endif
+}
+
+//---------------------------catch_call_exceptions-----------------------------
+// Put a Catch and CatchProj nodes behind a just-created call.
+// Send their caught exceptions to the proper handler.
+// This may be used after a call to the rethrow VM stub,
+// when it is needed to process unloaded exception classes.
+void Parse::catch_call_exceptions(ciExceptionHandlerStream& handlers) {
+ // Exceptions are delivered through this channel:
+ Node* i_o = this->i_o();
+
+ // Add a CatchNode.
+ GrowableArray<int>* bcis = new (C->node_arena()) GrowableArray<int>(C->node_arena(), 8, 0, -1);
+ GrowableArray<const Type*>* extypes = new (C->node_arena()) GrowableArray<const Type*>(C->node_arena(), 8, 0, NULL);
+ GrowableArray<int>* saw_unloaded = new (C->node_arena()) GrowableArray<int>(C->node_arena(), 8, 0, 0);
+
+ for (; !handlers.is_done(); handlers.next()) {
+ ciExceptionHandler* h = handlers.handler();
+ int h_bci = h->handler_bci();
+ ciInstanceKlass* h_klass = h->is_catch_all() ? env()->Throwable_klass() : h->catch_klass();
+ // Do not introduce unloaded exception types into the graph:
+ if (!h_klass->is_loaded()) {
+ if (saw_unloaded->contains(h_bci)) {
+ /* We've already seen an unloaded exception with h_bci,
+ so don't duplicate. Duplication will cause the CatchNode to be
+ unnecessarily large. See 4713716. */
+ continue;
+ } else {
+ saw_unloaded->append(h_bci);
+ }
+ }
+ const Type* h_extype = TypeOopPtr::make_from_klass(h_klass);
+ // (We use make_from_klass because it respects UseUniqueSubclasses.)
+ h_extype = h_extype->join(TypeInstPtr::NOTNULL);
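+ // Joining with NOTNULL narrows the handler type to non-null instances;
+ // a thrown exception oop can never be null.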
+ assert(!h_extype->empty(), "sanity");
+ // Note: It's OK if the BCIs repeat themselves.
+ bcis->append(h_bci);
+ extypes->append(h_extype);
+ }
+
+ int len = bcis->length();
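+ // The CatchNode gets len+1 projections: one fall-through for the normal
+ // return plus one per handler bci collected above.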
+ CatchNode *cn = new (C, 2) CatchNode(control(), i_o, len+1);
+ Node *catch_ = _gvn.transform(cn);
+
+ // now branch with the exception state to each of the (potential)
+ // handlers
+ for(int i=0; i < len; i++) {
+ // Setup JVM state to enter the handler.
+ PreserveJVMState pjvms(this);
+ // Locals are just copied from before the call.
+ // Get control from the CatchNode.
+ int handler_bci = bcis->at(i);
+ Node* ctrl = _gvn.transform( new (C, 1) CatchProjNode(catch_, i+1,handler_bci));
+ // This handler cannot happen?
+ if (ctrl == top()) continue;
+ set_control(ctrl);
+
+ // Create exception oop
+ const TypeInstPtr* extype = extypes->at(i)->is_instptr();
+ Node *ex_oop = _gvn.transform(new (C, 2) CreateExNode(extypes->at(i), ctrl, i_o));
+
+ // Handle unloaded exception classes.
+ if (saw_unloaded->contains(handler_bci)) {
+ // An unloaded exception type is coming here. Do an uncommon trap.
+#ifndef PRODUCT
+ // We do not expect the same handler bci to take both cold unloaded
+ // and hot loaded exceptions. But, watch for it.
+ if (extype->is_loaded()) {
+ tty->print("Warning: Handler @%d takes mixed loaded/unloaded exceptions in ", handler_bci);
+ method()->print_name(); tty->cr();
+ } else if (PrintOpto && (Verbose || WizardMode)) {
+ tty->print("Bailing out on unloaded exception type ");
+ extype->klass()->print_name();
+ tty->print(" at bci:%d in ", bci());
+ method()->print_name(); tty->cr();
+ }
+#endif
+ // Emit an uncommon trap instead of processing the block.
+ set_bci(handler_bci);
+ push_ex_oop(ex_oop);
+ uncommon_trap(Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ extype->klass(), "!loaded exception");
+ set_bci(iter().cur_bci()); // put it back
+ continue;
+ }
+
+ // go to the exception handler
+ if (handler_bci < 0) { // merge with corresponding rethrow node
+ throw_to_exit(make_exception_state(ex_oop));
+ } else { // Else jump to corresponding handler
+ push_ex_oop(ex_oop); // Clear stack and push just the oop.
+ merge_exception(handler_bci);
+ }
+ }
+
+ // The first CatchProj is for the normal return.
+ // (Note: If this is a call to rethrow_Java, this node goes dead.)
+ set_control(_gvn.transform( new (C, 1) CatchProjNode(catch_, CatchProjNode::fall_through_index, CatchProjNode::no_handler_bci)));
+}
+
+
+//----------------------------catch_inline_exceptions--------------------------
+// Handle all exceptions thrown by an inlined method or individual bytecode.
+// Common case 1: we have no handler, so all exceptions merge right into
+// the rethrow case.
+// Case 2: we have some handlers, with loaded exception klasses that have
+// no subklasses. We do a Deutsch-Shiffman style type-check on the incoming
+// exception oop and branch to the handler directly.
+// Case 3: We have some handlers with subklasses or are not loaded at
+// compile-time. We have to call the runtime to resolve the exception.
+// So we insert a RethrowCall and all the logic that goes with it.
+void Parse::catch_inline_exceptions(SafePointNode* ex_map) {
+ // Caller is responsible for saving away the map for normal control flow!
+ assert(stopped(), "call set_map(NULL) first");
+ assert(method()->has_exception_handlers(), "don't come here w/o work to do");
+
+ Node* ex_node = saved_ex_oop(ex_map);
+ if (ex_node == top()) {
+ // No action needed.
+ return;
+ }
+ const TypeInstPtr* ex_type = _gvn.type(ex_node)->isa_instptr();
+ NOT_PRODUCT(if (ex_type==NULL) tty->print_cr("*** Exception not InstPtr"));
+ if (ex_type == NULL)
+ ex_type = TypeOopPtr::make_from_klass(env()->Throwable_klass())->is_instptr();
+
+ // determine potential exception handlers
+ ciExceptionHandlerStream handlers(method(), bci(),
+ ex_type->klass()->as_instance_klass(),
+ ex_type->klass_is_exact());
+
+ // Start executing from the given throw state. (Keep its stack, for now.)
+ // Get the exception oop as known at compile time.
+ ex_node = use_exception_state(ex_map);
+
+ // Get the exception oop klass from its header
+ Node* ex_klass_node = NULL;
+ if (has_ex_handler() && !ex_type->klass_is_exact()) {
+ Node* p = basic_plus_adr( ex_node, ex_node, oopDesc::klass_offset_in_bytes());
+ ex_klass_node = _gvn.transform(new (C, 3) LoadKlassNode(NULL, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT));
+
+ // Compute the exception klass a little more cleverly.
+ // The obvious solution is to simply do a LoadKlass from the 'ex_node'.
+ // However, if the ex_node is a PhiNode, I'm going to do a LoadKlass for
+ // each arm of the Phi. If I know something clever about the exceptions
+ // I'm loading the class from, I can replace the LoadKlass with the
+ // klass constant for the exception oop.
+ if( ex_node->is_Phi() ) {
+ ex_klass_node = new (C, ex_node->req()) PhiNode( ex_node->in(0), TypeKlassPtr::OBJECT );
+ for( uint i = 1; i < ex_node->req(); i++ ) {
+ Node* p = basic_plus_adr( ex_node->in(i), ex_node->in(i), oopDesc::klass_offset_in_bytes() );
+ Node* k = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT));
+ ex_klass_node->init_req( i, k );
+ }
+ _gvn.set_type(ex_klass_node, TypeKlassPtr::OBJECT);
+
+ }
+ }
+
+ // Scan the exception table for applicable handlers.
+ // If none, we can call rethrow() and be done!
+ // If precise (loaded with no subklasses), insert a D.S. style
+ // pointer compare to the correct handler and loop back.
+ // If imprecise, switch to the Rethrow VM-call style handling.
+
+ int remaining = handlers.count_remaining();
+
+ // iterate through all entries sequentially
+ for (;!handlers.is_done(); handlers.next()) {
+ // Do nothing if turned off
+ if( !DeutschShiffmanExceptions ) break;
+ ciExceptionHandler* handler = handlers.handler();
+
+ if (handler->is_rethrow()) {
+ // If we fell off the end of the table without finding an imprecise
+ // exception klass (and without finding a generic handler) then we
+ // know this exception is not handled in this method. We just rethrow
+ // the exception into the caller.
+ throw_to_exit(make_exception_state(ex_node));
+ return;
+ }
+
+ // exception handler bci range covers throw_bci => investigate further
+ int handler_bci = handler->handler_bci();
+
+ if (remaining == 1) {
+ push_ex_oop(ex_node); // Push exception oop for handler
+#ifndef PRODUCT
+ if (PrintOpto && WizardMode) {
+ tty->print_cr(" Catching every inline exception bci:%d -> handler_bci:%d", bci(), handler_bci);
+ }
+#endif
+ merge_exception(handler_bci); // jump to handler
+ return; // No more handling to be done here!
+ }
+
+ // %%% The following logic replicates make_from_klass_unique.
+ // TO DO: Replace by a subroutine call. Then generalize
+ // the type check, as noted in the next "%%%" comment.
+
+ ciInstanceKlass* klass = handler->catch_klass();
+ if (UseUniqueSubclasses) {
+ // (We use make_from_klass because it respects UseUniqueSubclasses.)
+ const TypeOopPtr* tp = TypeOopPtr::make_from_klass(klass);
+ klass = tp->klass()->as_instance_klass();
+ }
+
+ // Get the handler's klass
+ if (!klass->is_loaded()) // klass is not loaded?
+ break; // Must call Rethrow!
+ if (klass->is_interface()) // should not happen, but...
+ break; // bail out
+ // See if the loaded exception klass has no subtypes
+ if (klass->has_subklass())
+ break; // Cannot easily do precise test ==> Rethrow
+
+ // %%% Now that subclass checking is very fast, we need to rewrite
+ // this section and remove the option "DeutschShiffmanExceptions".
+ // The exception processing chain should be a normal typecase pattern,
+ // with a bailout to the interpreter only in the case of unloaded
+ // classes. (The bailout should mark the method non-entrant.)
+ // This rewrite should be placed in GraphKit::, not Parse::.
+
+ // Add a dependence; if any subclass added we need to recompile
+ // %%% should use stronger assert_unique_concrete_subtype instead
+ if (!klass->is_final()) {
+ C->dependencies()->assert_leaf_type(klass);
+ }
+
+ // Implement precise test
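+ // The precise test is a pointer compare of the exception's klass against
+ // the handler's klass constant; on a match the oop is cast to the exact
+ // handler type and control merges into the handler below.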
+ const TypeKlassPtr *tk = TypeKlassPtr::make(klass);
+ Node* con = _gvn.makecon(tk);
+ Node* cmp = _gvn.transform( new (C, 3) CmpPNode(ex_klass_node, con) );
+ Node* bol = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ne) );
+ { BuildCutout unless(this, bol, PROB_LIKELY(0.7f));
+ const TypeInstPtr* tinst = TypeInstPtr::make_exact(TypePtr::NotNull, klass);
+ Node* ex_oop = _gvn.transform(new (C, 2) CheckCastPPNode(control(), ex_node, tinst));
+ push_ex_oop(ex_oop); // Push exception oop for handler
+#ifndef PRODUCT
+ if (PrintOpto && WizardMode) {
+ tty->print(" Catching inline exception bci:%d -> handler_bci:%d -- ", bci(), handler_bci);
+ klass->print_name();
+ tty->cr();
+ }
+#endif
+ merge_exception(handler_bci);
+ }
+
+ // Come here if exception does not match handler.
+ // Carry on with more handler checks.
+ --remaining;
+ }
+
+ assert(!stopped(), "you should return if you finish the chain");
+
+ if (remaining == 1) {
+ // Further checks do not matter.
+ }
+
+ if (can_rerun_bytecode()) {
+ // Do not push_ex_oop here!
+ // Re-executing the bytecode will reproduce the throwing condition.
+ bool must_throw = true;
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_none,
+ (ciKlass*)NULL, (const char*)NULL, // default args
+ must_throw);
+ return;
+ }
+
+ // Oops, need to call into the VM to resolve the klasses at runtime.
+ // Note: This call must not deoptimize, since it is not a real call at this bci!
+ kill_dead_locals();
+
+ make_runtime_call(RC_NO_LEAF | RC_MUST_THROW,
+ OptoRuntime::rethrow_Type(),
+ OptoRuntime::rethrow_stub(),
+ NULL, NULL,
+ ex_node);
+
+ // Rethrow is a pure call, no side effects, only a result.
+ // The result cannot be allocated, so we use I_O
+
+ // Catch exceptions from the rethrow
+ catch_call_exceptions(handlers);
+}
+
+
+// (Note: Moved add_debug_info into GraphKit::add_safepoint_edges.)
+
+
+#ifndef PRODUCT
+void Parse::count_compiled_calls(bool at_method_entry, bool is_inline) {
+ if( CountCompiledCalls ) {
+ if( at_method_entry ) {
+ // bump invocation counter if top method (for statistics)
+ if (CountCompiledCalls && depth() == 1) {
+ const TypeInstPtr* addr_type = TypeInstPtr::make(method());
+ Node* adr1 = makecon(addr_type);
+ Node* adr2 = basic_plus_adr(adr1, adr1, in_bytes(methodOopDesc::compiled_invocation_counter_offset()));
+ increment_counter(adr2);
+ }
+ } else if (is_inline) {
+ switch (bc()) {
+ case Bytecodes::_invokevirtual: increment_counter(SharedRuntime::nof_inlined_calls_addr()); break;
+ case Bytecodes::_invokeinterface: increment_counter(SharedRuntime::nof_inlined_interface_calls_addr()); break;
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial: increment_counter(SharedRuntime::nof_inlined_static_calls_addr()); break;
+ default: fatal("unexpected call bytecode");
+ }
+ } else {
+ switch (bc()) {
+ case Bytecodes::_invokevirtual: increment_counter(SharedRuntime::nof_normal_calls_addr()); break;
+ case Bytecodes::_invokeinterface: increment_counter(SharedRuntime::nof_interface_calls_addr()); break;
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial: increment_counter(SharedRuntime::nof_static_calls_addr()); break;
+ default: fatal("unexpected call bytecode");
+ }
+ }
+ }
+}
+#endif //PRODUCT
+
+
+// Identify possible target method and inlining style
+ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
+ ciMethod *dest_method, const TypeOopPtr* receiver_type) {
+ // only use for virtual or interface calls
+
+ // If it is obviously final, do not bother to call find_monomorphic_target,
+ // because the class hierarchy checks are not needed, and may fail due to
+ // incompletely loaded classes. Since we do our own class loading checks
+ // in this module, we may confidently bind to any method.
+ if (dest_method->can_be_statically_bound()) {
+ return dest_method;
+ }
+
+ // Attempt to improve the receiver
+ bool actual_receiver_is_exact = false;
+ ciInstanceKlass* actual_receiver = klass;
+ if (receiver_type != NULL) {
+ // Array methods are all inherited from Object, and are monomorphic.
+ if (receiver_type->isa_aryptr() &&
+ dest_method->holder() == env()->Object_klass()) {
+ return dest_method;
+ }
+
+ // All other interesting cases are instance klasses.
+ if (!receiver_type->isa_instptr()) {
+ return NULL;
+ }
+
+ ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
+ if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
+ (ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) {
+ // ikl is the same as or a better type than the original actual_receiver,
+ // e.g. static receiver from bytecodes.
+ actual_receiver = ikl;
+ // Is the actual_receiver exact?
+ actual_receiver_is_exact = receiver_type->klass_is_exact();
+ }
+ }
+
+ ciInstanceKlass* calling_klass = caller->holder();
+ ciMethod* cha_monomorphic_target = dest_method->find_monomorphic_target(calling_klass, klass, actual_receiver);
+ if (cha_monomorphic_target != NULL) {
+ assert(!cha_monomorphic_target->is_abstract(), "");
+ // Look at the method-receiver type. Does it add "too much information"?
+ ciKlass* mr_klass = cha_monomorphic_target->holder();
+ const Type* mr_type = TypeInstPtr::make(TypePtr::BotPTR, mr_klass);
+ if (receiver_type == NULL || !receiver_type->higher_equal(mr_type)) {
+ // Calling this method would include an implicit cast to its holder.
+ // %%% Not yet implemented. Would throw minor asserts at present.
+ // %%% The most common wins are already gained by +UseUniqueSubclasses.
+ // To fix, put the higher_equal check at the call of this routine,
+ // and add a CheckCastPP to the receiver.
+ if (TraceDependencies) {
+ tty->print_cr("found unique CHA method, but could not cast up");
+ tty->print(" method = ");
+ cha_monomorphic_target->print();
+ tty->cr();
+ }
+ if (C->log() != NULL) {
+ C->log()->elem("missed_CHA_opportunity klass='%d' method='%d'",
+ C->log()->identify(klass),
+ C->log()->identify(cha_monomorphic_target));
+ }
+ cha_monomorphic_target = NULL;
+ }
+ }
+ if (cha_monomorphic_target != NULL) {
+ // Hardwiring a virtual.
+ // If we inlined because CHA revealed only a single target method,
+ // then we are dependent on that target method not getting overridden
+ // by dynamic class loading. Be sure to test the "static" receiver
+ // dest_method here, as opposed to the actual receiver, which may
+ // falsely lead us to believe that the receiver is final or private.
+ C->dependencies()->assert_unique_concrete_method(actual_receiver, cha_monomorphic_target);
+ return cha_monomorphic_target;
+ }
+
+ // If the type is exact, we can still bind the method w/o a vcall.
+ // (This case comes after CHA so we can see how much extra work it does.)
+ if (actual_receiver_is_exact) {
+ // In case of evolution, there is a dependence on every inlined method, since each
+ // such method can be changed when its class is redefined.
+ ciMethod* exact_method = dest_method->resolve_invoke(calling_klass, actual_receiver);
+ if (exact_method != NULL) {
+#ifndef PRODUCT
+ if (PrintOpto) {
+ tty->print(" Calling method via exact type @%d --- ", bci);
+ exact_method->print_name();
+ tty->cr();
+ }
+#endif
+ return exact_method;
+ }
+ }
+
+ return NULL;
+}
diff --git a/src/share/vm/opto/domgraph.cpp b/src/share/vm/opto/domgraph.cpp
new file mode 100644
index 000000000..2ef02fd0c
--- /dev/null
+++ b/src/share/vm/opto/domgraph.cpp
@@ -0,0 +1,664 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_domgraph.cpp.incl"
+
+//------------------------------Tarjan-----------------------------------------
+// A data structure that holds all the information needed to find dominators.
+struct Tarjan {
+ Block *_block; // Basic block for this info
+
+ uint _semi; // Semi-dominators
+ uint _size; // Used for faster LINK and EVAL
+ Tarjan *_parent; // Parent in DFS
+ Tarjan *_label; // Used for LINK and EVAL
+ Tarjan *_ancestor; // Used for LINK and EVAL
+ Tarjan *_child; // Used for faster LINK and EVAL
+ Tarjan *_dom; // Parent in dominator tree (immediate dom)
+ Tarjan *_bucket; // Set of vertices with given semidominator
+
+ Tarjan *_dom_child; // Child in dominator tree
+ Tarjan *_dom_next; // Next in dominator tree
+
+ // Fast union-find work
+ void COMPRESS();
+ Tarjan *EVAL(void);
+ void LINK( Tarjan *w, Tarjan *tarjan0 );
+
+ void setdepth( uint size );
+
+};
+
+//------------------------------Dominator--------------------------------------
+// Compute the dominator tree of the CFG. The CFG must already have been
+// constructed. This is the Lengauer & Tarjan O(E*alpha(E,V)) algorithm.
+void PhaseCFG::Dominators( ) {
+ // Pre-grow the blocks array, prior to the ResourceMark kicking in
+ _blocks.map(_num_blocks,0);
+
+ ResourceMark rm;
+ // Setup mappings from my Graph to Tarjan's stuff and back
+ // Note: Tarjan uses 1-based arrays
+ Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1);
+
+ // Tarjan's algorithm, almost verbatim:
+ // Step 1:
+ _rpo_ctr = _num_blocks;
+ uint dfsnum = DFS( tarjan );
+ if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops!
+ // If the returned dfsnum does not match the number of blocks, then we
+ // must have some unreachable loops. These can be made at any time by
+ // IterGVN. They are cleaned up by CCP or the loop opts, but the last
+ // IterGVN can always make more that are not cleaned up. Highly unlikely
+ // except in ZKM.jar, where endless irreducible loops cause the loop opts
+ // to not get run.
+ //
+ // Having found unreachable loops, we have made a bad RPO _block layout.
+ // We can re-run the above DFS pass with the correct number of blocks,
+ // and hack the Tarjan algorithm below to be robust in the presence of
+ // such dead loops (as was done for the NTarjan code farther below).
+ // Since this situation is so unlikely, instead I've decided to bail out.
+ // CNC 7/24/2001
+ C->record_method_not_compilable("unreachable loop");
+ return;
+ }
+ _blocks._cnt = _num_blocks;
+
+ // Tarjan uses 1-based arrays, so initialize the 0th element, which serves as a sentinel
+ tarjan[0]._size = tarjan[0]._semi = 0;
+ tarjan[0]._label = &tarjan[0];
+
+ uint i;
+ for( i=_num_blocks; i>=2; i-- ) { // For all vertices in reverse DFS order
+ Tarjan *w = &tarjan[i]; // Get vertex from DFS
+
+ // Step 2:
+ Node *whead = w->_block->head();
+ for( uint j=1; j < whead->req(); j++ ) {
+ Block *b = _bbs[whead->in(j)->_idx];
+ Tarjan *vx = &tarjan[b->_pre_order];
+ Tarjan *u = vx->EVAL();
+ if( u->_semi < w->_semi )
+ w->_semi = u->_semi;
+ }
+
+ // w is added to a bucket here, and only here.
+ // Thus w is in at most one bucket and the sum of all bucket sizes is O(n).
+ // Thus bucket can be a linked list.
+ // Thus we do not need a small integer name for each Block.
+ w->_bucket = tarjan[w->_semi]._bucket;
+ tarjan[w->_semi]._bucket = w;
+
+ w->_parent->LINK( w, &tarjan[0] );
+
+ // Step 3:
+ for( Tarjan *vx = w->_parent->_bucket; vx; vx = vx->_bucket ) {
+ Tarjan *u = vx->EVAL();
+ vx->_dom = (u->_semi < vx->_semi) ? u : w->_parent;
+ }
+ }
+
+ // Step 4:
+ for( i=2; i <= _num_blocks; i++ ) {
+ Tarjan *w = &tarjan[i];
+ if( w->_dom != &tarjan[w->_semi] )
+ w->_dom = w->_dom->_dom;
+ w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
+ }
+ // No immediate dominator for the root
+ Tarjan *w = &tarjan[_broot->_pre_order];
+ w->_dom = NULL;
+ w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
+
+ // Convert the dominator tree array into my kind of graph
+ for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices
+ Tarjan *t = &tarjan[i]; // Handy access
+ Tarjan *tdom = t->_dom; // Handy access to immediate dominator
+ if( tdom ) { // Root has no immediate dominator
+ t->_block->_idom = tdom->_block; // Set immediate dominator
+ t->_dom_next = tdom->_dom_child; // Make me a sibling of parent's child
+ tdom->_dom_child = t; // Make me a child of my parent
+ } else
+ t->_block->_idom = NULL; // Root
+ }
+ w->setdepth( _num_blocks+1 ); // Set depth in dominator tree
+
+}
+
+//----------------------------Block_Stack--------------------------------------
+class Block_Stack {
+ private:
+ struct Block_Descr {
+ Block *block; // Block
+ int index; // Index of block's successor pushed on stack
+ int freq_idx; // Index of block's most frequent successor
+ };
+ Block_Descr *_stack_top;
+ Block_Descr *_stack_max;
+ Block_Descr *_stack;
+ Tarjan *_tarjan;
+ uint most_frequent_successor( Block *b );
+ public:
+ Block_Stack(Tarjan *tarjan, int size) : _tarjan(tarjan) {
+ _stack = NEW_RESOURCE_ARRAY(Block_Descr, size);
+ _stack_max = _stack + size;
+ _stack_top = _stack - 1; // stack is empty
+ }
+ void push(uint pre_order, Block *b) {
+ Tarjan *t = &_tarjan[pre_order]; // Fast local access
+ b->_pre_order = pre_order; // Flag as visited
+ t->_block = b; // Save actual block
+ t->_semi = pre_order; // Block to DFS map
+ t->_label = t; // DFS to vertex map
+ t->_ancestor = NULL; // Fast LINK & EVAL setup
+ t->_child = &_tarjan[0]; // Sentinel
+ t->_size = 1;
+ t->_bucket = NULL;
+ if (pre_order == 1)
+ t->_parent = NULL; // first block doesn't have a parent
+ else {
+ // Save parent (current top block on stack) in DFS
+ t->_parent = &_tarjan[_stack_top->block->_pre_order];
+ }
+ // Now put this block on stack
+ ++_stack_top;
+ assert(_stack_top < _stack_max, ""); // assert if the stack would have to grow
+ _stack_top->block = b;
+ _stack_top->index = -1;
+ // Find the index into b->succs[] array of the most frequent successor.
+ _stack_top->freq_idx = most_frequent_successor(b); // freq_idx >= 0
+ }
+ Block* pop() { Block* b = _stack_top->block; _stack_top--; return b; }
+ bool is_nonempty() { return (_stack_top >= _stack); }
+ bool last_successor() { return (_stack_top->index == _stack_top->freq_idx); }
+ Block* next_successor() {
+ int i = _stack_top->index;
+ i++;
+ if (i == _stack_top->freq_idx) i++;
+ if (i >= (int)(_stack_top->block->_num_succs)) {
+ i = _stack_top->freq_idx; // process most frequent successor last
+ }
+ _stack_top->index = i;
+ return _stack_top->block->_succs[ i ];
+ }
+};
+
+//-------------------------most_frequent_successor-----------------------------
+// Find the index into the b->succs[] array of the most frequent successor.
+uint Block_Stack::most_frequent_successor( Block *b ) {
+ uint freq_idx = 0;
+ int eidx = b->end_idx();
+ Node *n = b->_nodes[eidx];
+ int op = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : n->Opcode();
+ switch( op ) {
+ case Op_CountedLoopEnd:
+ case Op_If: { // Split frequency amongst children
+ float prob = n->as_MachIf()->_prob;
+ // Is succ[0] the TRUE branch or the FALSE branch?
+ if( b->_nodes[eidx+1]->Opcode() == Op_IfFalse )
+ prob = 1.0f - prob;
+ freq_idx = prob < PROB_FAIR; // freq=1 for succ[0] < 0.5 prob
+ break;
+ }
+ case Op_Catch: // Split frequency amongst children
+ for( freq_idx = 0; freq_idx < b->_num_succs; freq_idx++ )
+ if( b->_nodes[eidx+1+freq_idx]->as_CatchProj()->_con == CatchProjNode::fall_through_index )
+ break;
+ // Handle case of no fall-thru (e.g., check-cast MUST throw an exception)
+ if( freq_idx == b->_num_succs ) freq_idx = 0;
+ break;
+ // Currently there is no support for finding out the most
+ // frequent successor for jumps, so let's just make it the first one
+ case Op_Jump:
+ case Op_Root:
+ case Op_Goto:
+ case Op_NeverBranch:
+ freq_idx = 0; // fall thru
+ break;
+ case Op_TailCall:
+ case Op_TailJump:
+ case Op_Return:
+ case Op_Halt:
+ case Op_Rethrow:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ return freq_idx;
+}
+
+//------------------------------DFS--------------------------------------------
+// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
+// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
+uint PhaseCFG::DFS( Tarjan *tarjan ) {
+ Block *b = _broot;
+ uint pre_order = 1;
+ // Allocate stack of size _num_blocks+1 to avoid frequent realloc
+ Block_Stack bstack(tarjan, _num_blocks+1);
+
+ // Push on stack the state for the first block
+ bstack.push(pre_order, b);
+ ++pre_order;
+
+ while (bstack.is_nonempty()) {
+ if (!bstack.last_successor()) {
+ // Walk over all successors in pre-order (DFS).
+ Block *s = bstack.next_successor();
+ if (s->_pre_order == 0) { // Check for no-pre-order, not-visited
+ // Push on stack the state of successor
+ bstack.push(pre_order, s);
+ ++pre_order;
+ }
+ }
+ else {
+ // Build a reverse post-order in the CFG _blocks array
+ Block *stack_top = bstack.pop();
+ stack_top->_rpo = --_rpo_ctr;
+ _blocks.map(stack_top->_rpo, stack_top);
+ }
+ }
+ return pre_order;
+}
+
+//------------------------------COMPRESS---------------------------------------
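+// Path-compress the ancestor chain, keeping the label with the smallest
+// semidominator seen along the way.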
+void Tarjan::COMPRESS()
+{
+ assert( _ancestor != 0, "" );
+ if( _ancestor->_ancestor != 0 ) {
+ _ancestor->COMPRESS( );
+ if( _ancestor->_label->_semi < _label->_semi )
+ _label = _ancestor->_label;
+ _ancestor = _ancestor->_ancestor;
+ }
+}
+
+//------------------------------EVAL-------------------------------------------
+Tarjan *Tarjan::EVAL() {
+ if( !_ancestor ) return _label;
+ COMPRESS();
+ return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
+}
+
+//------------------------------LINK-------------------------------------------
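+// This is the size-balanced LINK of Lengauer & Tarjan's faster variant:
+// _size and _child maintain balanced virtual trees so that EVAL stays cheap.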
+void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
+ Tarjan *s = w;
+ while( w->_label->_semi < s->_child->_label->_semi ) {
+ if( s->_size + s->_child->_child->_size >= (s->_child->_size << 1) ) {
+ s->_child->_ancestor = s;
+ s->_child = s->_child->_child;
+ } else {
+ s->_child->_size = s->_size;
+ s = s->_ancestor = s->_child;
+ }
+ }
+ s->_label = w->_label;
+ _size += w->_size;
+ if( _size < (w->_size << 1) ) {
+ Tarjan *tmp = s; s = _child; _child = tmp;
+ }
+ while( s != tarjan0 ) {
+ s->_ancestor = this;
+ s = s->_child;
+ }
+}
+
+//------------------------------setdepth---------------------------------------
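+// Assign dominator-tree depths breadth-first: the pre-allocated array acts
+// as the worklist, [next,last) holds the current level, and children are
+// appended at 'top' to form the next level.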
+void Tarjan::setdepth( uint stack_size ) {
+ Tarjan **top = NEW_RESOURCE_ARRAY(Tarjan*, stack_size);
+ Tarjan **next = top;
+ Tarjan **last;
+ uint depth = 0;
+ *top = this;
+ ++top;
+ do {
+ // next level
+ ++depth;
+ last = top;
+ do {
+ // Set current depth for all tarjans on this level
+ Tarjan *t = *next; // next tarjan from stack
+ ++next;
+ do {
+ t->_block->_dom_depth = depth; // Set depth in dominator tree
+ Tarjan *dom_child = t->_dom_child;
+ t = t->_dom_next; // next tarjan
+ if (dom_child != NULL) {
+ *top = dom_child; // save child on stack
+ ++top;
+ }
+ } while (t != NULL);
+ } while (next < last);
+ } while (last < top);
+}
+
+//*********************** DOMINATORS ON THE SEA OF NODES***********************
+//------------------------------NTarjan----------------------------------------
+// A data structure that holds all the information needed to find dominators.
+struct NTarjan {
+ Node *_control; // Control node associated with this info
+
+ uint _semi; // Semi-dominators
+ uint _size; // Used for faster LINK and EVAL
+ NTarjan *_parent; // Parent in DFS
+ NTarjan *_label; // Used for LINK and EVAL
+ NTarjan *_ancestor; // Used for LINK and EVAL
+ NTarjan *_child; // Used for faster LINK and EVAL
+ NTarjan *_dom; // Parent in dominator tree (immediate dom)
+ NTarjan *_bucket; // Set of vertices with given semidominator
+
+ NTarjan *_dom_child; // Child in dominator tree
+ NTarjan *_dom_next; // Next in dominator tree
+
+ // Perform DFS search.
+ // Setup 'vertex' as DFS to vertex mapping.
+ // Setup 'semi' as vertex to DFS mapping.
+ // Set 'parent' to DFS parent.
+ static int DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder );
+ void setdepth( uint size, uint *dom_depth );
+
+ // Fast union-find work
+ void COMPRESS();
+ NTarjan *EVAL(void);
+ void LINK( NTarjan *w, NTarjan *ntarjan0 );
+#ifndef PRODUCT
+ void dump(int offset) const;
+#endif
+};
+
+//------------------------------Dominator--------------------------------------
+// Compute the dominator tree of the sea of nodes. This version walks all CFG
+// nodes (using the is_CFG() call) and places them in a dominator tree. Thus,
+// it needs a count of the CFG nodes for the mapping table. This is the
+// Lengauer & Tarjan O(E*alpha(E,V)) algorithm.
+void PhaseIdealLoop::Dominators( ) {
+ ResourceMark rm;
+ // Setup mappings from my Graph to Tarjan's stuff and back
+ // Note: Tarjan uses 1-based arrays
+ NTarjan *ntarjan = NEW_RESOURCE_ARRAY(NTarjan,C->unique()+1);
+ // Initialize _control field for fast reference
+ int i;
+ for( i= C->unique()-1; i>=0; i-- )
+ ntarjan[i]._control = NULL;
+
+ // Store the DFS order for the main loop
+ uint *dfsorder = NEW_RESOURCE_ARRAY(uint,C->unique()+1);
+ memset(dfsorder, max_uint, (C->unique()+1) * sizeof(uint));
+
+ // Tarjan's algorithm, almost verbatim:
+ // Step 1:
+ VectorSet visited(Thread::current()->resource_area());
+ int dfsnum = NTarjan::DFS( ntarjan, visited, this, dfsorder);
+
+ // Tarjan uses 1-based arrays, so initialize the 0th element, which serves as a sentinel
+ ntarjan[0]._size = ntarjan[0]._semi = 0;
+ ntarjan[0]._label = &ntarjan[0];
+
+ for( i = dfsnum-1; i>1; i-- ) { // For all nodes in reverse DFS order
+ NTarjan *w = &ntarjan[i]; // Get Node from DFS
+ assert(w->_control != NULL,"bad DFS walk");
+
+ // Step 2:
+ Node *whead = w->_control;
+ for( uint j=0; j < whead->req(); j++ ) { // For each predecessor
+ if( whead->in(j) == NULL || !whead->in(j)->is_CFG() )
+ continue; // Only process control nodes
+ uint b = dfsorder[whead->in(j)->_idx];
+ if(b == max_uint) continue;
+ NTarjan *vx = &ntarjan[b];
+ NTarjan *u = vx->EVAL();
+ if( u->_semi < w->_semi )
+ w->_semi = u->_semi;
+ }
+
+ // w is added to a bucket here, and only here.
+ // Thus w is in at most one bucket and the sum of all bucket sizes is O(n).
+ // Thus bucket can be a linked list.
+ w->_bucket = ntarjan[w->_semi]._bucket;
+ ntarjan[w->_semi]._bucket = w;
+
+ w->_parent->LINK( w, &ntarjan[0] );
+
+ // Step 3:
+ for( NTarjan *vx = w->_parent->_bucket; vx; vx = vx->_bucket ) {
+ NTarjan *u = vx->EVAL();
+ vx->_dom = (u->_semi < vx->_semi) ? u : w->_parent;
+ }
+
+ // Cleanup any unreachable loops now. Unreachable loops are loops that
+ // flow into the main graph (and hence into ROOT) but are not reachable
+ // from above. Such code is dead, but requires a global pass to detect
+ // it; this global pass was the 'build_loop_tree' pass run just prior.
+ if( whead->is_Region() ) {
+ for( uint i = 1; i < whead->req(); i++ ) {
+ if (!has_node(whead->in(i))) {
+ // Kill dead input path
+ assert( !visited.test(whead->in(i)->_idx),
+ "input with no loop must be dead" );
+ _igvn.hash_delete(whead);
+ whead->del_req(i);
+ _igvn._worklist.push(whead);
+ for (DUIterator_Fast jmax, j = whead->fast_outs(jmax); j < jmax; j++) {
+ Node* p = whead->fast_out(j);
+ if( p->is_Phi() ) {
+ _igvn.hash_delete(p);
+ p->del_req(i);
+ _igvn._worklist.push(p);
+ }
+ }
+ i--; // Rerun same iteration
+ } // End of if dead input path
+ } // End of for all input paths
+ } // End of if whead is a Region
+ } // End of for all Nodes in reverse DFS order
+
+ // Step 4:
+ for( i=2; i < dfsnum; i++ ) { // DFS order
+ NTarjan *w = &ntarjan[i];
+ assert(w->_control != NULL,"Bad DFS walk");
+ if( w->_dom != &ntarjan[w->_semi] )
+ w->_dom = w->_dom->_dom;
+ w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
+ }
+ // No immediate dominator for the root
+ NTarjan *w = &ntarjan[dfsorder[C->root()->_idx]];
+ w->_dom = NULL;
+ w->_parent = NULL;
+ w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
+
+ // Convert the dominator tree array into my kind of graph
+ for( i=1; i<dfsnum; i++ ) { // For all Tarjan vertices
+ NTarjan *t = &ntarjan[i]; // Handy access
+ assert(t->_control != NULL,"Bad DFS walk");
+ NTarjan *tdom = t->_dom; // Handy access to immediate dominator
+ if( tdom ) { // Root has no immediate dominator
+ _idom[t->_control->_idx] = tdom->_control; // Set immediate dominator
+ t->_dom_next = tdom->_dom_child; // Make me a sibling of parent's child
+ tdom->_dom_child = t; // Make me a child of my parent
+ } else
+ _idom[C->root()->_idx] = NULL; // Root
+ }
+ w->setdepth( C->unique()+1, _dom_depth ); // Set depth in dominator tree
+ // Pick up the 'top' node as well
+ _idom [C->top()->_idx] = C->root();
+ _dom_depth[C->top()->_idx] = 1;
+
+ // Debug Print of Dominator tree
+ if( PrintDominators ) {
+#ifndef PRODUCT
+ w->dump(0);
+#endif
+ }
+}
+
+//------------------------------DFS--------------------------------------------
+// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
+// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
+int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
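+ // dfsorder[] does double duty: while a node sits on the stack unvisited,
+ // its slot holds the dfsnum of the CFG node that pushed it (its DFS parent);
+ // once the node is visited the slot is overwritten with its own DFS number.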
+ // Allocate stack of size C->unique()/8 to avoid frequent realloc
+ GrowableArray <Node *> dfstack(pil->C->unique() >> 3);
+ Node *b = pil->C->root();
+ int dfsnum = 1;
+ dfsorder[b->_idx] = dfsnum; // Cache parent's dfsnum for a later use
+ dfstack.push(b);
+
+ while (dfstack.is_nonempty()) {
+ b = dfstack.pop();
+ if( !visited.test_set(b->_idx) ) { // Test node and flag it as visited
+ NTarjan *w = &ntarjan[dfsnum];
+ // Only fully process control nodes
+ w->_control = b; // Save actual node
+ // Use parent's cached dfsnum to identify "Parent in DFS"
+ w->_parent = &ntarjan[dfsorder[b->_idx]];
+ dfsorder[b->_idx] = dfsnum; // Save DFS order info
+ w->_semi = dfsnum; // Node to DFS map
+ w->_label = w; // DFS to vertex map
+ w->_ancestor = NULL; // Fast LINK & EVAL setup
+ w->_child = &ntarjan[0]; // Sentinel
+ w->_size = 1;
+ w->_bucket = NULL;
+
+ // Need DEF-USE info for this pass
+ for ( int i = b->outcnt(); i-- > 0; ) { // Put on stack backwards
+ Node* s = b->raw_out(i); // Get a use
+ // CFG nodes only and not dead stuff
+ if( s->is_CFG() && pil->has_node(s) && !visited.test(s->_idx) ) {
+ dfsorder[s->_idx] = dfsnum; // Cache parent's dfsnum for a later use
+ dfstack.push(s);
+ }
+ }
+ dfsnum++; // update after parent's dfsnum has been cached.
+ }
+ }
+
+ return dfsnum;
+}
+
+//------------------------------COMPRESS---------------------------------------
+void NTarjan::COMPRESS()
+{
+ assert( _ancestor != 0, "" );
+ if( _ancestor->_ancestor != 0 ) {
+ _ancestor->COMPRESS( );
+ if( _ancestor->_label->_semi < _label->_semi )
+ _label = _ancestor->_label;
+ _ancestor = _ancestor->_ancestor;
+ }
+}
+
+//------------------------------EVAL-------------------------------------------
+NTarjan *NTarjan::EVAL() {
+ if( !_ancestor ) return _label;
+ COMPRESS();
+ return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
+}
+
+//------------------------------LINK-------------------------------------------
+void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
+ NTarjan *s = w;
+ while( w->_label->_semi < s->_child->_label->_semi ) {
+ if( s->_size + s->_child->_child->_size >= (s->_child->_size << 1) ) {
+ s->_child->_ancestor = s;
+ s->_child = s->_child->_child;
+ } else {
+ s->_child->_size = s->_size;
+ s = s->_ancestor = s->_child;
+ }
+ }
+ s->_label = w->_label;
+ _size += w->_size;
+ if( _size < (w->_size << 1) ) {
+ NTarjan *tmp = s; s = _child; _child = tmp;
+ }
+ while( s != ntarjan0 ) {
+ s->_ancestor = this;
+ s = s->_child;
+ }
+}
+
+//------------------------------setdepth---------------------------------------
+void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
+ NTarjan **top = NEW_RESOURCE_ARRAY(NTarjan*, stack_size);
+ NTarjan **next = top;
+ NTarjan **last;
+ uint depth = 0;
+ *top = this;
+ ++top;
+ do {
+ // next level
+ ++depth;
+ last = top;
+ do {
+ // Set current depth for all tarjans on this level
+ NTarjan *t = *next; // next tarjan from stack
+ ++next;
+ do {
+ dom_depth[t->_control->_idx] = depth; // Set depth in dominator tree
+ NTarjan *dom_child = t->_dom_child;
+ t = t->_dom_next; // next tarjan
+ if (dom_child != NULL) {
+ *top = dom_child; // save child on stack
+ ++top;
+ }
+ } while (t != NULL);
+ } while (next < last);
+ } while (last < top);
+}
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void NTarjan::dump(int offset) const {
+ // Dump the data from this node
+ int i;
+ for(i = offset; i >0; i--) // Use indenting for tree structure
+ tty->print(" ");
+ tty->print("Dominator Node: ");
+ _control->dump(); // Control node for this dom node
+ tty->print("\n");
+ for(i = offset; i >0; i--) // Use indenting for tree structure
+ tty->print(" ");
+ tty->print("semi:%d, size:%d\n",_semi, _size);
+ for(i = offset; i >0; i--) // Use indenting for tree structure
+ tty->print(" ");
+ tty->print("DFS Parent: ");
+ if(_parent != NULL)
+ _parent->_control->dump(); // Parent in DFS
+ tty->print("\n");
+ for(i = offset; i >0; i--) // Use indenting for tree structure
+ tty->print(" ");
+ tty->print("Dom Parent: ");
+ if(_dom != NULL)
+ _dom->_control->dump(); // Parent in Dominator Tree
+ tty->print("\n");
+
+ // Recurse over remaining tree
+ if( _dom_child ) _dom_child->dump(offset+2); // Children in dominator tree
+ if( _dom_next ) _dom_next ->dump(offset ); // Siblings in dominator tree
+
+}
+#endif
diff --git a/src/share/vm/opto/escape.cpp b/src/share/vm/opto/escape.cpp
new file mode 100644
index 000000000..62d0c0f1a
--- /dev/null
+++ b/src/share/vm/opto/escape.cpp
@@ -0,0 +1,1346 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_escape.cpp.incl"
+
+uint PointsToNode::edge_target(uint e) const {
+ assert(_edges != NULL && e < (uint)_edges->length(), "valid edge index");
+ return (_edges->at(e) >> EdgeShift);
+}
+
+PointsToNode::EdgeType PointsToNode::edge_type(uint e) const {
+ assert(_edges != NULL && e < (uint)_edges->length(), "valid edge index");
+ return (EdgeType) (_edges->at(e) & EdgeMask);
+}
+
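+// Each edge is packed into a single uint as (target index << EdgeShift) | type,
+// so one growable array per node records both the target and the edge kind.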
+void PointsToNode::add_edge(uint targIdx, PointsToNode::EdgeType et) {
+ uint v = (targIdx << EdgeShift) + ((uint) et);
+ if (_edges == NULL) {
+ Arena *a = Compile::current()->comp_arena();
+ _edges = new(a) GrowableArray<uint>(a, INITIAL_EDGE_COUNT, 0, 0);
+ }
+ _edges->append_if_missing(v);
+}
+
+void PointsToNode::remove_edge(uint targIdx, PointsToNode::EdgeType et) {
+ uint v = (targIdx << EdgeShift) + ((uint) et);
+
+ _edges->remove(v);
+}
+
+#ifndef PRODUCT
+static char *node_type_names[] = {
+ "UnknownType",
+ "JavaObject",
+ "LocalVar",
+ "Field"
+};
+
+static char *esc_names[] = {
+ "UnknownEscape",
+ "NoEscape ",
+ "ArgEscape ",
+ "GlobalEscape "
+};
+
+static char *edge_type_suffix[] = {
+ "?", // UnknownEdge
+ "P", // PointsToEdge
+ "D", // DeferredEdge
+ "F" // FieldEdge
+};
+
+void PointsToNode::dump() const {
+ NodeType nt = node_type();
+ EscapeState es = escape_state();
+ tty->print("%s %s [[", node_type_names[(int) nt], esc_names[(int) es]);
+ for (uint i = 0; i < edge_count(); i++) {
+ tty->print(" %d%s", edge_target(i), edge_type_suffix[(int) edge_type(i)]);
+ }
+ tty->print("]] ");
+ if (_node == NULL)
+ tty->print_cr("<null>");
+ else
+ _node->dump();
+}
+#endif
+
+ConnectionGraph::ConnectionGraph(Compile * C) : _processed(C->comp_arena()), _node_map(C->comp_arena()) {
+ _collecting = true;
+ this->_compile = C;
+ const PointsToNode &dummy = PointsToNode();
+ _nodes = new(C->comp_arena()) GrowableArray<PointsToNode>(C->comp_arena(), (int) INITIAL_NODE_COUNT, 0, dummy);
+ _phantom_object = C->top()->_idx;
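+ // The phantom object stands for values that flow in from outside this
+ // compilation (see PointsTo); it is keyed by the index of C->top() and
+ // treated as a GlobalEscape JavaObject.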
+ PointsToNode *phn = ptnode_adr(_phantom_object);
+ phn->set_node_type(PointsToNode::JavaObject);
+ phn->set_escape_state(PointsToNode::GlobalEscape);
+}
+
+void ConnectionGraph::add_pointsto_edge(uint from_i, uint to_i) {
+ PointsToNode *f = ptnode_adr(from_i);
+ PointsToNode *t = ptnode_adr(to_i);
+
+ assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
+ assert(f->node_type() == PointsToNode::LocalVar || f->node_type() == PointsToNode::Field, "invalid source of PointsTo edge");
+ assert(t->node_type() == PointsToNode::JavaObject, "invalid destination of PointsTo edge");
+ f->add_edge(to_i, PointsToNode::PointsToEdge);
+}
+
+void ConnectionGraph::add_deferred_edge(uint from_i, uint to_i) {
+ PointsToNode *f = ptnode_adr(from_i);
+ PointsToNode *t = ptnode_adr(to_i);
+
+ assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
+ assert(f->node_type() == PointsToNode::LocalVar || f->node_type() == PointsToNode::Field, "invalid source of Deferred edge");
+ assert(t->node_type() == PointsToNode::LocalVar || t->node_type() == PointsToNode::Field, "invalid destination of Deferred edge");
+ // don't add a self-referential edge, this can occur during removal of
+ // deferred edges
+ if (from_i != to_i)
+ f->add_edge(to_i, PointsToNode::DeferredEdge);
+}
+
+int ConnectionGraph::type_to_offset(const Type *t) {
+ const TypePtr *t_ptr = t->isa_ptr();
+ assert(t_ptr != NULL, "must be a pointer type");
+ return t_ptr->offset();
+}
+
+void ConnectionGraph::add_field_edge(uint from_i, uint to_i, int offset) {
+ PointsToNode *f = ptnode_adr(from_i);
+ PointsToNode *t = ptnode_adr(to_i);
+
+ assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
+ assert(f->node_type() == PointsToNode::JavaObject, "invalid source of Field edge");
+ assert(t->node_type() == PointsToNode::Field, "invalid destination of Field edge");
+ assert (t->offset() == -1 || t->offset() == offset, "conflicting field offsets");
+ t->set_offset(offset);
+
+ f->add_edge(to_i, PointsToNode::FieldEdge);
+}
+
+void ConnectionGraph::set_escape_state(uint ni, PointsToNode::EscapeState es) {
+ PointsToNode *npt = ptnode_adr(ni);
+ PointsToNode::EscapeState old_es = npt->escape_state();
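+ // Escape states are ordered (NoEscape < ArgEscape < GlobalEscape), so a
+ // node's state only ever moves up the lattice.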
+ if (es > old_es)
+ npt->set_escape_state(es);
+}
+
+PointsToNode::EscapeState ConnectionGraph::escape_state(Node *n, PhaseTransform *phase) {
+ uint idx = n->_idx;
+ PointsToNode::EscapeState es;
+
+ // If we are still collecting we don't know the answer yet
+ if (_collecting)
+ return PointsToNode::UnknownEscape;
+
+ // if the node was created after the escape computation, return
+ // UnknownEscape
+ if (idx >= (uint)_nodes->length())
+ return PointsToNode::UnknownEscape;
+
+ es = _nodes->at_grow(idx).escape_state();
+
+ // if we have already computed a value, return it
+ if (es != PointsToNode::UnknownEscape)
+ return es;
+
+ // compute max escape state of anything this node could point to
+ VectorSet ptset(Thread::current()->resource_area());
+ PointsTo(ptset, n, phase);
+ for( VectorSetI i(&ptset); i.test() && es != PointsToNode::GlobalEscape; ++i ) {
+ uint pt = i.elem;
+ PointsToNode::EscapeState pes = _nodes->at(pt).escape_state();
+ if (pes > es)
+ es = pes;
+ }
+ // cache the computed escape state
+ assert(es != PointsToNode::UnknownEscape, "should have computed an escape state");
+ _nodes->adr_at(idx)->set_escape_state(es);
+ return es;
+}
+
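+//
+// Walk the connection graph starting at "n" and add to "ptset" the index of
+// every JavaObject reachable through PointsTo edges, following Deferred edges
+// with a worklist. Informally, for LV1 -D> LV2 -P> JO this yields
+// PointsTo(LV1) = { JO }. A node with no PointsTo or Deferred edges is
+// assumed to have been set outside this method, so _phantom_object is added.
+//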
+void ConnectionGraph::PointsTo(VectorSet &ptset, Node * n, PhaseTransform *phase) {
+ VectorSet visited(Thread::current()->resource_area());
+ GrowableArray<uint> worklist;
+
+ n = skip_casts(n);
+ PointsToNode npt = _nodes->at_grow(n->_idx);
+
+ // If we have a JavaObject, return just that object
+ if (npt.node_type() == PointsToNode::JavaObject) {
+ ptset.set(n->_idx);
+ return;
+ }
+ // we may have a Phi which has not been processed
+ if (npt._node == NULL) {
+ assert(n->is_Phi(), "unprocessed node must be a Phi");
+ record_for_escape_analysis(n);
+ npt = _nodes->at(n->_idx);
+ }
+ worklist.push(n->_idx);
+ while(worklist.length() > 0) {
+ int ni = worklist.pop();
+ PointsToNode pn = _nodes->at_grow(ni);
+ if (!visited.test(ni)) {
+ visited.set(ni);
+
+ // ensure that all inputs of a Phi have been processed
+ if (_collecting && pn._node->is_Phi()) {
+ PhiNode *phi = pn._node->as_Phi();
+ process_phi_escape(phi, phase);
+ }
+
+ int edges_processed = 0;
+ for (uint e = 0; e < pn.edge_count(); e++) {
+ PointsToNode::EdgeType et = pn.edge_type(e);
+ if (et == PointsToNode::PointsToEdge) {
+ ptset.set(pn.edge_target(e));
+ edges_processed++;
+ } else if (et == PointsToNode::DeferredEdge) {
+ worklist.push(pn.edge_target(e));
+ edges_processed++;
+ }
+ }
+ if (edges_processed == 0) {
+ // no deferred or pointsto edges found. Assume the value was set outside
+ // this method. Add the phantom object to the pointsto set.
+ ptset.set(_phantom_object);
+ }
+ }
+ }
+}
+
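+//
+// Remove the outgoing Deferred edges of the node "ni" and replace them with
+// copies of the target's own edges. Informally, using the notation from
+// escape.hpp, LV1 -D> LV2 -P> JO becomes LV1 -P> JO.
+//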
+void ConnectionGraph::remove_deferred(uint ni) {
+ VectorSet visited(Thread::current()->resource_area());
+
+ uint i = 0;
+ PointsToNode *ptn = ptnode_adr(ni);
+
+ while(i < ptn->edge_count()) {
+ if (ptn->edge_type(i) != PointsToNode::DeferredEdge) {
+ i++;
+ } else {
+ uint t = ptn->edge_target(i);
+ PointsToNode *ptt = ptnode_adr(t);
+ ptn->remove_edge(t, PointsToNode::DeferredEdge);
+ if(!visited.test(t)) {
+ visited.set(t);
+ for (uint j = 0; j < ptt->edge_count(); j++) {
+ uint n1 = ptt->edge_target(j);
+ PointsToNode *pt1 = ptnode_adr(n1);
+ switch(ptt->edge_type(j)) {
+ case PointsToNode::PointsToEdge:
+ add_pointsto_edge(ni, n1);
+ break;
+ case PointsToNode::DeferredEdge:
+ add_deferred_edge(ni, n1);
+ break;
+ case PointsToNode::FieldEdge:
+ assert(false, "invalid connection graph");
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+// Add an edge to the node given by "to_i" from any field of adr_i whose offset
+// matches "offset". A deferred edge is added if to_i is a LocalVar, and
+// a pointsto edge is added if it is a JavaObject.
+
+void ConnectionGraph::add_edge_from_fields(uint adr_i, uint to_i, int offs) {
+ PointsToNode an = _nodes->at_grow(adr_i);
+ PointsToNode to = _nodes->at_grow(to_i);
+ bool deferred = (to.node_type() == PointsToNode::LocalVar);
+
+ for (uint fe = 0; fe < an.edge_count(); fe++) {
+ assert(an.edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge");
+ int fi = an.edge_target(fe);
+ PointsToNode pf = _nodes->at_grow(fi);
+ int po = pf.offset();
+ if (po == offs || po == Type::OffsetBot || offs == Type::OffsetBot) {
+ if (deferred)
+ add_deferred_edge(fi, to_i);
+ else
+ add_pointsto_edge(fi, to_i);
+ }
+ }
+}
+
+// Add a deferred edge from the node given by "from_i" to any field of adr_i whose offset
+// matches "offset".
+void ConnectionGraph::add_deferred_edge_to_fields(uint from_i, uint adr_i, int offs) {
+ PointsToNode an = _nodes->at_grow(adr_i);
+ for (uint fe = 0; fe < an.edge_count(); fe++) {
+ assert(an.edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge");
+ int fi = an.edge_target(fe);
+ PointsToNode pf = _nodes->at_grow(fi);
+ int po = pf.offset();
+ if (pf.edge_count() == 0) {
+ // we have not seen any stores to this field, assume it was set outside this method
+ add_pointsto_edge(fi, _phantom_object);
+ }
+ if (po == offs || po == Type::OffsetBot || offs == Type::OffsetBot) {
+ add_deferred_edge(from_i, fi);
+ }
+ }
+}
+
+//
+// Search memory chain of "mem" to find a MemNode whose address
+// is the specified alias index. Returns the MemNode found or the
+// first non-MemNode encountered.
+//
+Node *ConnectionGraph::find_mem(Node *mem, int alias_idx, PhaseGVN *igvn) {
+ if (mem == NULL)
+ return mem;
+ while (mem->is_Mem()) {
+ const Type *at = igvn->type(mem->in(MemNode::Address));
+ if (at != Type::TOP) {
+ assert (at->isa_ptr() != NULL, "pointer type required.");
+ int idx = _compile->get_alias_index(at->is_ptr());
+ if (idx == alias_idx)
+ break;
+ }
+ mem = mem->in(MemNode::Memory);
+ }
+ return mem;
+}
+
+//
+// Adjust the type and inputs of an AddP which computes the
+// address of a field of an instance
+//
+void ConnectionGraph::split_AddP(Node *addp, Node *base, PhaseGVN *igvn) {
+ const TypeOopPtr *t = igvn->type(addp)->isa_oopptr();
+ const TypeOopPtr *base_t = igvn->type(base)->isa_oopptr();
+ assert(t != NULL, "expecting oopptr");
+ assert(base_t != NULL && base_t->is_instance(), "expecting instance oopptr");
+ uint inst_id = base_t->instance_id();
+ assert(!t->is_instance() || t->instance_id() == inst_id,
+ "old type must be non-instance or match new type");
+ const TypeOopPtr *tinst = base_t->add_offset(t->offset())->is_oopptr();
+ // ensure an alias index is allocated for the instance type
+ int alias_idx = _compile->get_alias_index(tinst);
+ igvn->set_type(addp, tinst);
+ // record the allocation in the node map
+ set_map(addp->_idx, get_map(base->_idx));
+ // if the Address input is not the appropriate instance type (due to
+ // intervening casts), insert a cast
+ Node *adr = addp->in(AddPNode::Address);
+ const TypeOopPtr *atype = igvn->type(adr)->isa_oopptr();
+ if (atype->instance_id() != inst_id) {
+ assert(!atype->is_instance(), "no conflicting instances");
+ const TypeOopPtr *new_atype = base_t->add_offset(atype->offset())->isa_oopptr();
+ Node *acast = new (_compile, 2) CastPPNode(adr, new_atype);
+ acast->set_req(0, adr->in(0));
+ igvn->set_type(acast, new_atype);
+ record_for_optimizer(acast);
+ Node *bcast = acast;
+ Node *abase = addp->in(AddPNode::Base);
+ if (abase != adr) {
+ bcast = new (_compile, 2) CastPPNode(abase, base_t);
+ bcast->set_req(0, abase->in(0));
+ igvn->set_type(bcast, base_t);
+ record_for_optimizer(bcast);
+ }
+ igvn->hash_delete(addp);
+ addp->set_req(AddPNode::Base, bcast);
+ addp->set_req(AddPNode::Address, acast);
+ igvn->hash_insert(addp);
+ record_for_optimizer(addp);
+ }
+}
+
+//
+// Create a new version of orig_phi if necessary. Returns either the newly
+// created phi or an existing phi. Sets new_created to indicate whether a new
+// phi was created. Caches the last newly created phi in the node map.
+//
+PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn, bool &new_created) {
+ Compile *C = _compile;
+ new_created = false;
+ int phi_alias_idx = C->get_alias_index(orig_phi->adr_type());
+ // nothing to do if orig_phi is bottom memory or matches alias_idx
+ if (phi_alias_idx == Compile::AliasIdxBot || phi_alias_idx == alias_idx) {
+ return orig_phi;
+ }
+ // have we already created a Phi for this alias index?
+ PhiNode *result = get_map_phi(orig_phi->_idx);
+ const TypePtr *atype = C->get_adr_type(alias_idx);
+ if (result != NULL && C->get_alias_index(result->adr_type()) == alias_idx) {
+ return result;
+ }
+
+ orig_phi_worklist.append_if_missing(orig_phi);
+ result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype);
+ set_map_phi(orig_phi->_idx, result);
+ igvn->set_type(result, result->bottom_type());
+ record_for_optimizer(result);
+ new_created = true;
+ return result;
+}
+
+//
+// Return a new version of Memory Phi "orig_phi" with the inputs having the
+// specified alias index.
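+// The chain of memory Phis reachable through the inputs is walked iteratively
+// with explicit worklists (phi_list/cur_input) rather than by recursion, so
+// long Phi chains do not grow the native stack.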
+//
+PhiNode *ConnectionGraph::split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn) {
+
+ assert(alias_idx != Compile::AliasIdxBot, "can't split out bottom memory");
+ Compile *C = _compile;
+ bool new_phi_created;
+ PhiNode *result = create_split_phi(orig_phi, alias_idx, orig_phi_worklist, igvn, new_phi_created);
+ if (!new_phi_created) {
+ return result;
+ }
+
+ GrowableArray<PhiNode *> phi_list;
+ GrowableArray<uint> cur_input;
+
+ PhiNode *phi = orig_phi;
+ uint idx = 1;
+ bool finished = false;
+ while(!finished) {
+ while (idx < phi->req()) {
+ Node *mem = find_mem(phi->in(idx), alias_idx, igvn);
+ if (mem != NULL && mem->is_Phi()) {
+ PhiNode *nphi = create_split_phi(mem->as_Phi(), alias_idx, orig_phi_worklist, igvn, new_phi_created);
+ if (new_phi_created) {
+ // found a Phi for which we created a new split; push the current one on the
+ // worklist and begin processing the new one
+ phi_list.push(phi);
+ cur_input.push(idx);
+ phi = mem->as_Phi();
+ result = nphi;
+ idx = 1;
+ continue;
+ } else {
+ mem = nphi;
+ }
+ }
+ result->set_req(idx++, mem);
+ }
+#ifdef ASSERT
+ // verify that the new Phi has an input for each input of the original
+ assert( phi->req() == result->req(), "must have same number of inputs.");
+ assert( result->in(0) != NULL && result->in(0) == phi->in(0), "regions must match");
+ for (uint i = 1; i < phi->req(); i++) {
+ assert((phi->in(i) == NULL) == (result->in(i) == NULL), "inputs must correspond.");
+ }
+#endif
+ // we have finished processing a Phi, see if there are any more to do
+ finished = (phi_list.length() == 0 );
+ if (!finished) {
+ phi = phi_list.pop();
+ idx = cur_input.pop();
+ PhiNode *prev_phi = get_map_phi(phi->_idx);
+ prev_phi->set_req(idx++, result);
+ result = prev_phi;
+ }
+ }
+ return result;
+}
+
+//
+// Convert the types of unescaped object to instance types where possible,
+// propagate the new type information through the graph, and update memory
+// edges and MergeMem inputs to reflect the new type.
+//
+// We start with allocations (and calls which may be allocations) on alloc_worklist.
+// The processing is done in 4 phases:
+//
+// Phase 1: Process possible allocations from alloc_worklist. Create instance
+// types for the CheckCastPP for allocations where possible.
+// Propagate the new types through users as follows:
+// casts and Phi: push users on alloc_worklist
+// AddP: cast Base and Address inputs to the instance type
+// push any AddP users on alloc_worklist and push any memnode
+// users onto memnode_worklist.
+// Phase 2: Process MemNodes from memnode_worklist. Compute the new address
+// type and search the Memory chain for a store with the appropriate
+// address type. If a Phi is found, create a new version with
+// the appropriate memory slices from each of the Phi inputs.
+// For stores, process the users as follows:
+// MemNode: push on memnode_worklist
+// MergeMem: push on mergemem_worklist
+// Phase 3: Process MergeMem nodes from mergemem_worklist. Walk each memory slice
+// moving the first node encountered of each instance type to the
+// input corresponding to its alias index.
+// Phase 4: Update the inputs of non-instance memory Phis and the Memory input of memnodes.
+//
+// In the following example, the CheckCastPP nodes are the casts of allocation
+// results, and the allocation of node 29 is unescaped and eligible to be an
+// instance type.
+//
+// We start with:
+//
+// 7 Parm #memory
+// 10 ConI "12"
+// 19 CheckCastPP "Foo"
+// 20 AddP _ 19 19 10 Foo+12 alias_index=4
+// 29 CheckCastPP "Foo"
+// 30 AddP _ 29 29 10 Foo+12 alias_index=4
+//
+// 40 StoreP 25 7 20 ... alias_index=4
+// 50 StoreP 35 40 30 ... alias_index=4
+// 60 StoreP 45 50 20 ... alias_index=4
+// 70 LoadP _ 60 30 ... alias_index=4
+// 80 Phi 75 50 60 Memory alias_index=4
+// 90 LoadP _ 80 30 ... alias_index=4
+// 100 LoadP _ 80 20 ... alias_index=4
+//
+//
+// Phase 1 creates an instance type for node 29 assigning it an instance id of 24
+// and creating a new alias index for node 30. This gives:
+//
+// 7 Parm #memory
+// 10 ConI "12"
+// 19 CheckCastPP "Foo"
+// 20 AddP _ 19 19 10 Foo+12 alias_index=4
+// 29 CheckCastPP "Foo" iid=24
+// 30 AddP _ 29 29 10 Foo+12 alias_index=6 iid=24
+//
+// 40 StoreP 25 7 20 ... alias_index=4
+// 50 StoreP 35 40 30 ... alias_index=6
+// 60 StoreP 45 50 20 ... alias_index=4
+// 70 LoadP _ 60 30 ... alias_index=6
+// 80 Phi 75 50 60 Memory alias_index=4
+// 90 LoadP _ 80 30 ... alias_index=6
+// 100 LoadP _ 80 20 ... alias_index=4
+//
+// In phase 2, new memory inputs are computed for the loads and stores,
+// and a new version of the phi is created. In phase 4, the inputs to
+// node 80 are updated and then the memory nodes are updated with the
+// values computed in phase 2. This results in:
+//
+// 7 Parm #memory
+// 10 ConI "12"
+// 19 CheckCastPP "Foo"
+// 20 AddP _ 19 19 10 Foo+12 alias_index=4
+// 29 CheckCastPP "Foo" iid=24
+// 30 AddP _ 29 29 10 Foo+12 alias_index=6 iid=24
+//
+// 40 StoreP 25 7 20 ... alias_index=4
+// 50 StoreP 35 7 30 ... alias_index=6
+// 60 StoreP 45 40 20 ... alias_index=4
+// 70 LoadP _ 50 30 ... alias_index=6
+// 80 Phi 75 40 60 Memory alias_index=4
+// 120 Phi 75 50 50 Memory alias_index=6
+// 90 LoadP _ 120 30 ... alias_index=6
+// 100 LoadP _ 80 20 ... alias_index=4
+//
+void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) {
+ GrowableArray<Node *> memnode_worklist;
+ GrowableArray<Node *> mergemem_worklist;
+ GrowableArray<PhiNode *> orig_phis;
+ PhaseGVN *igvn = _compile->initial_gvn();
+ uint new_index_start = (uint) _compile->num_alias_types();
+ VectorSet visited(Thread::current()->resource_area());
+ VectorSet ptset(Thread::current()->resource_area());
+
+ // Phase 1: Process possible allocations from alloc_worklist. Create instance
+ // types for the CheckCastPP for allocations where possible.
+ while (alloc_worklist.length() != 0) {
+ Node *n = alloc_worklist.pop();
+ uint ni = n->_idx;
+ if (n->is_Call()) {
+ CallNode *alloc = n->as_Call();
+ // copy escape information to call node
+ PointsToNode ptn = _nodes->at(alloc->_idx);
+ PointsToNode::EscapeState es = escape_state(alloc, igvn);
+ alloc->_escape_state = es;
+ // find CheckCastPP of call return value
+ n = alloc->proj_out(TypeFunc::Parms);
+ if (n != NULL && n->outcnt() == 1) {
+ n = n->unique_out();
+ if (n->Opcode() != Op_CheckCastPP) {
+ continue;
+ }
+ } else {
+ continue;
+ }
+ // we have an allocation or call which returns a Java object, see if it is unescaped
+ if (es != PointsToNode::NoEscape || !ptn._unique_type) {
+ continue; // can't make a unique type
+ }
+ set_map(alloc->_idx, n);
+ set_map(n->_idx, alloc);
+ const TypeInstPtr *t = igvn->type(n)->isa_instptr();
+ // Unique types which are arrays are not currently supported.
+ // The check for AllocateArray is needed in case an array
+ // allocation is immediately cast to Object
+ if (t == NULL || alloc->is_AllocateArray())
+ continue; // not a TypeInstPtr
+ const TypeOopPtr *tinst = t->cast_to_instance(ni);
+ igvn->hash_delete(n);
+ igvn->set_type(n, tinst);
+ n->raise_bottom_type(tinst);
+ igvn->hash_insert(n);
+ } else if (n->is_AddP()) {
+ ptset.Clear();
+ PointsTo(ptset, n->in(AddPNode::Address), igvn);
+ assert(ptset.Size() == 1, "AddP address is unique");
+ Node *base = get_map(ptset.getelem());
+ split_AddP(n, base, igvn);
+ } else if (n->is_Phi() || n->Opcode() == Op_CastPP || n->Opcode() == Op_CheckCastPP) {
+ if (visited.test_set(n->_idx)) {
+ assert(n->is_Phi(), "loops only through Phi's");
+ continue; // already processed
+ }
+ ptset.Clear();
+ PointsTo(ptset, n, igvn);
+ if (ptset.Size() == 1) {
+ TypeNode *tn = n->as_Type();
+ Node *val = get_map(ptset.getelem());
+ const TypeInstPtr *val_t = igvn->type(val)->isa_instptr();
+ assert(val_t != NULL && val_t->is_instance(), "instance type expected.");
+ const TypeInstPtr *tn_t = igvn->type(tn)->isa_instptr();
+
+ if (tn_t != NULL && val_t->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE)->higher_equal(tn_t)) {
+ igvn->hash_delete(tn);
+ igvn->set_type(tn, val_t);
+ tn->set_type(val_t);
+ igvn->hash_insert(tn);
+ }
+ }
+ } else {
+ continue;
+ }
+ // push users on appropriate worklist
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if(use->is_Mem() && use->in(MemNode::Address) == n) {
+ memnode_worklist.push(use);
+ } else if (use->is_AddP() || use->is_Phi() || use->Opcode() == Op_CastPP || use->Opcode() == Op_CheckCastPP) {
+ alloc_worklist.push(use);
+ }
+ }
+
+ }
+ uint new_index_end = (uint) _compile->num_alias_types();
+
+ // Phase 2: Process MemNodes from memnode_worklist. Compute new address types and
+ // new values for Memory inputs (the Memory inputs are not
+ // actually updated until phase 4).
+ if (memnode_worklist.length() == 0)
+ return; // nothing to do
+
+
+ while (memnode_worklist.length() != 0) {
+ Node *n = memnode_worklist.pop();
+ if (n->is_Phi()) {
+ assert(n->as_Phi()->adr_type() != TypePtr::BOTTOM, "narrow memory slice required");
+ // we don't need to do anything, but the users must be pushed if we haven't processed
+ // this Phi before
+ if (visited.test_set(n->_idx))
+ continue;
+ } else {
+ assert(n->is_Mem(), "memory node required.");
+ Node *addr = n->in(MemNode::Address);
+ const Type *addr_t = igvn->type(addr);
+ if (addr_t == Type::TOP)
+ continue;
+ assert (addr_t->isa_ptr() != NULL, "pointer type required.");
+ int alias_idx = _compile->get_alias_index(addr_t->is_ptr());
+ Node *mem = find_mem(n->in(MemNode::Memory), alias_idx, igvn);
+ if (mem->is_Phi()) {
+ mem = split_memory_phi(mem->as_Phi(), alias_idx, orig_phis, igvn);
+ }
+ if (mem != n->in(MemNode::Memory))
+ set_map(n->_idx, mem);
+ if (n->is_Load()) {
+ continue; // don't push users
+ } else if (n->is_LoadStore()) {
+ // get the memory projection
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if (use->Opcode() == Op_SCMemProj) {
+ n = use;
+ break;
+ }
+ }
+ assert(n->Opcode() == Op_SCMemProj, "memory projection required");
+ }
+ }
+ // push user on appropriate worklist
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if (use->is_Phi()) {
+ memnode_worklist.push(use);
+ } else if(use->is_Mem() && use->in(MemNode::Memory) == n) {
+ memnode_worklist.push(use);
+ } else if (use->is_MergeMem()) {
+ mergemem_worklist.push(use);
+ }
+ }
+ }
+
+ // Phase 3: Process MergeMem nodes from mergemem_worklist. Walk each memory slice
+ // moving the first node encountered of each instance type to
+ // the input corresponding to its alias index.
+ while (mergemem_worklist.length() != 0) {
+ Node *n = mergemem_worklist.pop();
+ assert(n->is_MergeMem(), "MergeMem node required.");
+ MergeMemNode *nmm = n->as_MergeMem();
+ // Note: we don't want to use MergeMemStream here because we only want to
+ // scan inputs which exist at the start, not ones we add during processing
+ uint nslices = nmm->req();
+ igvn->hash_delete(nmm);
+ for (uint i = Compile::AliasIdxRaw+1; i < nslices; i++) {
+ Node * mem = nmm->in(i);
+ Node * cur = NULL;
+ if (mem == NULL || mem->is_top())
+ continue;
+ while (mem->is_Mem()) {
+ const Type *at = igvn->type(mem->in(MemNode::Address));
+ if (at != Type::TOP) {
+ assert (at->isa_ptr() != NULL, "pointer type required.");
+ uint idx = (uint)_compile->get_alias_index(at->is_ptr());
+ if (idx == i) {
+ if (cur == NULL)
+ cur = mem;
+ } else {
+ if (idx >= nmm->req() || nmm->is_empty_memory(nmm->in(idx))) {
+ nmm->set_memory_at(idx, mem);
+ }
+ }
+ }
+ mem = mem->in(MemNode::Memory);
+ }
+ nmm->set_memory_at(i, (cur != NULL) ? cur : mem);
+ if (mem->is_Phi()) {
+ // We have encountered a Phi; we need to split it for
+ // any instance of the current type if we haven't encountered
+ // a value of that instance along the chain.
+ for (uint ni = new_index_start; ni < new_index_end; ni++) {
+ if((uint)_compile->get_general_index(ni) == i) {
+ Node *m = (ni >= nmm->req()) ? nmm->empty_memory() : nmm->in(ni);
+ if (nmm->is_empty_memory(m)) {
+ nmm->set_memory_at(ni, split_memory_phi(mem->as_Phi(), ni, orig_phis, igvn));
+ }
+ }
+ }
+ }
+ }
+ igvn->hash_insert(nmm);
+ record_for_optimizer(nmm);
+ }
+
+ // Phase 4: Update the inputs of non-instance memory Phis and the Memory input of memnodes
+ //
+ // First update the inputs of any non-instance Phi's from
+ // which we split out an instance Phi. Note we don't have
+ // to recursively process Phi's encountered on the input memory
+ // chains as is done in split_memory_phi() since they will
+ // also be processed here.
+ while (orig_phis.length() != 0) {
+ PhiNode *phi = orig_phis.pop();
+ int alias_idx = _compile->get_alias_index(phi->adr_type());
+ igvn->hash_delete(phi);
+ for (uint i = 1; i < phi->req(); i++) {
+ Node *mem = phi->in(i);
+ Node *new_mem = find_mem(mem, alias_idx, igvn);
+ if (mem != new_mem) {
+ phi->set_req(i, new_mem);
+ }
+ }
+ igvn->hash_insert(phi);
+ record_for_optimizer(phi);
+ }
+
+ // Update the memory inputs of MemNodes with the value we computed
+ // in Phase 2.
+ for (int i = 0; i < _nodes->length(); i++) {
+ Node *nmem = get_map(i);
+ if (nmem != NULL) {
+ Node *n = _nodes->at(i)._node;
+ if (n != NULL && n->is_Mem()) {
+ igvn->hash_delete(n);
+ n->set_req(MemNode::Memory, nmem);
+ igvn->hash_insert(n);
+ record_for_optimizer(n);
+ }
+ }
+ }
+}
+
+void ConnectionGraph::compute_escape() {
+ GrowableArray<int> worklist;
+ GrowableArray<Node *> alloc_worklist;
+ VectorSet visited(Thread::current()->resource_area());
+ PhaseGVN *igvn = _compile->initial_gvn();
+
+ // process Phi nodes from the deferred list; they may not have had all of their inputs processed yet
+ while(_deferred.size() > 0) {
+ Node * n = _deferred.pop();
+ PhiNode * phi = n->as_Phi();
+
+ process_phi_escape(phi, igvn);
+ }
+
+ VectorSet ptset(Thread::current()->resource_area());
+
+ // remove deferred edges from the graph and collect
+ // information we will need for type splitting
+ for (uint ni = 0; ni < (uint)_nodes->length(); ni++) {
+ PointsToNode * ptn = _nodes->adr_at(ni);
+ PointsToNode::NodeType nt = ptn->node_type();
+
+ if (nt == PointsToNode::UnknownType) {
+ continue; // not a node we are interested in
+ }
+ Node *n = ptn->_node;
+ if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) {
+ remove_deferred(ni);
+ if (n->is_AddP()) {
+ // if this AddP computes an address which may point to more than one
+ // object, nothing the address points to can be a unique type.
+ Node *base = n->in(AddPNode::Base);
+ ptset.Clear();
+ PointsTo(ptset, base, igvn);
+ if (ptset.Size() > 1) {
+ for( VectorSetI j(&ptset); j.test(); ++j ) {
+ PointsToNode *ptaddr = _nodes->adr_at(j.elem);
+ ptaddr->_unique_type = false;
+ }
+ }
+ }
+ } else if (n->is_Call()) {
+ // initialize _escape_state of calls to GlobalEscape
+ n->as_Call()->_escape_state = PointsToNode::GlobalEscape;
+ // push the call on alloc_worklist (allocations are calls)
+ // for processing by split_unique_types()
+ alloc_worklist.push(n);
+ }
+ }
+ // push all GlobalEscape nodes on the worklist
+ for (uint nj = 0; nj < (uint)_nodes->length(); nj++) {
+ if (_nodes->at(nj).escape_state() == PointsToNode::GlobalEscape) {
+ worklist.append(nj);
+ }
+ }
+ // mark all nodes reachable from GlobalEscape nodes
+ while(worklist.length() > 0) {
+ PointsToNode n = _nodes->at(worklist.pop());
+ for (uint ei = 0; ei < n.edge_count(); ei++) {
+ uint npi = n.edge_target(ei);
+ PointsToNode *np = ptnode_adr(npi);
+ if (np->escape_state() != PointsToNode::GlobalEscape) {
+ np->set_escape_state(PointsToNode::GlobalEscape);
+ worklist.append_if_missing(npi);
+ }
+ }
+ }
+
+ // push all ArgEscape nodes on the worklist
+ for (uint nk = 0; nk < (uint)_nodes->length(); nk++) {
+ if (_nodes->at(nk).escape_state() == PointsToNode::ArgEscape)
+ worklist.push(nk);
+ }
+ // mark all nodes reachable from ArgEscape nodes
+ while(worklist.length() > 0) {
+ PointsToNode n = _nodes->at(worklist.pop());
+
+ for (uint ei = 0; ei < n.edge_count(); ei++) {
+ uint npi = n.edge_target(ei);
+ PointsToNode *np = ptnode_adr(npi);
+ if (np->escape_state() != PointsToNode::ArgEscape) {
+ np->set_escape_state(PointsToNode::ArgEscape);
+ worklist.append_if_missing(npi);
+ }
+ }
+ }
+ _collecting = false;
+
+ // Now use the escape information to create unique types for
+ // unescaped objects
+ split_unique_types(alloc_worklist);
+}
+
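+// Strip CastPP/CheckCastPP wrappers; for example, skip_casts applied to
+// CheckCastPP(CastPP(p)) returns the underlying node p.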
+Node * ConnectionGraph::skip_casts(Node *n) {
+ while(n->Opcode() == Op_CastPP || n->Opcode() == Op_CheckCastPP) {
+ n = n->in(1);
+ }
+ return n;
+}
+
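+// Add PointsTo or Deferred edges to "phi" for any inputs that have appeared
+// since the last time the Phi was processed; _inputs_processed guards against
+// redundant and recursive reprocessing.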
+void ConnectionGraph::process_phi_escape(PhiNode *phi, PhaseTransform *phase) {
+
+ if (phi->type()->isa_oopptr() == NULL)
+ return; // nothing to do if not an oop
+
+ PointsToNode *ptadr = ptnode_adr(phi->_idx);
+ int incount = phi->req();
+ int non_null_inputs = 0;
+
+ for (int i = 1; i < incount ; i++) {
+ if (phi->in(i) != NULL)
+ non_null_inputs++;
+ }
+ if (non_null_inputs == ptadr->_inputs_processed)
+ return; // no new inputs since the last time this node was processed,
+ // the current information is valid
+
+ ptadr->_inputs_processed = non_null_inputs; // prevent recursive processing of this node
+ for (int j = 1; j < incount ; j++) {
+ Node * n = phi->in(j);
+ if (n == NULL)
+ continue; // ignore NULL
+ n = skip_casts(n);
+ if (n->is_top() || n == phi)
+ continue; // ignore top or inputs which go back to this node
+ int nopc = n->Opcode();
+ PointsToNode npt = _nodes->at(n->_idx);
+ if (_nodes->at(n->_idx).node_type() == PointsToNode::JavaObject) {
+ add_pointsto_edge(phi->_idx, n->_idx);
+ } else {
+ add_deferred_edge(phi->_idx, n->_idx);
+ }
+ }
+}
+
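+// Compute the escape state of the oop arguments of "call": arguments to
+// allocation and locking nodes do not escape; for a static Java call the
+// callee's BCEscapeAnalyzer decides per argument; any other call makes
+// everything its oop arguments point to GlobalEscape.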
+void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) {
+
+ _processed.set(call->_idx);
+ switch (call->Opcode()) {
+
+ // arguments to allocation and locking don't escape
+ case Op_Allocate:
+ case Op_AllocateArray:
+ case Op_Lock:
+ case Op_Unlock:
+ break;
+
+ case Op_CallStaticJava:
+ // For a static call, we know exactly what method is being called.
+ // Use bytecode estimator to record the call's escape effects
+ {
+ ciMethod *meth = call->as_CallJava()->method();
+ if (meth != NULL) {
+ const TypeTuple * d = call->tf()->domain();
+ BCEscapeAnalyzer call_analyzer(meth);
+ VectorSet ptset(Thread::current()->resource_area());
+ for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+ const Type* at = d->field_at(i);
+ int k = i - TypeFunc::Parms;
+
+ if (at->isa_oopptr() != NULL) {
+ Node *arg = skip_casts(call->in(i));
+
+ if (!call_analyzer.is_arg_stack(k)) {
+ // The argument globally escapes; mark everything it could point to
+ ptset.Clear();
+ PointsTo(ptset, arg, phase);
+ for( VectorSetI j(&ptset); j.test(); ++j ) {
+ uint pt = j.elem;
+
+ set_escape_state(pt, PointsToNode::GlobalEscape);
+ }
+ } else if (!call_analyzer.is_arg_local(k)) {
+ // The argument itself doesn't escape, but any fields might
+ ptset.Clear();
+ PointsTo(ptset, arg, phase);
+ for( VectorSetI j(&ptset); j.test(); ++j ) {
+ uint pt = j.elem;
+ add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
+ }
+ }
+ }
+ }
+ call_analyzer.copy_dependencies(C()->dependencies());
+ break;
+ }
+ // fall-through if not a Java method
+ }
+
+ default:
+ // Some other type of call, assume the worst case: all arguments
+ // globally escape.
+ {
+ // adjust escape state for outgoing arguments
+ const TypeTuple * d = call->tf()->domain();
+ VectorSet ptset(Thread::current()->resource_area());
+ for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+ const Type* at = d->field_at(i);
+
+ if (at->isa_oopptr() != NULL) {
+ Node *arg = skip_casts(call->in(i));
+ ptset.Clear();
+ PointsTo(ptset, arg, phase);
+ for( VectorSetI j(&ptset); j.test(); ++j ) {
+ uint pt = j.elem;
+
+ set_escape_state(pt, PointsToNode::GlobalEscape);
+ }
+ }
+ }
+ }
+ }
+}
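+
+// Model the value returned by "call" at projection "resproj": an Allocate
+// becomes a new JavaObject (GlobalEscape only if the klass is finalizable or
+// a Thread subclass); for a static Java call the callee's BCEscapeAnalyzer
+// decides whether the result is one of the arguments; otherwise a returned
+// pointer is assumed to escape globally.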
+void ConnectionGraph::process_call_result(ProjNode *resproj, PhaseTransform *phase) {
+ CallNode *call = resproj->in(0)->as_Call();
+
+ PointsToNode *ptadr = ptnode_adr(resproj->_idx);
+
+ ptadr->_node = resproj;
+ ptadr->set_node_type(PointsToNode::LocalVar);
+ set_escape_state(resproj->_idx, PointsToNode::UnknownEscape);
+ _processed.set(resproj->_idx);
+
+ switch (call->Opcode()) {
+ case Op_Allocate:
+ {
+ Node *k = call->in(AllocateNode::KlassNode);
+ const TypeKlassPtr *kt;
+ if (k->Opcode() == Op_LoadKlass) {
+ kt = k->as_Load()->type()->isa_klassptr();
+ } else {
+ kt = k->as_Type()->type()->isa_klassptr();
+ }
+ assert(kt != NULL, "TypeKlassPtr required.");
+ ciKlass* cik = kt->klass();
+ ciInstanceKlass* ciik = cik->as_instance_klass();
+
+ PointsToNode *ptadr = ptnode_adr(call->_idx);
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ if (cik->is_subclass_of(_compile->env()->Thread_klass()) || ciik->has_finalizer()) {
+ set_escape_state(call->_idx, PointsToNode::GlobalEscape);
+ add_pointsto_edge(resproj->_idx, _phantom_object);
+ } else {
+ set_escape_state(call->_idx, PointsToNode::NoEscape);
+ add_pointsto_edge(resproj->_idx, call->_idx);
+ }
+ _processed.set(call->_idx);
+ break;
+ }
+
+ case Op_AllocateArray:
+ {
+ PointsToNode *ptadr = ptnode_adr(call->_idx);
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ set_escape_state(call->_idx, PointsToNode::NoEscape);
+ _processed.set(call->_idx);
+ add_pointsto_edge(resproj->_idx, call->_idx);
+ break;
+ }
+
+ case Op_Lock:
+ case Op_Unlock:
+ break;
+
+ case Op_CallStaticJava:
+ // For a static call, we know exactly what method is being called.
+ // Use bytecode estimator to record whether the call's return value escapes
+ {
+ const TypeTuple *r = call->tf()->range();
+ const Type* ret_type = NULL;
+
+ if (r->cnt() > TypeFunc::Parms)
+ ret_type = r->field_at(TypeFunc::Parms);
+
+ // Note: we use isa_ptr() instead of isa_oopptr() here because the
+ // _multianewarray functions return a TypeRawPtr.
+ if (ret_type == NULL || ret_type->isa_ptr() == NULL)
+ break; // doesn't return a pointer type
+
+ ciMethod *meth = call->as_CallJava()->method();
+ if (meth == NULL) {
+ // not a Java method, assume global escape
+ set_escape_state(call->_idx, PointsToNode::GlobalEscape);
+ if (resproj != NULL)
+ add_pointsto_edge(resproj->_idx, _phantom_object);
+ } else {
+ BCEscapeAnalyzer call_analyzer(meth);
+ VectorSet ptset(Thread::current()->resource_area());
+
+ if (call_analyzer.is_return_local() && resproj != NULL) {
+ // determine whether any arguments are returned
+ const TypeTuple * d = call->tf()->domain();
+ set_escape_state(call->_idx, PointsToNode::NoEscape);
+ for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+ const Type* at = d->field_at(i);
+
+ if (at->isa_oopptr() != NULL) {
+ Node *arg = skip_casts(call->in(i));
+
+ if (call_analyzer.is_arg_returned(i - TypeFunc::Parms)) {
+ PointsToNode *arg_esp = _nodes->adr_at(arg->_idx);
+ if (arg_esp->node_type() == PointsToNode::JavaObject)
+ add_pointsto_edge(resproj->_idx, arg->_idx);
+ else
+ add_deferred_edge(resproj->_idx, arg->_idx);
+ arg_esp->_hidden_alias = true;
+ }
+ }
+ }
+ } else {
+ set_escape_state(call->_idx, PointsToNode::GlobalEscape);
+ if (resproj != NULL)
+ add_pointsto_edge(resproj->_idx, _phantom_object);
+ }
+ call_analyzer.copy_dependencies(C()->dependencies());
+ }
+ break;
+ }
+
+ default:
+ // Some other type of call, assume the worst case that the
+ // returned value, if any, globally escapes.
+ {
+ const TypeTuple *r = call->tf()->range();
+
+ if (r->cnt() > TypeFunc::Parms) {
+ const Type* ret_type = r->field_at(TypeFunc::Parms);
+
+ // Note: we use isa_ptr() instead of isa_oopptr() here because the
+ // _multianewarray functions return a TypeRawPtr.
+ if (ret_type->isa_ptr() != NULL) {
+ PointsToNode *ptadr = ptnode_adr(call->_idx);
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ set_escape_state(call->_idx, PointsToNode::GlobalEscape);
+ if (resproj != NULL)
+ add_pointsto_edge(resproj->_idx, _phantom_object);
+ }
+ }
+ }
+ }
+}
+
+void ConnectionGraph::record_for_escape_analysis(Node *n) {
+ if (_collecting) {
+ if (n->is_Phi()) {
+ PhiNode *phi = n->as_Phi();
+ const Type *pt = phi->type();
+ if ((pt->isa_oopptr() != NULL) || pt == TypePtr::NULL_PTR) {
+ PointsToNode *ptn = ptnode_adr(phi->_idx);
+ ptn->set_node_type(PointsToNode::LocalVar);
+ ptn->_node = n;
+ _deferred.push(n);
+ }
+ }
+ }
+}
+
+void ConnectionGraph::record_escape_work(Node *n, PhaseTransform *phase) {
+
+ int opc = n->Opcode();
+ PointsToNode *ptadr = ptnode_adr(n->_idx);
+
+ if (_processed.test(n->_idx))
+ return;
+
+ ptadr->_node = n;
+ if (n->is_Call()) {
+ CallNode *call = n->as_Call();
+ process_call_arguments(call, phase);
+ return;
+ }
+
+ switch (opc) {
+ case Op_AddP:
+ {
+ Node *base = skip_casts(n->in(AddPNode::Base));
+ ptadr->set_node_type(PointsToNode::Field);
+
+ // create a field edge to this node from everything adr could point to
+ VectorSet ptset(Thread::current()->resource_area());
+ PointsTo(ptset, base, phase);
+ for( VectorSetI i(&ptset); i.test(); ++i ) {
+ uint pt = i.elem;
+ add_field_edge(pt, n->_idx, type_to_offset(phase->type(n)));
+ }
+ break;
+ }
+ case Op_Parm:
+ {
+ ProjNode *nproj = n->as_Proj();
+ uint con = nproj->_con;
+ if (con < TypeFunc::Parms)
+ return;
+ const Type *t = nproj->in(0)->as_Start()->_domain->field_at(con);
+ if (t->isa_ptr() == NULL)
+ return;
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ if (t->isa_oopptr() != NULL) {
+ set_escape_state(n->_idx, PointsToNode::ArgEscape);
+ } else {
+ // this must be the incoming state of an OSR compile; we have to assume anything
+ // passed in globally escapes
+ assert(_compile->is_osr_compilation(), "bad argument type for non-osr compilation");
+ set_escape_state(n->_idx, PointsToNode::GlobalEscape);
+ }
+ _processed.set(n->_idx);
+ break;
+ }
+ case Op_Phi:
+ {
+ PhiNode *phi = n->as_Phi();
+ if (phi->type()->isa_oopptr() == NULL)
+ return; // nothing to do if not an oop
+ ptadr->set_node_type(PointsToNode::LocalVar);
+ process_phi_escape(phi, phase);
+ break;
+ }
+ case Op_CreateEx:
+ {
+ // assume that all exception objects globally escape
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ set_escape_state(n->_idx, PointsToNode::GlobalEscape);
+ _processed.set(n->_idx);
+ break;
+ }
+ case Op_ConP:
+ {
+ const Type *t = phase->type(n);
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ // assume all pointer constants globally escape except for null
+ if (t == TypePtr::NULL_PTR)
+ set_escape_state(n->_idx, PointsToNode::NoEscape);
+ else
+ set_escape_state(n->_idx, PointsToNode::GlobalEscape);
+ _processed.set(n->_idx);
+ break;
+ }
+ case Op_LoadKlass:
+ {
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ set_escape_state(n->_idx, PointsToNode::GlobalEscape);
+ _processed.set(n->_idx);
+ break;
+ }
+ case Op_LoadP:
+ {
+ const Type *t = phase->type(n);
+ if (!t->isa_oopptr())
+ return;
+ ptadr->set_node_type(PointsToNode::LocalVar);
+ set_escape_state(n->_idx, PointsToNode::UnknownEscape);
+
+ Node *adr = skip_casts(n->in(MemNode::Address));
+ const Type *adr_type = phase->type(adr);
+ Node *adr_base = skip_casts((adr->Opcode() == Op_AddP) ? adr->in(AddPNode::Base) : adr);
+
+ // For everything "adr" could point to, create a deferred edge from
+ // this node to each field with the same offset as "adr_type"
+ VectorSet ptset(Thread::current()->resource_area());
+ PointsTo(ptset, adr_base, phase);
+ // If ptset is empty, then this value must have been set outside
+ // this method, so we add the phantom node
+ if (ptset.Size() == 0)
+ ptset.set(_phantom_object);
+ for( VectorSetI i(&ptset); i.test(); ++i ) {
+ uint pt = i.elem;
+ add_deferred_edge_to_fields(n->_idx, pt, type_to_offset(adr_type));
+ }
+ break;
+ }
+ case Op_StoreP:
+ case Op_StorePConditional:
+ case Op_CompareAndSwapP:
+ {
+ Node *adr = n->in(MemNode::Address);
+ Node *val = skip_casts(n->in(MemNode::ValueIn));
+ const Type *adr_type = phase->type(adr);
+ if (!adr_type->isa_oopptr())
+ return;
+
+ assert(adr->Opcode() == Op_AddP, "expecting an AddP");
+ Node *adr_base = adr->in(AddPNode::Base);
+
+ // For everything "adr_base" could point to, create a deferred edge to "val" from each field
+ // with the same offset as "adr_type"
+ VectorSet ptset(Thread::current()->resource_area());
+ PointsTo(ptset, adr_base, phase);
+ for( VectorSetI i(&ptset); i.test(); ++i ) {
+ uint pt = i.elem;
+ add_edge_from_fields(pt, val->_idx, type_to_offset(adr_type));
+ }
+ break;
+ }
+ case Op_Proj:
+ {
+ ProjNode *nproj = n->as_Proj();
+ Node *n0 = nproj->in(0);
+ // we are only interested in the result projection from a call
+ if (nproj->_con == TypeFunc::Parms && n0->is_Call() ) {
+ process_call_result(nproj, phase);
+ }
+
+ break;
+ }
+ case Op_CastPP:
+ case Op_CheckCastPP:
+ {
+ ptadr->set_node_type(PointsToNode::LocalVar);
+ int ti = n->in(1)->_idx;
+ if (_nodes->at(ti).node_type() == PointsToNode::JavaObject) {
+ add_pointsto_edge(n->_idx, ti);
+ } else {
+ add_deferred_edge(n->_idx, ti);
+ }
+ break;
+ }
+ default:
+ ;
+ // nothing to do
+ }
+}
+
+void ConnectionGraph::record_escape(Node *n, PhaseTransform *phase) {
+ if (_collecting)
+ record_escape_work(n, phase);
+}
+
+#ifndef PRODUCT
+void ConnectionGraph::dump() {
+ PhaseGVN *igvn = _compile->initial_gvn();
+ bool first = true;
+
+ for (uint ni = 0; ni < (uint)_nodes->length(); ni++) {
+ PointsToNode *esp = _nodes->adr_at(ni);
+ if (esp->node_type() == PointsToNode::UnknownType || esp->_node == NULL)
+ continue;
+ PointsToNode::EscapeState es = escape_state(esp->_node, igvn);
+ if (es == PointsToNode::NoEscape || (Verbose &&
+ (es != PointsToNode::UnknownEscape || esp->edge_count() != 0))) {
+ // don't print null pointer node which almost every method has
+ if (esp->_node->Opcode() != Op_ConP || igvn->type(esp->_node) != TypePtr::NULL_PTR) {
+ if (first) {
+ tty->print("======== Connection graph for ");
+ C()->method()->print_short_name();
+ tty->cr();
+ first = false;
+ }
+ tty->print("%4d ", ni);
+ esp->dump();
+ }
+ }
+ }
+}
+#endif
diff --git a/src/share/vm/opto/escape.hpp b/src/share/vm/opto/escape.hpp
new file mode 100644
index 000000000..3cd879257
--- /dev/null
+++ b/src/share/vm/opto/escape.hpp
@@ -0,0 +1,319 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Adaptation for C2 of the escape analysis algorithm described in:
+//
+// [Choi99] Jong-Deok Choi, Manish Gupta, Mauricio Serrano, Vugranam C. Sreedhar,
+// Sam Midkiff, "Escape Analysis for Java", Proceedings of the ACM SIGPLAN
+// OOPSLA Conference, November 1, 1999
+//
+// The flow-insensitive analysis described in the paper has been implemented.
+//
+// The analysis requires construction of a "connection graph" (CG) for the method being
+// analyzed. The nodes of the connection graph are:
+//
+// - Java objects (JO)
+// - Local variables (LV)
+// - Fields of an object (OF), these also include array elements
+//
+// The CG contains 3 types of edges:
+//
+// - PointsTo (-P>) {LV,OF} to JO
+// - Deferred (-D>) from {LV, OF} to {LV, OF}
+// - Field (-F>) from JO to OF
+//
+// The following utility function is used by the algorithm:
+//
+// PointsTo(n) - n is any CG node; it returns the set of JO that n could
+// point to.
+//
+// The algorithm describes how to construct the connection graph in the following 4 cases:
+//
+// Case Edges Created
+//
+// (1) p = new T() LV -P> JO
+// (2) p = q LV -D> LV
+// (3) p.f = q JO -F> OF, OF -D> LV
+// (4) p = q.f JO -F> OF, LV -D> OF
+//
+// In all these cases, p and q are local variables. For static field references, we can
+// construct a local variable containing a reference to the static memory.
+//
+// C2 does not have local variables. However for the purposes of constructing
+// the connection graph, the following IR nodes are treated as local variables:
+// Phi (pointer values)
+// LoadP
+// Proj (value returned from callnodes including allocations)
+// CheckCastPP
+//
+// LoadP, Proj and CheckCastPP behave like variables that are assigned only once. Only
+// a Phi can have multiple assignments. Each input to a Phi is treated
+// as an assignment to it.
+//
+// The following node types are JavaObject:
+//
+// top()
+// Allocate
+// AllocateArray
+// Parm (for incoming arguments)
+// CreateEx
+// ConP
+// LoadKlass
+//
+// AddP nodes are fields.
+//
+// After building the graph, a pass is made over the nodes, deleting deferred
+// edges and copying the edges from the target of each deferred edge to the
+// source. This results in a graph with no deferred edges, only:
+//
+// LV -P> JO
+// OF -P> JO
+// JO -F> OF
+//
+// Then, for each node which is GlobalEscape, anything it could point to
+// is marked GlobalEscape. Finally, for any node marked ArgEscape, anything
+// it could point to is marked ArgEscape.
+//
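+// As an informal example, using the notation above, the fragment
+//
+//    p = new T();   // case (1):  LV_p -P> JO_1
+//    q = p;         // case (2):  LV_q -D> LV_p
+//    q.f = r;       // case (3):  JO_1 -F> OF_f,  OF_f -D> LV_r
+//    s = q.f;       // case (4):  LV_s -D> OF_f
+//
+// produces a graph in which, after the deferred edges are removed, both OF_f
+// and LV_s point to whatever JavaObject r refers to.
+//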
+
+class Compile;
+class Node;
+class CallNode;
+class PhiNode;
+class PhaseTransform;
+class Type;
+class TypePtr;
+class VectorSet;
+
+class PointsToNode {
+friend class ConnectionGraph;
+public:
+ typedef enum {
+ UnknownType = 0,
+ JavaObject = 1,
+ LocalVar = 2,
+ Field = 3
+ } NodeType;
+
+ typedef enum {
+ UnknownEscape = 0,
+ NoEscape = 1,
+ ArgEscape = 2,
+ GlobalEscape = 3
+ } EscapeState;
+
+ typedef enum {
+ UnknownEdge = 0,
+ PointsToEdge = 1,
+ DeferredEdge = 2,
+ FieldEdge = 3
+ } EdgeType;
+
+private:
+ enum {
+ EdgeMask = 3,
+ EdgeShift = 2,
+
+ INITIAL_EDGE_COUNT = 4
+ };
+
+ NodeType _type;
+ EscapeState _escape;
+ GrowableArray<uint>* _edges; // outgoing edges
+ int _offset; // for fields
+
+ bool _unique_type; // For allocated objects, this node may be a unique type
+public:
+ Node* _node; // Ideal node corresponding to this PointsTo node
+ int _inputs_processed; // the number of Phi inputs that have been processed so far
+ bool _hidden_alias; // this node is an argument to a function which may return it
+ // creating a hidden alias
+
+
+ PointsToNode(): _offset(-1), _type(UnknownType), _escape(UnknownEscape), _edges(NULL), _node(NULL), _inputs_processed(0), _hidden_alias(false), _unique_type(true) {}
+
+ EscapeState escape_state() const { return _escape; }
+ NodeType node_type() const { return _type;}
+ int offset() { return _offset;}
+
+ void set_offset(int offs) { _offset = offs;}
+ void set_escape_state(EscapeState state) { _escape = state; }
+ void set_node_type(NodeType ntype) {
+ assert(_type == UnknownType || _type == ntype, "Can't change node type");
+ _type = ntype;
+ }
+
+ // count of outgoing edges
+ uint edge_count() const { return (_edges == NULL) ? 0 : _edges->length(); }
+ // node index of target of outgoing edge "e"
+ uint edge_target(uint e) const;
+ // type of outgoing edge "e"
+ EdgeType edge_type(uint e) const;
+ // add an edge of the specified type pointing to the specified target
+ void add_edge(uint targIdx, EdgeType et);
+ // remove an edge of the specified type pointing to the specified target
+ void remove_edge(uint targIdx, EdgeType et);
+#ifndef PRODUCT
+ void dump() const;
+#endif
+
+};
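+
+// The EdgeMask/EdgeShift constants above suggest that each _edges entry packs
+// the target node index together with the 2-bit edge type; a sketch of the
+// assumed encoding (see add_edge/edge_target/edge_type):
+//
+//   uint entry  = (targIdx << EdgeShift) | (uint)et;   // stored by add_edge
+//   uint target = entry >> EdgeShift;                  // edge_target(e)
+//   EdgeType t  = (EdgeType)(entry & EdgeMask);        // edge_type(e)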
+
+class ConnectionGraph: public ResourceObj {
+private:
+ enum {
+ INITIAL_NODE_COUNT = 100 // initial size of _nodes array
+ };
+
+
+ GrowableArray<PointsToNode>* _nodes; // connection graph nodes, indexed by ideal
+ // node index
+ Unique_Node_List _deferred; // Phi's to be processed after parsing
+ VectorSet _processed; // records which nodes have been processed
+ bool _collecting; // indicates whether escape information is
+ // still being collected. If false, no new
+ // nodes will be processed
+ uint _phantom_object; // index of globally escaping object that
+ // pointer values loaded from a field which
+ // has not been set are assumed to point to
+ Compile * _compile; // Compile object for current compilation
+
+ // address of an element in _nodes. Used when the element is to be modified
+ PointsToNode *ptnode_adr(uint idx) {
+ if ((uint)_nodes->length() <= idx) {
+ // expand _nodes array
+ PointsToNode dummy = _nodes->at_grow(idx);
+ }
+ return _nodes->adr_at(idx);
+ }
+
+ // offset of a field reference
+ int type_to_offset(const Type *t);
+
+ // compute the escape state for arguments to a call
+ void process_call_arguments(CallNode *call, PhaseTransform *phase);
+
+ // compute the escape state for the return value of a call
+ void process_call_result(ProjNode *resproj, PhaseTransform *phase);
+
+ // compute the escape state of a Phi. This may be called multiple
+ // times as new inputs are added to the Phi.
+ void process_phi_escape(PhiNode *phi, PhaseTransform *phase);
+
+ // compute the escape state of an ideal node.
+ void record_escape_work(Node *n, PhaseTransform *phase);
+
+ // walk the connection graph starting at the node corresponding to "n" and
+ // add the index of everything it could point to, to "ptset". This may cause
+ // Phi's encountered to get (re)processed (which requires "phase".)
+ void PointsTo(VectorSet &ptset, Node * n, PhaseTransform *phase);
+
+ // Edge manipulation. The "from_i" and "to_i" arguments are the
+ // node indices of the source and destination of the edge
+ void add_pointsto_edge(uint from_i, uint to_i);
+ void add_deferred_edge(uint from_i, uint to_i);
+ void add_field_edge(uint from_i, uint to_i, int offs);
+
+
+ // Add an edge to the node given by "to_i" from any field of adr_i whose offset
+ // matches "offset". A deferred edge is added if to_i is a LocalVar, and
+ // a pointsto edge is added if it is a JavaObject.
+ void add_edge_from_fields(uint adr, uint to_i, int offs);
+
+ // Add a deferred edge from the node given by "from_i" to any field of adr_i whose offset
+ // matches "offset".
+ void add_deferred_edge_to_fields(uint from_i, uint adr, int offs);
+
+
+ // Remove outgoing deferred edges from the node referenced by "ni".
+ // Any outgoing edges from the target of the deferred edge are copied
+ // to "ni".
+ void remove_deferred(uint ni);
+
+ Node_Array _node_map; // used for bookkeeping during type splitting
+ // Used for the following purposes:
+ // Memory Phi - most recent unique Phi split out
+ // from this Phi
+ // MemNode - new memory input for this node
+ // CheckCastPP - allocation that this is a cast of
+ // allocation - CheckCastPP of the allocation
+ void split_AddP(Node *addp, Node *base, PhaseGVN *igvn);
+ PhiNode *create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn, bool &new_created);
+ PhiNode *split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn);
+ Node *find_mem(Node *mem, int alias_idx, PhaseGVN *igvn);
+ // Propagate unique types created for unescaped allocated objects
+ // through the graph
+ void split_unique_types(GrowableArray<Node *> &alloc_worklist);
+
+ // manage entries in _node_map
+ void set_map(int idx, Node *n) { _node_map.map(idx, n); }
+ void set_map_phi(int idx, PhiNode *p) { _node_map.map(idx, (Node *) p); }
+ Node *get_map(int idx) { return _node_map[idx]; }
+ PhiNode *get_map_phi(int idx) {
+ Node *phi = _node_map[idx];
+ return (phi == NULL) ? NULL : phi->as_Phi();
+ }
+
+ // Notify optimizer that a node has been modified
+ // Note: This assumes that escape analysis is run before
+ // PhaseIterGVN creation
+ void record_for_optimizer(Node *n) {
+ _compile->record_for_igvn(n);
+ }
+
+ // Set the escape state of a node
+ void set_escape_state(uint ni, PointsToNode::EscapeState es);
+
+ // bypass any casts and return the node they refer to
+ Node * skip_casts(Node *n);
+
+ // Get Compile object for current compilation.
+ Compile *C() const { return _compile; }
+
+public:
+ ConnectionGraph(Compile *C);
+
+ // record a Phi for later processing.
+ void record_for_escape_analysis(Node *n);
+
+ // process a node and fill in its connection graph node
+ void record_escape(Node *n, PhaseTransform *phase);
+
+ // All nodes have been recorded, compute the escape information
+ void compute_escape();
+
+ // escape state of a node
+ PointsToNode::EscapeState escape_state(Node *n, PhaseTransform *phase);
+
+ bool hidden_alias(Node *n) {
+ if (_collecting)
+ return true;
+ PointsToNode ptn = _nodes->at_grow(n->_idx);
+ return (ptn.escape_state() != PointsToNode::NoEscape) || ptn._hidden_alias;
+ }
+
+#ifndef PRODUCT
+ void dump();
+#endif
+};
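+
+// Typical use, sketched from the public interface above (the actual driver
+// lives elsewhere in the compiler): construct a ConnectionGraph for the
+// current Compile, feed it ideal nodes through record_escape() and Phis
+// through record_for_escape_analysis() while collecting, call
+// compute_escape() once, and afterwards query escape_state(n, phase) or
+// hidden_alias(n) when deciding whether an optimization is safe.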
diff --git a/src/share/vm/opto/gcm.cpp b/src/share/vm/opto/gcm.cpp
new file mode 100644
index 000000000..88af191a1
--- /dev/null
+++ b/src/share/vm/opto/gcm.cpp
@@ -0,0 +1,1767 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_gcm.cpp.incl"
+
+//----------------------------schedule_node_into_block-------------------------
+// Insert node n into block b. Look for projections of n and make sure they
+// are in b also.
+void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
+ // Set basic block of n, Add n to b,
+ _bbs.map(n->_idx, b);
+ b->add_inst(n);
+
+ // After Matching, nearly any old Node may have projections trailing it.
+ // These are usually machine-dependent flags. In any case, they might
+ // float to another block below this one. Move them up.
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* use = n->fast_out(i);
+ if (use->is_Proj()) {
+ Block* buse = _bbs[use->_idx];
+ if (buse != b) { // In wrong block?
+ if (buse != NULL)
+ buse->find_remove(use); // Remove from wrong block
+ _bbs.map(use->_idx, b); // Re-insert in this block
+ b->add_inst(use);
+ }
+ }
+ }
+}
+
+
+//------------------------------schedule_pinned_nodes--------------------------
+// Set the basic block for Nodes pinned into blocks
+void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+ // Allocate node stack of size C->unique()+8 to avoid frequent realloc
+ GrowableArray <Node *> spstack(C->unique()+8);
+ spstack.push(_root);
+ while ( spstack.is_nonempty() ) {
+ Node *n = spstack.pop();
+ if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
+ if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down!
+ Node *input = n->in(0);
+ assert( input, "pinned Node must have Control" );
+ while( !input->is_block_start() )
+ input = input->in(0);
+ Block *b = _bbs[input->_idx]; // Basic block of controlling input
+ schedule_node_into_block(n, b);
+ }
+ for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs
+ if( n->in(i) != NULL )
+ spstack.push(n->in(i));
+ }
+ }
+ }
+}
+
+#ifdef ASSERT
+// Assert that new input b2 is dominated by all previous inputs.
+// Check this by seeing that it is dominated by b1, the deepest
+// input observed until b2.
+static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
+ if (b1 == NULL) return;
+ assert(b1->_dom_depth < b2->_dom_depth, "sanity");
+ Block* tmp = b2;
+ while (tmp != b1 && tmp != NULL) {
+ tmp = tmp->_idom;
+ }
+ if (tmp != b1) {
+ // Detected an unschedulable graph. Print some nice stuff and die.
+ tty->print_cr("!!! Unschedulable graph !!!");
+ for (uint j=0; j<n->len(); j++) { // For all inputs
+ Node* inn = n->in(j); // Get input
+ if (inn == NULL) continue; // Ignore NULL, missing inputs
+ Block* inb = bbs[inn->_idx];
+ tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
+ inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
+ inn->dump();
+ }
+ tty->print("Failing node: ");
+ n->dump();
+ assert(false, "unscheduable graph");
+ }
+}
+#endif
+
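+// Return the block of the input that lies deepest in the dominator tree.
+// Since every input's block must dominate the block chosen for the node being
+// scheduled, this is the earliest block in which that node can legally be placed.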
+static Block* find_deepest_input(Node* n, Block_Array &bbs) {
+ // Find the last input dominated by all other inputs.
+ Block* deepb = NULL; // Deepest block so far
+ int deepb_dom_depth = 0;
+ for (uint k = 0; k < n->len(); k++) { // For all inputs
+ Node* inn = n->in(k); // Get input
+ if (inn == NULL) continue; // Ignore NULL, missing inputs
+ Block* inb = bbs[inn->_idx];
+ assert(inb != NULL, "must already have scheduled this input");
+ if (deepb_dom_depth < (int) inb->_dom_depth) {
+ // The new inb must be dominated by the previous deepb.
+ // The various inputs must be linearly ordered in the dom
+ // tree, or else there will not be a unique deepest block.
+ DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
+ deepb = inb; // Save deepest block
+ deepb_dom_depth = deepb->_dom_depth;
+ }
+ }
+ assert(deepb != NULL, "must be at least one input to n");
+ return deepb;
+}
+
+
+//------------------------------schedule_early---------------------------------
+// Find the earliest Block any instruction can be placed in. Some instructions
+// are pinned into Blocks. Unpinned instructions can appear in the last block in
+// which all their inputs occur.
+bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
+ // Allocate stack with enough space to avoid frequent realloc
+ Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
+ // roots.push(_root); _root will be processed among C->top() inputs
+ roots.push(C->top());
+ visited.set(C->top()->_idx);
+
+ while (roots.size() != 0) {
+ // Use local variables nstack_top_n & nstack_top_i to cache values
+ // on stack's top.
+ Node *nstack_top_n = roots.pop();
+ uint nstack_top_i = 0;
+//while_nstack_nonempty:
+ while (true) {
+ // Get parent node and next input's index from stack's top.
+ Node *n = nstack_top_n;
+ uint i = nstack_top_i;
+
+ if (i == 0) {
+ // Special control input processing.
+ // While I am here, go ahead and look for Nodes which are taking control
+ // from an is_block_proj Node. After I inserted RegionNodes to make proper
+ // blocks, the control at an is_block_proj more properly comes from the
+ // Region being controlled by the block_proj Node.
+ const Node *in0 = n->in(0);
+ if (in0 != NULL) { // Control-dependent?
+ const Node *p = in0->is_block_proj();
+ if (p != NULL && p != n) { // Control from a block projection?
+ // Find trailing Region
+ Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
+ uint j = 0;
+ if (pb->_num_succs != 1) { // More than 1 successor?
+ // Search for successor
+ uint max = pb->_nodes.size();
+ assert( max > 1, "" );
+ uint start = max - pb->_num_succs;
+ // Find which output path belongs to projection
+ for (j = start; j < max; j++) {
+ if( pb->_nodes[j] == in0 )
+ break;
+ }
+ assert( j < max, "must find" );
+ // Change control to match head of successor basic block
+ j -= start;
+ }
+ n->set_req(0, pb->_succs[j]->head());
+ }
+ } else { // n->in(0) == NULL
+ if (n->req() == 1) { // This guy is a constant with NO inputs?
+ n->set_req(0, _root);
+ }
+ }
+ }
+
+ // First, visit all inputs and force them to get a block. If an
+ // input is already in a block we quit following inputs (to avoid
+ // cycles). Instead we put that Node on a worklist to be handled
+ // later (since ITS inputs may not have a block yet).
+ bool done = true; // Assume all n's inputs will be processed
+ while (i < n->len()) { // For all inputs
+ Node *in = n->in(i); // Get input
+ ++i;
+ if (in == NULL) continue; // Ignore NULL, missing inputs
+ int is_visited = visited.test_set(in->_idx);
+ if (!_bbs.lookup(in->_idx)) { // Missing block selection?
+ if (is_visited) {
+ // assert( !visited.test(in->_idx), "did not schedule early" );
+ return false;
+ }
+ nstack.push(n, i); // Save parent node and next input's index.
+ nstack_top_n = in; // Process current input now.
+ nstack_top_i = 0;
+ done = false; // Not all n's inputs processed.
+ break; // continue while_nstack_nonempty;
+ } else if (!is_visited) { // Input not yet visited?
+ roots.push(in); // Visit this guy later, using worklist
+ }
+ }
+ if (done) {
+ // All of n's inputs have been processed, complete post-processing.
+
+ // Some instructions are pinned into a block. These include Region,
+ // Phi, Start, Return, and other control-dependent instructions and
+ // any projections which depend on them.
+ if (!n->pinned()) {
+ // Set earliest legal block.
+ _bbs.map(n->_idx, find_deepest_input(n, _bbs));
+ }
+
+ if (nstack.is_empty()) {
+ // Finished all nodes on stack.
+ // Process next node on the worklist 'roots'.
+ break;
+ }
+ // Get saved parent node and next input's index.
+ nstack_top_n = nstack.node();
+ nstack_top_i = nstack.index();
+ nstack.pop();
+ } // if (done)
+ } // while (true)
+ } // while (roots.size() != 0)
+ return true;
+}
+
+//------------------------------dom_lca----------------------------------------
+// Find least common ancestor in dominator tree
+// LCA is a current notion of LCA, to be raised above 'this'.
+// As a convenient boundary condition, return 'this' if LCA is NULL.
+// Find the LCA of those two nodes.
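+// (Illustration: if 'this' sits at dom_depth 5 and LCA at dom_depth 3, the
+// first loop lifts 'this' two levels; the final loop then climbs both
+// pointers in lockstep until they meet at the common dominator.)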
+Block* Block::dom_lca(Block* LCA) {
+ if (LCA == NULL || LCA == this) return this;
+
+ Block* anc = this;
+ while (anc->_dom_depth > LCA->_dom_depth)
+ anc = anc->_idom; // Walk up till anc is as high as LCA
+
+ while (LCA->_dom_depth > anc->_dom_depth)
+ LCA = LCA->_idom; // Walk up till LCA is as high as anc
+
+ while (LCA != anc) { // Walk both up till they are the same
+ LCA = LCA->_idom;
+ anc = anc->_idom;
+ }
+
+ return LCA;
+}
+
+//--------------------------raise_LCA_above_use--------------------------------
+// We are placing a definition, and have been given a def->use edge.
+// The definition must dominate the use, so move the LCA upward in the
+// dominator tree to dominate the use. If the use is a phi, adjust
+// the LCA only with the phi input paths which actually use this def.
+static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
+ Block* buse = bbs[use->_idx];
+ if (buse == NULL) return LCA; // Unused killing Projs have no use block
+ if (!use->is_Phi()) return buse->dom_lca(LCA);
+ uint pmax = use->req(); // Number of Phi inputs
+ // Why doesn't this loop just break after finding the matching input to
+ // the Phi? Well...it's like this. I do not have true def-use/use-def
+ // chains. Means I cannot distinguish, from the def-use direction, which
+ // of many use-defs lead from the same use to the same def. That is, this
+ // Phi might have several uses of the same def. Each use appears in a
+ // different predecessor block. But when I enter here, I cannot distinguish
+ // which use-def edge I should find the predecessor block for. So I find
+ // them all. Means I do a little extra work if a Phi uses the same value
+ // more than once.
+ for (uint j=1; j<pmax; j++) { // For all inputs
+ if (use->in(j) == def) { // Found matching input?
+ Block* pred = bbs[buse->pred(j)->_idx];
+ LCA = pred->dom_lca(LCA);
+ }
+ }
+ return LCA;
+}
+
+//----------------------------raise_LCA_above_marks----------------------------
+// Return a new LCA that dominates LCA and any of its marked predecessors.
+// Search all my parents up to 'early' (exclusive), looking for predecessors
+// which are marked with the given index. Return the LCA (in the dom tree)
+// of all marked blocks. If there are none marked, return the original
+// LCA.
+static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
+ Block* early, Block_Array &bbs) {
+ Block_List worklist;
+ worklist.push(LCA);
+ while (worklist.size() > 0) {
+ Block* mid = worklist.pop();
+ if (mid == early) continue; // stop searching here
+
+ // Test and set the visited bit.
+ if (mid->raise_LCA_visited() == mark) continue; // already visited
+ mid->set_raise_LCA_visited(mark);
+
+ // Don't process the current LCA, otherwise the search may terminate early
+ if (mid != LCA && mid->raise_LCA_mark() == mark) {
+ // Raise the LCA.
+ LCA = mid->dom_lca(LCA);
+ if (LCA == early) break; // stop searching everywhere
+ assert(early->dominates(LCA), "early is high enough");
+ // Resume searching at that point, skipping intermediate levels.
+ worklist.push(LCA);
+ } else {
+ // Keep searching through this block's predecessors.
+ for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
+ Block* mid_parent = bbs[ mid->pred(j)->_idx ];
+ worklist.push(mid_parent);
+ }
+ }
+ }
+ return LCA;
+}
+
+//--------------------------memory_early_block--------------------------------
+// This is a variation of find_deepest_input, the heart of schedule_early.
+// Find the "early" block for a load, if we considered only memory and
+// address inputs, that is, if other data inputs were ignored.
+//
+// Because a subset of edges are considered, the resulting block will
+// be earlier (at a shallower dom_depth) than the true schedule_early
+// point of the node. We compute this earlier block as a more permissive
+// site for anti-dependency insertion, but only if subsume_loads is enabled.
+static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
+ Node* base;
+ Node* index;
+ Node* store = load->in(MemNode::Memory);
+ load->as_Mach()->memory_inputs(base, index);
+
+ assert(base != NodeSentinel && index != NodeSentinel,
+ "unexpected base/index inputs");
+
+ Node* mem_inputs[4];
+ int mem_inputs_length = 0;
+ if (base != NULL) mem_inputs[mem_inputs_length++] = base;
+ if (index != NULL) mem_inputs[mem_inputs_length++] = index;
+ if (store != NULL) mem_inputs[mem_inputs_length++] = store;
+
+ // In the comparison below, add one to account for the control input,
+ // which may be null, but always takes up a spot in the in array.
+ if (mem_inputs_length + 1 < (int) load->req()) {
+ // This "load" has more inputs than just the memory, base and index inputs.
+ // For purposes of checking anti-dependences, we need to start
+ // from the early block of only the address portion of the instruction,
+ // and ignore other blocks that may have factored into the wider
+ // schedule_early calculation.
+ if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);
+
+ Block* deepb = NULL; // Deepest block so far
+ int deepb_dom_depth = 0;
+ for (int i = 0; i < mem_inputs_length; i++) {
+ Block* inb = bbs[mem_inputs[i]->_idx];
+ if (deepb_dom_depth < (int) inb->_dom_depth) {
+ // The new inb must be dominated by the previous deepb.
+ // The various inputs must be linearly ordered in the dom
+ // tree, or else there will not be a unique deepest block.
+ DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
+ deepb = inb; // Save deepest block
+ deepb_dom_depth = deepb->_dom_depth;
+ }
+ }
+ early = deepb;
+ }
+
+ return early;
+}
+
+//--------------------------insert_anti_dependences---------------------------
+// A load may need to witness memory that nearby stores can overwrite.
+// For each nearby store, either insert an "anti-dependence" edge
+// from the load to the store, or else move LCA upward to force the
+// load to (eventually) be scheduled in a block above the store.
+//
+// Do not add edges to stores on distinct control-flow paths;
+// only add edges to stores which might interfere.
+//
+// Return the (updated) LCA. There will not be any possibly interfering
+// store between the load's "early block" and the updated LCA.
+// Any stores in the updated LCA will have new precedence edges
+// back to the load. The caller is expected to schedule the load
+// in the LCA, in which case the precedence edges will make LCM
+// preserve anti-dependences. The caller may also hoist the load
+// above the LCA, if it is not the early block.
+Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
+ assert(load->needs_anti_dependence_check(), "must be a load of some sort");
+ assert(LCA != NULL, "");
+ DEBUG_ONLY(Block* LCA_orig = LCA);
+
+ // Compute the alias index. Loads and stores with different alias indices
+ // do not need anti-dependence edges.
+ uint load_alias_idx = C->get_alias_index(load->adr_type());
+#ifdef ASSERT
+ if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
+ (PrintOpto || VerifyAliases ||
+ (PrintMiscellaneous && (WizardMode || Verbose)))) {
+ // Load nodes should not consume all of memory.
+ // Reporting a bottom type indicates a bug in adlc.
+ // If some particular type of node validly consumes all of memory,
+ // sharpen the preceding "if" to exclude it, so we can catch bugs here.
+ tty->print_cr("*** Possible Anti-Dependence Bug: Load consumes all of memory.");
+ load->dump(2);
+ if (VerifyAliases) assert(load_alias_idx != Compile::AliasIdxBot, "");
+ }
+#endif
+ assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
+ "String compare is only known 'load' that does not conflict with any stores");
+
+ if (!C->alias_type(load_alias_idx)->is_rewritable()) {
+ // It is impossible to spoil this load by putting stores before it,
+ // because we know that the stores will never update the value
+ // which 'load' must witness.
+ return LCA;
+ }
+
+ node_idx_t load_index = load->_idx;
+
+ // Note the earliest legal placement of 'load', as determined
+ // by the unique point in the dom tree where all memory effects
+ // and other inputs are first available. (Computed by schedule_early.)
+ // For normal loads, 'early' is the shallowest place (dom graph wise)
+ // to look for anti-deps between this load and any store.
+ Block* early = _bbs[load_index];
+
+ // If we are subsuming loads, compute an "early" block that only considers
+ // memory or address inputs. This block may be different than the
+ // schedule_early block in that it could be at an even shallower depth in the
+ // dominator tree, and allow for a broader discovery of anti-dependences.
+ if (C->subsume_loads()) {
+ early = memory_early_block(load, early, _bbs);
+ }
+
+ ResourceArea *area = Thread::current()->resource_area();
+ Node_List worklist_mem(area); // prior memory state to store
+ Node_List worklist_store(area); // possible-def to explore
+ Node_List non_early_stores(area); // all relevant stores outside of early
+ bool must_raise_LCA = false;
+ DEBUG_ONLY(VectorSet should_not_repeat(area));
+
+#ifdef TRACK_PHI_INPUTS
+ // %%% This extra checking fails because MergeMem nodes are not GVNed.
+ // Provide "phi_inputs" to check if every input to a PhiNode is from the
+ // original memory state. This indicates a PhiNode which should not
+ // prevent the load from sinking. For such a block, set_raise_LCA_mark
+ // may be overly conservative.
+ // Mechanism: count inputs seen for each Phi encountered in worklist_store.
+ DEBUG_ONLY(GrowableArray<uint> phi_inputs(area, C->unique(),0,0));
+#endif
+
+ // 'load' uses some memory state; look for users of the same state.
+ // Recurse through MergeMem nodes to the stores that use them.
+
+ // Each of these stores is a possible definition of memory
+ // that 'load' needs to use. We need to force 'load'
+ // to occur before each such store. When the store is in
+ // the same block as 'load', we insert an anti-dependence
+ // edge load->store.
+
+ // The relevant stores "nearby" the load consist of a tree rooted
+ // at initial_mem, with internal nodes of type MergeMem.
+ // Therefore, the branches visited by the worklist are of this form:
+ // initial_mem -> (MergeMem ->)* store
+ // The anti-dependence constraints apply only to the fringe of this tree.
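+ // For example, one search might expand:
+ // initial_mem -> MergeMem -> StoreI (leaf: checked as a possible-def)
+ // initial_mem -> StoreB (leaf: checked as a possible-def)
+ // initial_mem -> MergeMem -> MergeMem -> ... (internal: expanded further)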
+
+ Node* initial_mem = load->in(MemNode::Memory);
+ worklist_store.push(initial_mem);
+ worklist_mem.push(NULL);
+ DEBUG_ONLY(should_not_repeat.test_set(initial_mem->_idx));
+ while (worklist_store.size() > 0) {
+ // Examine a nearby store to see if it might interfere with our load.
+ Node* mem = worklist_mem.pop();
+ Node* store = worklist_store.pop();
+ uint op = store->Opcode();
+
+ // MergeMems do not directly have anti-deps.
+ // Treat them as internal nodes in a forward tree of memory states,
+ // the leaves of which are each a 'possible-def'.
+ if (store == initial_mem // root (exclusive) of tree we are searching
+ || op == Op_MergeMem // internal node of tree we are searching
+ ) {
+ mem = store; // It's not a possibly interfering store.
+ for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+ store = mem->fast_out(i);
+ if (store->is_MergeMem()) {
+ // Be sure we don't get into combinatorial problems.
+ // (Allow phis to be repeated; they can merge two relevant states.)
+ uint k = worklist_store.size();
+ for (; k > 0; k--) {
+ if (worklist_store.at(k-1) == store) break;
+ }
+ if (k > 0) continue; // already on work list; do not repeat
+ DEBUG_ONLY(int repeated = should_not_repeat.test_set(store->_idx));
+ assert(!repeated, "do not walk merges twice");
+ }
+ worklist_mem.push(mem);
+ worklist_store.push(store);
+ }
+ continue;
+ }
+
+ if (op == Op_MachProj || op == Op_Catch) continue;
+ if (store->needs_anti_dependence_check()) continue; // not really a store
+
+ // Compute the alias index. Loads and stores with different alias
+ // indices do not need anti-dependence edges. Wide MemBar's are
+ // anti-dependent on everything (except immutable memories).
+ const TypePtr* adr_type = store->adr_type();
+ if (!C->can_alias(adr_type, load_alias_idx)) continue;
+
+ // Most slow-path runtime calls do NOT modify Java memory, but
+ // they can block and so write Raw memory.
+ if (store->is_Mach()) {
+ MachNode* mstore = store->as_Mach();
+ if (load_alias_idx != Compile::AliasIdxRaw) {
+ // Check for call into the runtime using the Java calling
+ // convention (and from there into a wrapper); it has no
+ // _method. Can't do this optimization for Native calls because
+ // they CAN write to Java memory.
+ if (mstore->ideal_Opcode() == Op_CallStaticJava) {
+ assert(mstore->is_MachSafePoint(), "");
+ MachSafePointNode* ms = (MachSafePointNode*) mstore;
+ assert(ms->is_MachCallJava(), "");
+ MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
+ if (mcj->_method == NULL) {
+ // These runtime calls do not write to Java visible memory
+ // (other than Raw) and so do not require anti-dependence edges.
+ continue;
+ }
+ }
+ // Same for SafePoints: they read/write Raw but only read otherwise.
+ // This is basically a workaround for SafePoints only defining control
+ // instead of control + memory.
+ if (mstore->ideal_Opcode() == Op_SafePoint)
+ continue;
+ } else {
+ // Some raw memory, such as the load of "top" at an allocation,
+ // can be control dependent on the previous safepoint. See
+ // comments in GraphKit::allocate_heap() about control input.
+ // Inserting an anti-dep between such a safepoint and a use
+ // creates a cycle, and will cause a subsequent failure in
+ // local scheduling. (BugId 4919904)
+ // (%%% How can a control input be a safepoint and not a projection??)
+ if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
+ continue;
+ }
+ }
+
+ // Identify a block that the current load must be above,
+ // or else observe that 'store' is all the way up in the
+ // earliest legal block for 'load'. In the latter case,
+ // immediately insert an anti-dependence edge.
+ Block* store_block = _bbs[store->_idx];
+ assert(store_block != NULL, "unused killing projections skipped above");
+
+ if (store->is_Phi()) {
+ // 'load' uses memory which is one (or more) of the Phi's inputs.
+ // It must be scheduled not before the Phi, but rather before
+ // each of the relevant Phi inputs.
+ //
+ // Instead of finding the LCA of all inputs to a Phi that match 'mem',
+ // we mark each corresponding predecessor block and do a combined
+ // hoisting operation later (raise_LCA_above_marks).
+ //
+ // Do not assert(store_block != early, "Phi merging memory after access")
+ // PhiNode may be at start of block 'early' with backedge to 'early'
+ DEBUG_ONLY(bool found_match = false);
+ for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
+ if (store->in(j) == mem) { // Found matching input?
+ DEBUG_ONLY(found_match = true);
+ Block* pred_block = _bbs[store_block->pred(j)->_idx];
+ if (pred_block != early) {
+ // If any predecessor of the Phi matches the load's "early block",
+ // we do not need a precedence edge between the Phi and 'load'
+ // since the load will be forced into a block preceding the Phi.
+ pred_block->set_raise_LCA_mark(load_index);
+ assert(!LCA_orig->dominates(pred_block) ||
+ early->dominates(pred_block), "early is high enough");
+ must_raise_LCA = true;
+ }
+ }
+ }
+ assert(found_match, "no worklist bug");
+#ifdef TRACK_PHI_INPUTS
+#ifdef ASSERT
+ // This assert asks about correct handling of PhiNodes, which may not
+ // have all input edges directly from 'mem'. See BugId 4621264
+ int num_mem_inputs = phi_inputs.at_grow(store->_idx,0) + 1;
+ // Increment by exactly one even if there are multiple copies of 'mem'
+ // coming into the phi, because we will run this block several times
+ // if there are several copies of 'mem'. (That's how DU iterators work.)
+ phi_inputs.at_put(store->_idx, num_mem_inputs);
+ assert(PhiNode::Input + num_mem_inputs < store->req(),
+ "Expect at least one phi input will not be from original memory state");
+#endif //ASSERT
+#endif //TRACK_PHI_INPUTS
+ } else if (store_block != early) {
+ // 'store' is between the current LCA and earliest possible block.
+ // Label its block, and decide later on how to raise the LCA
+ // to include the effect on LCA of this store.
+ // If this store's block gets chosen as the raised LCA, we
+ // will find him on the non_early_stores list and stick him
+ // with a precedence edge.
+ // (But, don't bother if LCA is already raised all the way.)
+ if (LCA != early) {
+ store_block->set_raise_LCA_mark(load_index);
+ must_raise_LCA = true;
+ non_early_stores.push(store);
+ }
+ } else {
+ // Found a possibly-interfering store in the load's 'early' block.
+ // This means 'load' cannot sink at all in the dominator tree.
+ // Add an anti-dep edge, and squeeze 'load' into the highest block.
+ assert(store != load->in(0), "dependence cycle found");
+ if (verify) {
+ assert(store->find_edge(load) != -1, "missing precedence edge");
+ } else {
+ store->add_prec(load);
+ }
+ LCA = early;
+ // This turns off the process of gathering non_early_stores.
+ }
+ }
+ // (Worklist is now empty; all nearby stores have been visited.)
+
+ // Finished if 'load' must be scheduled in its 'early' block.
+ // If we found any stores there, they have already been given
+ // precedence edges.
+ if (LCA == early) return LCA;
+
+ // We get here only if there are no possibly-interfering stores
+ // in the load's 'early' block. Move LCA up above all predecessors
+ // which contain stores we have noted.
+ //
+ // The raised LCA block can be a home to such interfering stores,
+ // but its predecessors must not contain any such stores.
+ //
+ // The raised LCA will be a lower bound for placing the load,
+ // preventing the load from sinking past any block containing
+ // a store that may invalidate the memory state required by 'load'.
+ if (must_raise_LCA)
+ LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
+ if (LCA == early) return LCA;
+
+ // Insert anti-dependence edges from 'load' to each store
+ // in the non-early LCA block.
+ // Mine the non_early_stores list for such stores.
+ if (LCA->raise_LCA_mark() == load_index) {
+ while (non_early_stores.size() > 0) {
+ Node* store = non_early_stores.pop();
+ Block* store_block = _bbs[store->_idx];
+ if (store_block == LCA) {
+ // add anti_dependence from store to load in its own block
+ assert(store != load->in(0), "dependence cycle found");
+ if (verify) {
+ assert(store->find_edge(load) != -1, "missing precedence edge");
+ } else {
+ store->add_prec(load);
+ }
+ } else {
+ assert(store_block->raise_LCA_mark() == load_index, "block was marked");
+ // Any other stores we found must be either inside the new LCA
+ // or else outside the original LCA. In the latter case, they
+ // did not interfere with any use of 'load'.
+ assert(LCA->dominates(store_block)
+ || !LCA_orig->dominates(store_block), "no stray stores");
+ }
+ }
+ }
+
+ // Return the highest block containing stores; any stores
+ // within that block have been given anti-dependence edges.
+ return LCA;
+}
+
+// This class is used to iterate backwards over the nodes in the graph.
+
+class Node_Backward_Iterator {
+
+private:
+ Node_Backward_Iterator();
+
+public:
+ // Constructor for the iterator
+ Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);
+
+ // Return the next node in the backward walk, or NULL when the walk is done
+ Node *next();
+
+private:
+ VectorSet &_visited;
+ Node_List &_stack;
+ Block_Array &_bbs;
+};
+
+// Constructor for the Node_Backward_Iterator
+Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
+ : _visited(visited), _stack(stack), _bbs(bbs) {
+ // The stack should contain exactly the root
+ stack.clear();
+ stack.push(root);
+
+ // Clear the visited bits
+ visited.Clear();
+}
+
+// Advance the Node_Backward_Iterator and return the next node
+Node *Node_Backward_Iterator::next() {
+
+ // If the _stack is empty, then just return NULL: finished.
+ if ( !_stack.size() )
+ return NULL;
+
+ // '_stack' is emulating a real _stack. The 'visit-all-users' loop has been
+ // made stateless, so I do not need to record the index 'i' on my _stack.
+ // Instead I visit all users each time, scanning for unvisited users.
+ // I visit unvisited not-anti-dependence users first, then anti-dependent
+ // children next.
+ Node *self = _stack.pop();
+
+ // I cycle here when I am entering a deeper level of recursion.
+ // The key variable 'self' was set prior to jumping here.
+ while( 1 ) {
+
+ _visited.set(self->_idx);
+
+ // Now schedule all uses as late as possible.
+ uint src = self->is_Proj() ? self->in(0)->_idx : self->_idx;
+ uint src_rpo = _bbs[src]->_rpo;
+
+ // Schedule all nodes in a post-order visit
+ Node *unvisited = NULL; // Unvisited anti-dependent Node, if any
+
+ // Scan for unvisited nodes
+ for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
+ // For all uses, schedule late
+ Node* n = self->fast_out(i); // Use
+
+ // Skip already visited children
+ if ( _visited.test(n->_idx) )
+ continue;
+
+ // do not traverse backward control edges
+ Node *use = n->is_Proj() ? n->in(0) : n;
+ uint use_rpo = _bbs[use->_idx]->_rpo;
+
+ if ( use_rpo < src_rpo )
+ continue;
+
+ // Phi nodes always precede uses in a basic block
+ if ( use_rpo == src_rpo && use->is_Phi() )
+ continue;
+
+ unvisited = n; // Found unvisited
+
+ // Check for possible-anti-dependent
+ if( !n->needs_anti_dependence_check() )
+ break; // Not visited, not anti-dep; schedule it NOW
+ }
+
+ // Did I find an unvisited not-anti-dependent Node?
+ if ( !unvisited )
+ break; // All done with children; post-visit 'self'
+
+ // Visit the unvisited Node. Contains the obvious push to
+ // indicate I'm entering a deeper level of recursion. I push the
+ // old state onto the _stack and set a new state and loop (recurse).
+ _stack.push(self);
+ self = unvisited;
+ } // End recursion loop
+
+ return self;
+}
+
+//------------------------------ComputeLatenciesBackwards----------------------
+// Compute the latency of all the instructions.
+void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+#ifndef PRODUCT
+ if (trace_opto_pipelining())
+ tty->print("\n#---- ComputeLatenciesBackwards ----\n");
+#endif
+
+ Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+ Node *n;
+
+ // Walk over all the nodes from last to first
+ while ((n = iter.next()) != NULL) {
+ // Set the latency for the definitions of this instruction
+ partial_latency_of_defs(n);
+ }
+} // end ComputeLatenciesBackwards
+
+//------------------------------partial_latency_of_defs------------------------
+// Compute the latency impact of this node on all defs. This computes
+// a number that increases as we approach the beginning of the routine.
+void PhaseCFG::partial_latency_of_defs(Node *n) {
+ // Set the latency for this instruction
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
+ n->_idx, _node_latency.at_grow(n->_idx));
+ dump();
+ }
+#endif
+
+ if (n->is_Proj())
+ n = n->in(0);
+
+ if (n->is_Root())
+ return;
+
+ uint nlen = n->len();
+ uint use_latency = _node_latency.at_grow(n->_idx);
+ uint use_pre_order = _bbs[n->_idx]->_pre_order;
+
+ for ( uint j=0; j<nlen; j++ ) {
+ Node *def = n->in(j);
+
+ if (!def || def == n)
+ continue;
+
+ // Walk backwards thru projections
+ if (def->is_Proj())
+ def = def->in(0);
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# in(%2d): ", j);
+ def->dump();
+ }
+#endif
+
+ // If the defining block is not known, assume it is ok
+ Block *def_block = _bbs[def->_idx];
+ uint def_pre_order = def_block ? def_block->_pre_order : 0;
+
+ if ( (use_pre_order < def_pre_order) ||
+ (use_pre_order == def_pre_order && n->is_Phi()) )
+ continue;
+
+ uint delta_latency = n->latency(j);
+ uint current_latency = delta_latency + use_latency;
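+ // For example, if this use must be ready at latency 7 and the edge
+ // contributes 3, the def's latency is raised to at least 10; the maximum
+ // over all uses is kept.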
+
+ if (_node_latency.at_grow(def->_idx) < current_latency) {
+ _node_latency.at_put_grow(def->_idx, current_latency);
+ }
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
+ use_latency, j, delta_latency, current_latency, def->_idx,
+ _node_latency.at_grow(def->_idx));
+ }
+#endif
+ }
+}
+
+//------------------------------latency_from_use-------------------------------
+// Compute the latency of a specific use
+int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
+ // If self-reference, return no latency
+ if (use == n || use->is_Root())
+ return 0;
+
+ uint def_pre_order = _bbs[def->_idx]->_pre_order;
+ uint latency = 0;
+
+ // If the use is not a projection, then it is simple...
+ if (!use->is_Proj()) {
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# out(): ");
+ use->dump();
+ }
+#endif
+
+ uint use_pre_order = _bbs[use->_idx]->_pre_order;
+
+ if (use_pre_order < def_pre_order)
+ return 0;
+
+ if (use_pre_order == def_pre_order && use->is_Phi())
+ return 0;
+
+ uint nlen = use->len();
+ uint nl = _node_latency.at_grow(use->_idx);
+
+ for ( uint j=0; j<nlen; j++ ) {
+ if (use->in(j) == n) {
+ // Change this if we want local latencies
+ uint ul = use->latency(j);
+ uint l = ul + nl;
+ if (latency < l) latency = l;
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# %d + edge_latency(%d) == %d -> %d, latency = %d",
+ nl, j, ul, l, latency);
+ }
+#endif
+ }
+ }
+ } else {
+ // This is a projection, just grab the latency of the use(s)
+ for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
+ uint l = latency_from_use(use, def, use->fast_out(j));
+ if (latency < l) latency = l;
+ }
+ }
+
+ return latency;
+}
+
+//------------------------------latency_from_uses------------------------------
+// Compute the latency of this instruction relative to all of its uses.
+// This computes a number that increases as we approach the beginning of the
+// routine.
+void PhaseCFG::latency_from_uses(Node *n) {
+ // Set the latency for this instruction
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
+ n->_idx, _node_latency.at_grow(n->_idx));
+ dump();
+ }
+#endif
+ uint latency=0;
+ const Node *def = n->is_Proj() ? n->in(0): n;
+
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ uint l = latency_from_use(n, def, n->fast_out(i));
+
+ if (latency < l) latency = l;
+ }
+
+ _node_latency.at_put_grow(n->_idx, latency);
+}
+
+//------------------------------hoist_to_cheaper_block-------------------------
+// Pick a block for node self, between early and LCA, that is a cheaper
+// alternative to LCA.
+Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
+ const double delta = 1+PROB_UNLIKELY_MAG(4);
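+ // 'delta' lets a candidate block win even when its frequency is negligibly
+ // higher than the best seen so far, provided the latency conditions checked
+ // below also hold.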
+ Block* least = LCA;
+ double least_freq = least->_freq;
+ uint target = _node_latency.at_grow(self->_idx);
+ uint start_latency = _node_latency.at_grow(LCA->_nodes[0]->_idx);
+ uint end_latency = _node_latency.at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+ bool in_latency = (target <= start_latency);
+ const Block* root_block = _bbs[_root->_idx];
+
+ // Turn off latency scheduling if scheduling is just plain off
+ if (!C->do_scheduling())
+ in_latency = true;
+
+ // Do not hoist (to cover latency) instructions which target a
+ // single register. Hoisting stretches the live range of the
+ // single register and may force spilling.
+ MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
+ if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
+ in_latency = true;
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# Find cheaper block for latency %d: ",
+ _node_latency.at_grow(self->_idx));
+ self->dump();
+ tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
+ LCA->_pre_order,
+ LCA->_nodes[0]->_idx,
+ start_latency,
+ LCA->_nodes[LCA->end_idx()]->_idx,
+ end_latency,
+ least_freq);
+ }
+#endif
+
+ // Walk up the dominator tree from LCA (Lowest common ancestor) to
+ // the earliest legal location. Capture the least execution frequency.
+ while (LCA != early) {
+ LCA = LCA->_idom; // Follow up the dominator tree
+
+ if (LCA == NULL) {
+ // Bailout without retry
+ C->record_method_not_compilable("late schedule failed: LCA == NULL");
+ return least;
+ }
+
+ // Don't hoist machine instructions to the root basic block
+ if (mach && LCA == root_block)
+ break;
+
+ uint start_lat = _node_latency.at_grow(LCA->_nodes[0]->_idx);
+ uint end_idx = LCA->end_idx();
+ uint end_lat = _node_latency.at_grow(LCA->_nodes[end_idx]->_idx);
+ double LCA_freq = LCA->_freq;
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
+ LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
+ }
+#endif
+ if (LCA_freq < least_freq || // Better Frequency
+ ( !in_latency && // No block containing latency
+ LCA_freq < least_freq * delta && // No worse frequency
+ target >= end_lat && // within latency range
+ !self->is_iteratively_computed() ) // But don't hoist IV increments
+ // because they may end up above other uses of their phi forcing
+ // their result register to be different from their input.
+ ) {
+ least = LCA; // Found cheaper block
+ least_freq = LCA_freq;
+ start_latency = start_lat;
+ end_latency = end_lat;
+ if (target <= start_lat)
+ in_latency = true;
+ }
+ }
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# Choose block B%d with start latency=%d and freq=%g",
+ least->_pre_order, start_latency, least_freq);
+ }
+#endif
+
+ // See if the latency needs to be updated
+ if (target < end_latency) {
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
+ }
+#endif
+ _node_latency.at_put_grow(self->_idx, end_latency);
+ partial_latency_of_defs(self);
+ }
+
+ return least;
+}
+
+
+//------------------------------schedule_late-----------------------------------
+// Now schedule all codes as LATE as possible. This is the LCA in the
+// dominator tree of all USES of a value. Pick the block with the least
+// loop nesting depth that is lowest in the dominator tree.
+extern const char must_clone[];
+void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) {
+#ifndef PRODUCT
+ if (trace_opto_pipelining())
+ tty->print("\n#---- schedule_late ----\n");
+#endif
+
+ Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+ Node *self;
+
+ // Walk over all the nodes from last to first
+ while ((self = iter.next()) != NULL) {
+ Block* early = _bbs[self->_idx]; // Earliest legal placement
+
+ if (self->is_top()) {
+ // Top node goes in bb #2 with other constants.
+ // It must be special-cased, because it has no out edges.
+ early->add_inst(self);
+ continue;
+ }
+
+ // No uses, just terminate
+ if (self->outcnt() == 0) {
+ assert(self->Opcode() == Op_MachProj, "sanity");
+ continue; // Must be a dead machine projection
+ }
+
+ // If node is pinned in the block, then no scheduling can be done.
+ if( self->pinned() ) // Pinned in block?
+ continue;
+
+ MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
+ if (mach) {
+ switch (mach->ideal_Opcode()) {
+ case Op_CreateEx:
+ // Don't move exception creation
+ early->add_inst(self);
+ continue;
+ break;
+ case Op_CheckCastPP:
+ // Don't move CheckCastPP nodes away from their input, if the input
+ // is a rawptr (5071820).
+ Node *def = self->in(1);
+ if (def != NULL && def->bottom_type()->base() == Type::RawPtr) {
+ early->add_inst(self);
+ continue;
+ }
+ break;
+ }
+ }
+
+ // Gather LCA of all uses
+ Block *LCA = NULL;
+ {
+ for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
+ // For all uses, find LCA
+ Node* use = self->fast_out(i);
+ LCA = raise_LCA_above_use(LCA, use, self, _bbs);
+ }
+ } // (Hide defs of imax, i from rest of block.)
+
+ // Place temps in the block of their use. This isn't a
+ // requirement for correctness but it reduces useless
+ // interference between temps and other nodes.
+ if (mach != NULL && mach->is_MachTemp()) {
+ _bbs.map(self->_idx, LCA);
+ LCA->add_inst(self);
+ continue;
+ }
+
+ // Check if 'self' could be anti-dependent on memory
+ if (self->needs_anti_dependence_check()) {
+ // Hoist LCA above possible-defs and insert anti-dependences to
+ // defs in new LCA block.
+ LCA = insert_anti_dependences(LCA, self);
+ }
+
+ if (early->_dom_depth > LCA->_dom_depth) {
+ // Somehow the LCA has moved above the earliest legal point.
+ // (One way this can happen is via memory_early_block.)
+ if (C->subsume_loads() == true && !C->failing()) {
+ // Retry with subsume_loads == false
+ // If this is the first failure, the sentinel string will "stick"
+ // to the Compile object, and the C2Compiler will see it and retry.
+ C->record_failure(C2Compiler::retry_no_subsuming_loads());
+ } else {
+ // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)
+ C->record_method_not_compilable("late schedule failed: incorrect graph");
+ }
+ return;
+ }
+
+ // If there is no opportunity to hoist, then we're done.
+ bool try_to_hoist = (LCA != early);
+
+ // Must clone guys stay next to use; no hoisting allowed.
+ // Also cannot hoist guys that alter memory or are otherwise not
+ // allocatable (hoisting can make a value live longer, leading to
+ // anti and output dependency problems which are normally resolved
+ // by the register allocator giving everyone a different register).
+ if (mach != NULL && must_clone[mach->ideal_Opcode()])
+ try_to_hoist = false;
+
+ Block* late = NULL;
+ if (try_to_hoist) {
+ // Now find the block with the least execution frequency.
+ // Start at the latest schedule and work up to the earliest schedule
+ // in the dominator tree. Thus the Node will dominate all its uses.
+ late = hoist_to_cheaper_block(LCA, early, self);
+ } else {
+ // Just use the LCA of the uses.
+ late = LCA;
+ }
+
+ // Put the node into target block
+ schedule_node_into_block(self, late);
+
+#ifdef ASSERT
+ if (self->needs_anti_dependence_check()) {
+ // since precedence edges are only inserted when we're sure they
+ // are needed make sure that after placement in a block we don't
+ // need any new precedence edges.
+ verify_anti_dependences(late, self);
+ }
+#endif
+ } // Loop until all nodes have been visited
+
+} // end schedule_late
+
+//------------------------------GlobalCodeMotion-------------------------------
+void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+ ResourceMark rm;
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("\n---- Start GlobalCodeMotion ----\n");
+ }
+#endif
+
+ // Initialize the bbs.map for things on the proj_list
+ uint i;
+ for( i=0; i < proj_list.size(); i++ )
+ _bbs.map(proj_list[i]->_idx, NULL);
+
+ // Set the basic block for Nodes pinned into blocks
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+ schedule_pinned_nodes( visited );
+
+ // Find the earliest Block any instruction can be placed in. Some
+ // instructions are pinned into Blocks. Unpinned instructions can
+ // appear in the last block in which all their inputs occur.
+ visited.Clear();
+ Node_List stack(a);
+ stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+ if (!schedule_early(visited, stack)) {
+ // Bailout without retry
+ C->record_method_not_compilable("early schedule failed");
+ return;
+ }
+
+ // Build Def-Use edges.
+ proj_list.push(_root); // Add real root as another root
+ proj_list.pop();
+
+ // Compute the latency information (via backwards walk) for all the
+ // instructions in the graph
+ GrowableArray<uint> node_latency;
+ _node_latency = node_latency;
+
+ if( C->do_scheduling() )
+ ComputeLatenciesBackwards(visited, stack);
+
+ // Now schedule all codes as LATE as possible. This is the LCA in the
+ // dominator tree of all USES of a value. Pick the block with the least
+ // loop nesting depth that is lowest in the dominator tree.
+ // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
+ schedule_late(visited, stack);
+ if( C->failing() ) {
+ // schedule_late fails only when graph is incorrect.
+ assert(!VerifyGraphEdges, "verification should have failed");
+ return;
+ }
+
+ unique = C->unique();
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("\n---- Detect implicit null checks ----\n");
+ }
+#endif
+
+ // Detect implicit-null-check opportunities. Basically, find NULL checks
+ // with suitable memory ops nearby. Use the memory op to do the NULL check.
+ // I can generate a memory op if there is not one nearby.
+ if (C->is_method_compilation()) {
+ // Don't do it for natives, adapters, or runtime stubs
+ int allowed_reasons = 0;
+ // ...and don't do it when there have been too many traps, globally.
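+ // allowed_reasons is a bit mask over deopt reasons: a set bit marks a
+ // reason that has not trapped too often and so may still be used by
+ // implicit_null_check below.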
+ for (int reason = (int)Deoptimization::Reason_none+1;
+ reason < Compile::trapHistLength; reason++) {
+ assert(reason < BitsPerInt, "recode bit map");
+ if (!C->too_many_traps((Deoptimization::DeoptReason) reason))
+ allowed_reasons |= nth_bit(reason);
+ }
+ // By reversing the loop direction we get a very minor gain on mpegaudio.
+ // Feel free to revert to a forward loop for clarity.
+ // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
+ for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
+ Node *proj = matcher._null_check_tests[i ];
+ Node *val = matcher._null_check_tests[i+1];
+ _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons);
+ // The implicit_null_check will only perform the transformation
+ // if the null branch is truly uncommon, *and* it leads to an
+ // uncommon trap. Combined with the too_many_traps guards
+ // above, this prevents SEGV storms reported in 6366351,
+ // by recompiling offending methods without this optimization.
+ }
+ }
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("\n---- Start Local Scheduling ----\n");
+ }
+#endif
+
+ // Schedule locally. Right now a simple topological sort.
+ // Later, do a real latency aware scheduler.
+ int *ready_cnt = NEW_RESOURCE_ARRAY(int,C->unique());
+ memset( ready_cnt, -1, C->unique() * sizeof(int) );
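+ // (memset with -1 writes byte 0xFF everywhere, leaving each int entry
+ // equal to -1 until local scheduling computes the real counts.)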
+ visited.Clear();
+ for (i = 0; i < _num_blocks; i++) {
+ if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+ if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
+ C->record_method_not_compilable("local schedule failed");
+ }
+ return;
+ }
+ }
+
+ // If we inserted any instructions between a Call and his CatchNode,
+ // clone the instructions on all paths below the Catch.
+ for( i=0; i < _num_blocks; i++ )
+ _blocks[i]->call_catch_cleanup(_bbs);
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("\n---- After GlobalCodeMotion ----\n");
+ for (uint i = 0; i < _num_blocks; i++) {
+ _blocks[i]->dump();
+ }
+ }
+#endif
+}
+
+
+//------------------------------Estimate_Block_Frequency-----------------------
+// Estimate block frequencies based on IfNode probabilities.
+void PhaseCFG::Estimate_Block_Frequency() {
+ int cnts = C->method() ? C->method()->interpreter_invocation_count() : 1;
+ // Most of our algorithms will die horribly if frequency can become
+ // negative so make sure cnts is a sane value.
+ if( cnts <= 0 ) cnts = 1;
+ float f = (float)cnts/(float)FreqCountInvocations;
+
+ // Create the loop tree and calculate loop depth.
+ _root_loop = create_loop_tree();
+ _root_loop->compute_loop_depth(0);
+
+ // Compute block frequency of each block, relative to a single loop entry.
+ _root_loop->compute_freq();
+
+ // Adjust all frequencies to be relative to a single method entry
+ _root_loop->_freq = f * 1.0;
+ _root_loop->scale_freq();
+
+ // force paths ending at uncommon traps to be infrequent
+ Block_List worklist;
+ Block* root_blk = _blocks[0];
+ for (uint i = 0; i < root_blk->num_preds(); i++) {
+ Block *pb = _bbs[root_blk->pred(i)->_idx];
+ if (pb->has_uncommon_code()) {
+ worklist.push(pb);
+ }
+ }
+ while (worklist.size() > 0) {
+ Block* uct = worklist.pop();
+ uct->_freq = PROB_MIN;
+ for (uint i = 0; i < uct->num_preds(); i++) {
+ Block *pb = _bbs[uct->pred(i)->_idx];
+ if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
+ worklist.push(pb);
+ }
+ }
+ }
+
+#ifndef PRODUCT
+ if (PrintCFGBlockFreq) {
+ tty->print_cr("CFG Block Frequencies");
+ _root_loop->dump_tree();
+ if (Verbose) {
+ tty->print_cr("PhaseCFG dump");
+ dump();
+ tty->print_cr("Node dump");
+ _root->dump(99999);
+ }
+ }
+#endif
+}
+
+//----------------------------create_loop_tree--------------------------------
+// Create a loop tree from the CFG
+CFGLoop* PhaseCFG::create_loop_tree() {
+
+#ifdef ASSERT
+ assert( _blocks[0] == _broot, "" );
+ for (uint i = 0; i < _num_blocks; i++ ) {
+ Block *b = _blocks[i];
+ // Check that _loop field are clear...we could clear them if not.
+ assert(b->_loop == NULL, "clear _loop expected");
+ // Sanity check that the RPO numbering is reflected in the _blocks array.
+ // It doesn't have to be for the loop tree to be built, but if it is not,
+ // then the blocks have been reordered since dom graph building...which
+ // calls the RPO numbering into question.
+ assert(b->_rpo == i, "unexpected reverse post order number");
+ }
+#endif
+
+ int idct = 0;
+ CFGLoop* root_loop = new CFGLoop(idct++);
+
+ Block_List worklist;
+
+ // Assign blocks to loops
+ for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
+ Block *b = _blocks[i];
+
+ if (b->head()->is_Loop()) {
+ Block* loop_head = b;
+ assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
+ Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
+ Block* tail = _bbs[tail_n->_idx];
+
+ // Defensively filter out Loop nodes for non-single-entry loops.
+ // For all reasonable loops, the head occurs before the tail in RPO.
+ if (i <= tail->_rpo) {
+
+ // The tail and (recursive) predecessors of the tail
+ // are made members of a new loop.
+
+ assert(worklist.size() == 0, "nonempty worklist");
+ CFGLoop* nloop = new CFGLoop(idct++);
+ assert(loop_head->_loop == NULL, "just checking");
+ loop_head->_loop = nloop;
+ // Add to nloop so push_pred() will skip over inner loops
+ nloop->add_member(loop_head);
+ nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs);
+
+ while (worklist.size() > 0) {
+ Block* member = worklist.pop();
+ if (member != loop_head) {
+ for (uint j = 1; j < member->num_preds(); j++) {
+ nloop->push_pred(member, j, worklist, _bbs);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Create a member list for each loop consisting
+ // of both blocks and (immediate child) loops.
+ for (uint i = 0; i < _num_blocks; i++) {
+ Block *b = _blocks[i];
+ CFGLoop* lp = b->_loop;
+ if (lp == NULL) {
+ // Not assigned to a loop. Add it to the method's pseudo loop.
+ b->_loop = root_loop;
+ lp = root_loop;
+ }
+ if (lp == root_loop || b != lp->head()) { // loop heads are already members
+ lp->add_member(b);
+ }
+ if (lp != root_loop) {
+ if (lp->parent() == NULL) {
+ // Not a nested loop. Make it a child of the method's pseudo loop.
+ root_loop->add_nested_loop(lp);
+ }
+ if (b == lp->head()) {
+ // Add nested loop to member list of parent loop.
+ lp->parent()->add_member(lp);
+ }
+ }
+ }
+
+ return root_loop;
+}
+
+//------------------------------push_pred--------------------------------------
+void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) {
+ Node* pred_n = blk->pred(i);
+ Block* pred = node_to_blk[pred_n->_idx];
+ CFGLoop *pred_loop = pred->_loop;
+ if (pred_loop == NULL) {
+ // Filter out blocks for non-single-entry loops.
+ // For all reasonable loops, the head occurs before the tail in RPO.
+ if (pred->_rpo > head()->_rpo) {
+ pred->_loop = this;
+ worklist.push(pred);
+ }
+ } else if (pred_loop != this) {
+ // Nested loop.
+ while (pred_loop->_parent != NULL && pred_loop->_parent != this) {
+ pred_loop = pred_loop->_parent;
+ }
+ // Make pred's loop be a child
+ if (pred_loop->_parent == NULL) {
+ add_nested_loop(pred_loop);
+ // Continue with loop entry predecessor.
+ Block* pred_head = pred_loop->head();
+ assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
+ assert(pred_head != head(), "loop head in only one loop");
+ push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk);
+ } else {
+ assert(pred_loop->_parent == this && _parent == NULL, "just checking");
+ }
+ }
+}
+
+//------------------------------add_nested_loop--------------------------------
+// Make cl a child of the current loop in the loop tree.
+void CFGLoop::add_nested_loop(CFGLoop* cl) {
+ assert(_parent == NULL, "no parent yet");
+ assert(cl != this, "not my own parent");
+ cl->_parent = this;
+ CFGLoop* ch = _child;
+ if (ch == NULL) {
+ _child = cl;
+ } else {
+ while (ch->_sibling != NULL) { ch = ch->_sibling; }
+ ch->_sibling = cl;
+ }
+}
+
+//------------------------------compute_loop_depth-----------------------------
+// Store the loop depth in each CFGLoop object.
+// Recursively walk the children to do the same for them.
+void CFGLoop::compute_loop_depth(int depth) {
+ _depth = depth;
+ CFGLoop* ch = _child;
+ while (ch != NULL) {
+ ch->compute_loop_depth(depth + 1);
+ ch = ch->_sibling;
+ }
+}
+
+//------------------------------compute_freq-----------------------------------
+// Compute the frequency of each block and loop, relative to a single entry
+// into the dominating loop head.
+void CFGLoop::compute_freq() {
+ // Bottom up traversal of loop tree (visit inner loops first.)
+ // Set loop head frequency to 1.0, then transitively
+ // compute frequency for all successors in the loop,
+ // as well as for each exit edge. Inner loops are
+ // treated as single blocks with loop exit targets
+ // as the successor blocks.
+
+ // Nested loops first
+ CFGLoop* ch = _child;
+ while (ch != NULL) {
+ ch->compute_freq();
+ ch = ch->_sibling;
+ }
+ assert (_members.length() > 0, "no empty loops");
+ Block* hd = head();
+ hd->_freq = 1.0f;
+ for (int i = 0; i < _members.length(); i++) {
+ CFGElement* s = _members.at(i);
+ float freq = s->_freq;
+ if (s->is_block()) {
+ Block* b = s->as_Block();
+ for (uint j = 0; j < b->_num_succs; j++) {
+ Block* sb = b->_succs[j];
+ update_succ_freq(sb, freq * b->succ_prob(j));
+ }
+ } else {
+ CFGLoop* lp = s->as_CFGLoop();
+ assert(lp->_parent == this, "immediate child");
+ for (int k = 0; k < lp->_exits.length(); k++) {
+ Block* eb = lp->_exits.at(k).get_target();
+ float prob = lp->_exits.at(k).get_prob();
+ update_succ_freq(eb, freq * prob);
+ }
+ }
+ }
+
+#if 0
+ // Raise frequency of the loop backedge block, in an effort
+ // to keep it empty. Skip the method level "loop".
+ if (_parent != NULL) {
+ CFGElement* s = _members.at(_members.length() - 1);
+ if (s->is_block()) {
+ Block* bk = s->as_Block();
+ if (bk->_num_succs == 1 && bk->_succs[0] == hd) {
+ // almost any value >= 1.0f works
+ // FIXME: raw constant
+ bk->_freq = 1.05f;
+ }
+ }
+ }
+#endif
+
+ // For all loops other than the outer, "method" loop,
+ // sum and normalize the exit probability. The "method" loop
+ // should keep the initial exit probability of 1, so that
+ // inner blocks do not get erroneously scaled.
+ if (_depth != 0) {
+ // Total the exit probabilities for this loop.
+ float exits_sum = 0.0f;
+ for (int i = 0; i < _exits.length(); i++) {
+ exits_sum += _exits.at(i).get_prob();
+ }
+
+ // Normalize the exit probabilities. Until now, the
+ // probabilities estimate the possibility of exit per
+ // a single loop iteration; afterward, they estimate
+ // the probability of exit per loop entry.
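+ // For example, two exits with per-iteration probabilities 0.01 and 0.03
+ // normalize to 0.25 and 0.75 of the per-entry exit probability.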
+ for (int i = 0; i < _exits.length(); i++) {
+ Block* et = _exits.at(i).get_target();
+ float new_prob = _exits.at(i).get_prob() / exits_sum;
+ BlockProbPair bpp(et, new_prob);
+ _exits.at_put(i, bpp);
+ }
+
+ // Save the total, but guard against unreasonable probability,
+ // as the value is used to estimate the loop trip count.
+ // An infinite trip count would blur relative block
+ // frequencies.
+ if (exits_sum > 1.0f) exits_sum = 1.0f;
+ if (exits_sum < PROB_MIN) exits_sum = PROB_MIN;
+ _exit_prob = exits_sum;
+ }
+}
+
+//------------------------------succ_prob-------------------------------------
+// Determine the probability of reaching successor 'i' from the receiver block.
+float Block::succ_prob(uint i) {
+ int eidx = end_idx();
+ Node *n = _nodes[eidx]; // Get ending Node
+ int op = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : n->Opcode();
+
+ // Switch on branch type
+ switch( op ) {
+ case Op_CountedLoopEnd:
+ case Op_If: {
+ assert (i < 2, "just checking");
+ // Conditionals pass on only part of their frequency
+ float prob = n->as_MachIf()->_prob;
+ assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
+ // If succ[i] is the FALSE branch, invert path info
+ if( _nodes[i + eidx + 1]->Opcode() == Op_IfFalse ) {
+ return 1.0f - prob; // not taken
+ } else {
+ return prob; // taken
+ }
+ }
+
+ case Op_Jump:
+ // Divide the frequency between all successors evenly
+ return 1.0f/_num_succs;
+
+ case Op_Catch: {
+ const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
+ if (ci->_con == CatchProjNode::fall_through_index) {
+ // Fall-thru path gets the lion's share.
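+ // (PROB_UNLIKELY_MAG(5) is a tiny magnitude, so with a handful of
+ // successors the fall-through path keeps essentially all the frequency.)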
+ return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
+ } else {
+ // Presume exceptional paths are equally unlikely
+ return PROB_UNLIKELY_MAG(5);
+ }
+ }
+
+ case Op_Root:
+ case Op_Goto:
+ // Pass frequency straight thru to target
+ return 1.0f;
+
+ case Op_NeverBranch:
+ return 0.0f;
+
+ case Op_TailCall:
+ case Op_TailJump:
+ case Op_Return:
+ case Op_Halt:
+ case Op_Rethrow:
+ // Do not push out freq to root block
+ return 0.0f;
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ return 0.0f;
+}
+
+//------------------------------update_succ_freq-------------------------------
+// Update the appropriate frequency associated with block 'b', a successor of
+// a block in this loop.
+void CFGLoop::update_succ_freq(Block* b, float freq) {
+ if (b->_loop == this) {
+ if (b == head()) {
+ // back branch within the loop
+ // Do nothing now; the loop-carried frequency will be
+ // adjusted later in scale_freq().
+ } else {
+ // simple branch within the loop
+ b->_freq += freq;
+ }
+ } else if (!in_loop_nest(b)) {
+ // branch is exit from this loop
+ BlockProbPair bpp(b, freq);
+ _exits.append(bpp);
+ } else {
+ // branch into nested loop
+ CFGLoop* ch = b->_loop;
+ ch->_freq += freq;
+ }
+}
+
+//------------------------------in_loop_nest-----------------------------------
+// Determine if block b is in the receiver's loop nest.
+bool CFGLoop::in_loop_nest(Block* b) {
+ int depth = _depth;
+ CFGLoop* b_loop = b->_loop;
+ int b_depth = b_loop->_depth;
+ if (depth == b_depth) {
+ return true;
+ }
+ while (b_depth > depth) {
+ b_loop = b_loop->_parent;
+ b_depth = b_loop->_depth;
+ }
+ return b_loop == this;
+}
+
+//------------------------------scale_freq-------------------------------------
+// Scale frequency of loops and blocks by trip counts from outer loops
+// Do a top down traversal of loop tree (visit outer loops first.)
+void CFGLoop::scale_freq() {
+ float loop_freq = _freq * trip_count();
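+ // For example, a member block with relative frequency 0.5 in a loop entered
+ // with frequency 1 and an estimated trip count of 10 scales to an absolute
+ // frequency of 5.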
+ for (int i = 0; i < _members.length(); i++) {
+ CFGElement* s = _members.at(i);
+ s->_freq *= loop_freq;
+ }
+ CFGLoop* ch = _child;
+ while (ch != NULL) {
+ ch->scale_freq();
+ ch = ch->_sibling;
+ }
+}
+
+#ifndef PRODUCT
+//------------------------------dump_tree--------------------------------------
+void CFGLoop::dump_tree() const {
+ dump();
+ if (_child != NULL) _child->dump_tree();
+ if (_sibling != NULL) _sibling->dump_tree();
+}
+
+//------------------------------dump-------------------------------------------
+void CFGLoop::dump() const {
+ for (int i = 0; i < _depth; i++) tty->print(" ");
+ tty->print("%s: %d trip_count: %6.0f freq: %6.0f\n",
+ _depth == 0 ? "Method" : "Loop", _id, trip_count(), _freq);
+ for (int i = 0; i < _depth; i++) tty->print(" ");
+ tty->print(" members:", _id);
+ int k = 0;
+ for (int i = 0; i < _members.length(); i++) {
+ if (k++ >= 6) {
+ tty->print("\n ");
+ for (int j = 0; j < _depth+1; j++) tty->print(" ");
+ k = 0;
+ }
+ CFGElement *s = _members.at(i);
+ if (s->is_block()) {
+ Block *b = s->as_Block();
+ tty->print(" B%d(%6.3f)", b->_pre_order, b->_freq);
+ } else {
+ CFGLoop* lp = s->as_CFGLoop();
+ tty->print(" L%d(%6.3f)", lp->_id, lp->_freq);
+ }
+ }
+ tty->print("\n");
+ for (int i = 0; i < _depth; i++) tty->print(" ");
+ tty->print(" exits: ");
+ k = 0;
+ for (int i = 0; i < _exits.length(); i++) {
+ if (k++ >= 7) {
+ tty->print("\n ");
+ for (int j = 0; j < _depth+1; j++) tty->print(" ");
+ k = 0;
+ }
+ Block *blk = _exits.at(i).get_target();
+ float prob = _exits.at(i).get_prob();
+ tty->print(" ->%d@%d%%", blk->_pre_order, (int)(prob*100));
+ }
+ tty->print("\n");
+}
+#endif
diff --git a/src/share/vm/opto/generateOptoStub.cpp b/src/share/vm/opto/generateOptoStub.cpp
new file mode 100644
index 000000000..490c4e855
--- /dev/null
+++ b/src/share/vm/opto/generateOptoStub.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_generateOptoStub.cpp.incl"
+
+//--------------------gen_stub-------------------------------
+void GraphKit::gen_stub(address C_function,
+ const char *name,
+ int is_fancy_jump,
+ bool pass_tls,
+ bool return_pc) {
+ ResourceMark rm;
+
+ const TypeTuple *jdomain = C->tf()->domain();
+ const TypeTuple *jrange = C->tf()->range();
+
+ // The procedure start
+ StartNode* start = new (C, 2) StartNode(root(), jdomain);
+ _gvn.set_type_bottom(start);
+
+ // Make a map, with JVM state
+ uint parm_cnt = jdomain->cnt();
+ uint max_map = MAX2(2*parm_cnt+1, jrange->cnt());
+ // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
+ assert(SynchronizationEntryBCI == InvocationEntryBci, "");
+ JVMState* jvms = new (C) JVMState(0);
+ jvms->set_bci(InvocationEntryBci);
+ jvms->set_monoff(max_map);
+ jvms->set_endoff(max_map);
+ {
+ SafePointNode *map = new (C, max_map) SafePointNode( max_map, jvms );
+ jvms->set_map(map);
+ set_jvms(jvms);
+ assert(map == this->map(), "kit.map is set");
+ }
+
+ // Make up the parameters
+ uint i;
+ for( i = 0; i < parm_cnt; i++ )
+ map()->init_req(i, _gvn.transform(new (C, 1) ParmNode(start, i)));
+ for( ; i<map()->req(); i++ )
+ map()->init_req(i, top()); // For nicer debugging
+
+ // GraphKit requires memory to be a MergeMemNode:
+ set_all_memory(map()->memory());
+
+ // Get base of thread-local storage area
+ Node* thread = _gvn.transform( new (C, 1) ThreadLocalNode() );
+
+ const int NoAlias = Compile::AliasIdxBot;
+
+ Node* adr_last_Java_pc = basic_plus_adr(top(),
+ thread,
+ in_bytes(JavaThread::frame_anchor_offset()) +
+ in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
+#if defined(SPARC) || defined(IA64)
+ Node* adr_flags = basic_plus_adr(top(),
+ thread,
+ in_bytes(JavaThread::frame_anchor_offset()) +
+ in_bytes(JavaFrameAnchor::flags_offset()));
+#endif /* defined(SPARC) || defined(IA64) */
+
+
+ // Drop in the last_Java_sp. last_Java_fp is not touched.
+ // Always do this after the other "last_Java_frame" fields are set, since
+ // as soon as last_Java_sp != NULL, has_last_Java_frame becomes true and
+ // users will look at the other fields.
+ //
+ Node *adr_sp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_sp_offset()));
+#ifndef IA64
+ Node *last_sp = basic_plus_adr(top(), frameptr(), (intptr_t) STACK_BIAS);
+ store_to_memory(NULL, adr_sp, last_sp, T_ADDRESS, NoAlias);
+#endif
+
+ // Set _thread_in_native
+ // The order of stores into TLS is critical! Setting _thread_in_native MUST
+ // be last, because a GC is allowed at any time after setting it and the GC
+ // will require last_Java_pc and last_Java_sp.
+ Node* adr_state = basic_plus_adr(top(), thread, in_bytes(JavaThread::thread_state_offset()));
+
+ //-----------------------------
+ // Compute signature for C call. Varies from the Java signature!
+ const Type **fields = TypeTuple::fields(2*parm_cnt+2);
+ uint cnt = TypeFunc::Parms;
+ // The C routine gets the base of thread-local storage passed in as an
+ // extra argument. Not all calls need it, but it's cheap to add here.
+ for( ; cnt<parm_cnt; cnt++ )
+ fields[cnt] = jdomain->field_at(cnt);
+ fields[cnt++] = TypeRawPtr::BOTTOM; // Thread-local storage
+ // Also pass in the caller's PC, if asked for.
+ if( return_pc )
+ fields[cnt++] = TypeRawPtr::BOTTOM; // Return PC
+
+ const TypeTuple* domain = TypeTuple::make(cnt,fields);
+ // The C routine we are about to call cannot return an oop; it can block on
+ // exit and a GC will trash the oop while it sits in C-land. Instead, we
+ // return the oop through TLS for runtime calls.
+ // Also, C routines returning integer subword values leave the high
+ // order bits dirty; these must be cleaned up by explicit sign extension.
+ const Type* retval = (jrange->cnt() == TypeFunc::Parms) ? Type::TOP : jrange->field_at(TypeFunc::Parms);
+ // Make a private copy of jrange->fields();
+ const Type **rfields = TypeTuple::fields(jrange->cnt() - TypeFunc::Parms);
+ // Fixup oop returns
+ int retval_ptr = retval->isa_oop_ptr();
+ if( retval_ptr ) {
+ assert( pass_tls, "Oop must be returned thru TLS" );
+ // Fancy-jumps return address; others return void
+ rfields[TypeFunc::Parms] = is_fancy_jump ? TypeRawPtr::BOTTOM : Type::TOP;
+
+ } else if( retval->isa_int() ) { // Returning any integer subtype?
+ // "Fatten" byte, char & short return types to 'int' to show that
+ // the native C code can return values with junk high order bits.
+ // We'll sign-extend it below later.
+ rfields[TypeFunc::Parms] = TypeInt::INT; // It's "dirty" and needs sign-ext
+
+ } else if( jrange->cnt() >= TypeFunc::Parms+1 ) { // Else copy other types
+ rfields[TypeFunc::Parms] = jrange->field_at(TypeFunc::Parms);
+ if( jrange->cnt() == TypeFunc::Parms+2 )
+ rfields[TypeFunc::Parms+1] = jrange->field_at(TypeFunc::Parms+1);
+ }
+ const TypeTuple* range = TypeTuple::make(jrange->cnt(),rfields);
+
+ // Final C signature
+ const TypeFunc *c_sig = TypeFunc::make(domain,range);
+
+ //-----------------------------
+ // Make the call node
+ CallRuntimeNode *call = new (C, c_sig->domain()->cnt())
+ CallRuntimeNode(c_sig, C_function, name, TypePtr::BOTTOM);
+ //-----------------------------
+
+ // Fix-up the debug info for the call
+ call->set_jvms( new (C) JVMState(0) );
+ call->jvms()->set_bci(0);
+ call->jvms()->set_offsets(cnt);
+
+ // Set fixed predefined input arguments
+ cnt = 0;
+ for( i=0; i<TypeFunc::Parms; i++ )
+ call->init_req( cnt++, map()->in(i) );
+ // A little too aggressive on the parm copy; return address is not an input
+ call->set_req(TypeFunc::ReturnAdr, top());
+ for( ; i<parm_cnt; i++ ) // Regular input arguments
+ call->init_req( cnt++, map()->in(i) );
+
+ call->init_req( cnt++, thread );
+ if( return_pc ) // Return PC, if asked for
+ call->init_req( cnt++, returnadr() );
+ _gvn.transform_no_reclaim(call);
+
+
+ //-----------------------------
+ // Now set up the return results
+ set_control( _gvn.transform( new (C, 1) ProjNode(call,TypeFunc::Control)) );
+ set_i_o( _gvn.transform( new (C, 1) ProjNode(call,TypeFunc::I_O )) );
+ set_all_memory_call(call);
+ if (range->cnt() > TypeFunc::Parms) {
+ Node* retnode = _gvn.transform( new (C, 1) ProjNode(call,TypeFunc::Parms) );
+ // C-land is allowed to return sub-word values. Convert to integer type.
+ assert( retval != Type::TOP, "" );
+ if (retval == TypeInt::BOOL) {
+ retnode = _gvn.transform( new (C, 3) AndINode(retnode, intcon(0xFF)) );
+ } else if (retval == TypeInt::CHAR) {
+ retnode = _gvn.transform( new (C, 3) AndINode(retnode, intcon(0xFFFF)) );
+ } else if (retval == TypeInt::BYTE) {
+ retnode = _gvn.transform( new (C, 3) LShiftINode(retnode, intcon(24)) );
+ retnode = _gvn.transform( new (C, 3) RShiftINode(retnode, intcon(24)) );
+ } else if (retval == TypeInt::SHORT) {
+ retnode = _gvn.transform( new (C, 3) LShiftINode(retnode, intcon(16)) );
+ retnode = _gvn.transform( new (C, 3) RShiftINode(retnode, intcon(16)) );
+ }
+ map()->set_req( TypeFunc::Parms, retnode );
+ }
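+
+ // For illustration only: the AndI/LShiftI/RShiftI clean-ups above compute
+ // what a C caller would write by hand for each subword type (a sketch,
+ // relying on RShiftI being an arithmetic shift of the signed 32-bit value):
+ //
+ //   jint clean_bool (jint x) { return x & 0xFF;        }  // zero-extend
+ //   jint clean_char (jint x) { return x & 0xFFFF;      }  // zero-extend
+ //   jint clean_byte (jint x) { return (x << 24) >> 24; }  // sign-extend
+ //   jint clean_short(jint x) { return (x << 16) >> 16; }  // sign-extend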
+
+ //-----------------------------
+
+ // Clear last_Java_sp
+#ifdef IA64
+ if( os::is_MP() ) insert_mem_bar(Op_MemBarRelease);
+#endif
+
+ store_to_memory(NULL, adr_sp, null(), T_ADDRESS, NoAlias);
+#ifdef IA64
+ if (os::is_MP() && UseMembar) insert_mem_bar(new MemBarVolatileNode());
+#endif // def IA64
+ // Clear last_Java_pc and (optionally)_flags
+ store_to_memory(NULL, adr_last_Java_pc, null(), T_ADDRESS, NoAlias);
+#if defined(SPARC) || defined(IA64)
+ store_to_memory(NULL, adr_flags, intcon(0), T_INT, NoAlias);
+#endif /* defined(SPARC) || defined(IA64) */
+#ifdef IA64
+ Node* adr_last_Java_fp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_fp_offset()));
+ if( os::is_MP() ) insert_mem_bar(Op_MemBarRelease);
+ store_to_memory(NULL, adr_last_Java_fp, null(), T_ADDRESS, NoAlias);
+#endif
+
+ // For is-fancy-jump, the C-return value is also the branch target
+ Node* target = map()->in(TypeFunc::Parms);
+ // Runtime call returning oop in TLS? Fetch it out
+ if( pass_tls ) {
+ Node* adr = basic_plus_adr(top(), thread, in_bytes(JavaThread::vm_result_offset()));
+ Node* vm_result = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false);
+ map()->set_req(TypeFunc::Parms, vm_result); // vm_result passed as result
+ // clear thread-local-storage(tls)
+ store_to_memory(NULL, adr, null(), T_ADDRESS, NoAlias);
+ }
+
+ //-----------------------------
+ // check exception
+ Node* adr = basic_plus_adr(top(), thread, in_bytes(Thread::pending_exception_offset()));
+ Node* pending = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false);
+
+ Node* exit_memory = reset_memory();
+
+ Node* cmp = _gvn.transform( new (C, 3) CmpPNode(pending, null()) );
+ Node* bo = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ne) );
+ IfNode *iff = create_and_map_if(control(), bo, PROB_MIN, COUNT_UNKNOWN);
+
+ Node* if_null = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ Node* if_not_null = _gvn.transform( new (C, 1) IfTrueNode(iff) );
+
+ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+ Node *exc_target = makecon(TypeRawPtr::make( StubRoutines::forward_exception_entry() ));
+ Node *to_exc = new (C, TypeFunc::Parms+2) TailCallNode(if_not_null,
+ i_o(),
+ exit_memory,
+ frameptr(),
+ returnadr(),
+ exc_target, null());
+ root()->add_req(_gvn.transform(to_exc)); // bind to root to keep live
+ C->init_start(start);
+
+ //-----------------------------
+ // If this is a normal subroutine return, issue the return and be done.
+ Node *ret;
+ switch( is_fancy_jump ) {
+ case 0: // Make a return instruction
+ // Return to caller, free any space for return address
+ ret = new (C, TypeFunc::Parms) ReturnNode(TypeFunc::Parms, if_null,
+ i_o(),
+ exit_memory,
+ frameptr(),
+ returnadr());
+ if (C->tf()->range()->cnt() > TypeFunc::Parms)
+ ret->add_req( map()->in(TypeFunc::Parms) );
+ break;
+ case 1: // This is a fancy tail-call jump. Jump to computed address.
+ // Jump to new callee; leave old return address alone.
+ ret = new (C, TypeFunc::Parms+2) TailCallNode(if_null,
+ i_o(),
+ exit_memory,
+ frameptr(),
+ returnadr(),
+ target, map()->in(TypeFunc::Parms));
+ break;
+ case 2: // Pop return address & jump
+ // Throw away old return address; jump to new computed address
+ //assert(C_function == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C), "fancy_jump==2 only for rethrow");
+ ret = new (C, TypeFunc::Parms+2) TailJumpNode(if_null,
+ i_o(),
+ exit_memory,
+ frameptr(),
+ target, map()->in(TypeFunc::Parms));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ root()->add_req(_gvn.transform(ret));
+}
diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
new file mode 100644
index 000000000..8df5f4272
--- /dev/null
+++ b/src/share/vm/opto/graphKit.cpp
@@ -0,0 +1,3146 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_graphKit.cpp.incl"
+
+//----------------------------GraphKit-----------------------------------------
+// Main utility constructor.
+GraphKit::GraphKit(JVMState* jvms)
+ : Phase(Phase::Parser),
+ _env(C->env()),
+ _gvn(*C->initial_gvn())
+{
+ _exceptions = jvms->map()->next_exception();
+ if (_exceptions != NULL) jvms->map()->set_next_exception(NULL);
+ set_jvms(jvms);
+}
+
+// Private constructor for parser.
+GraphKit::GraphKit()
+ : Phase(Phase::Parser),
+ _env(C->env()),
+ _gvn(*C->initial_gvn())
+{
+ _exceptions = NULL;
+ set_map(NULL);
+ debug_only(_sp = -99);
+ debug_only(set_bci(-99));
+}
+
+
+
+//---------------------------clean_stack---------------------------------------
+// Clear away rubbish from the stack area of the JVM state.
+// This destroys any arguments that may be waiting on the stack.
+void GraphKit::clean_stack(int from_sp) {
+ SafePointNode* map = this->map();
+ JVMState* jvms = this->jvms();
+ int stk_size = jvms->stk_size();
+ int stkoff = jvms->stkoff();
+ Node* top = this->top();
+ for (int i = from_sp; i < stk_size; i++) {
+ if (map->in(stkoff + i) != top) {
+ map->set_req(stkoff + i, top);
+ }
+ }
+}
+
+
+//--------------------------------sync_jvms-----------------------------------
+// Make sure our current jvms agrees with our parse state.
+JVMState* GraphKit::sync_jvms() const {
+ JVMState* jvms = this->jvms();
+ jvms->set_bci(bci()); // Record the new bci in the JVMState
+ jvms->set_sp(sp()); // Record the new sp in the JVMState
+ assert(jvms_in_sync(), "jvms is now in sync");
+ return jvms;
+}
+
+#ifdef ASSERT
+bool GraphKit::jvms_in_sync() const {
+ Parse* parse = is_Parse();
+ if (parse == NULL) {
+ if (bci() != jvms()->bci()) return false;
+ if (sp() != (int)jvms()->sp()) return false;
+ return true;
+ }
+ if (jvms()->method() != parse->method()) return false;
+ if (jvms()->bci() != parse->bci()) return false;
+ int jvms_sp = jvms()->sp();
+ if (jvms_sp != parse->sp()) return false;
+ int jvms_depth = jvms()->depth();
+ if (jvms_depth != parse->depth()) return false;
+ return true;
+}
+
+// Local helper checks for special internal merge points
+// used to accumulate and merge exception states.
+// They are marked by the region's in(0) edge being the root node.
+// Such merge points must never "escape" into the parser at large,
+// until they have been handed to gvn.transform.
+static bool is_hidden_merge(Node* reg) {
+ if (reg == NULL) return false;
+ if (reg->is_Phi()) {
+ reg = reg->in(0);
+ if (reg == NULL) return false;
+ }
+ return reg->is_Region() && reg->in(0) != NULL && reg->in(0)->is_Root();
+}
+
+void GraphKit::verify_map() const {
+ if (map() == NULL) return; // null map is OK
+ assert(map()->req() <= jvms()->endoff(), "no extra garbage on map");
+ assert(!map()->has_exceptions(), "call add_exception_states_from 1st");
+ assert(!is_hidden_merge(control()), "call use_exception_state, not set_map");
+}
+
+void GraphKit::verify_exception_state(SafePointNode* ex_map) {
+ assert(ex_map->next_exception() == NULL, "not already part of a chain");
+ assert(has_saved_ex_oop(ex_map), "every exception state has an ex_oop");
+}
+#endif
+
+//---------------------------stop_and_kill_map---------------------------------
+// Set _map to NULL, signalling a stop to further bytecode execution.
+// First smash the current map's control to a constant, to mark it dead.
+void GraphKit::stop_and_kill_map() {
+ SafePointNode* dead_map = stop();
+ if (dead_map != NULL) {
+ dead_map->disconnect_inputs(NULL); // Mark the map as killed.
+ assert(dead_map->is_killed(), "must be so marked");
+ }
+}
+
+
+//--------------------------------stopped--------------------------------------
+// Tell if _map is NULL, or control is top.
+bool GraphKit::stopped() {
+ if (map() == NULL) return true;
+ else if (control() == top()) return true;
+ else return false;
+}
+
+
+//-----------------------------has_ex_handler----------------------------------
+// Tell if this method or any caller method has exception handlers.
+bool GraphKit::has_ex_handler() {
+ for (JVMState* jvmsp = jvms(); jvmsp != NULL; jvmsp = jvmsp->caller()) {
+ if (jvmsp->has_method() && jvmsp->method()->has_exception_handlers()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------save_ex_oop------------------------------------
+// Save an exception without blowing stack contents or other JVM state.
+void GraphKit::set_saved_ex_oop(SafePointNode* ex_map, Node* ex_oop) {
+ assert(!has_saved_ex_oop(ex_map), "clear ex-oop before setting again");
+ ex_map->add_req(ex_oop);
+ debug_only(verify_exception_state(ex_map));
+}
+
+inline static Node* common_saved_ex_oop(SafePointNode* ex_map, bool clear_it) {
+ assert(GraphKit::has_saved_ex_oop(ex_map), "ex_oop must be there");
+ Node* ex_oop = ex_map->in(ex_map->req()-1);
+ if (clear_it) ex_map->del_req(ex_map->req()-1);
+ return ex_oop;
+}
+
+//-----------------------------saved_ex_oop------------------------------------
+// Recover a saved exception from its map.
+Node* GraphKit::saved_ex_oop(SafePointNode* ex_map) {
+ return common_saved_ex_oop(ex_map, false);
+}
+
+//--------------------------clear_saved_ex_oop---------------------------------
+// Erase a previously saved exception from its map.
+Node* GraphKit::clear_saved_ex_oop(SafePointNode* ex_map) {
+ return common_saved_ex_oop(ex_map, true);
+}
+
+#ifdef ASSERT
+//---------------------------has_saved_ex_oop----------------------------------
+// Tell whether an exception oop has been saved in this map.
+bool GraphKit::has_saved_ex_oop(SafePointNode* ex_map) {
+ return ex_map->req() == ex_map->jvms()->endoff()+1;
+}
+#endif
+
+//-------------------------make_exception_state--------------------------------
+// Turn the current JVM state into an exception state, appending the ex_oop.
+SafePointNode* GraphKit::make_exception_state(Node* ex_oop) {
+ sync_jvms();
+ SafePointNode* ex_map = stop(); // do not manipulate this map any more
+ set_saved_ex_oop(ex_map, ex_oop);
+ return ex_map;
+}
+
+
+//--------------------------add_exception_state--------------------------------
+// Add an exception to my list of exceptions.
+void GraphKit::add_exception_state(SafePointNode* ex_map) {
+ if (ex_map == NULL || ex_map->control() == top()) {
+ return;
+ }
+#ifdef ASSERT
+ verify_exception_state(ex_map);
+ if (has_exceptions()) {
+ assert(ex_map->jvms()->same_calls_as(_exceptions->jvms()), "all collected exceptions must come from the same place");
+ }
+#endif
+
+ // If there is already an exception of exactly this type, merge with it.
+ // In particular, null-checks and other low-level exceptions common up here.
+ Node* ex_oop = saved_ex_oop(ex_map);
+ const Type* ex_type = _gvn.type(ex_oop);
+ if (ex_oop == top()) {
+ // No action needed.
+ return;
+ }
+ assert(ex_type->isa_instptr(), "exception must be an instance");
+ for (SafePointNode* e2 = _exceptions; e2 != NULL; e2 = e2->next_exception()) {
+ const Type* ex_type2 = _gvn.type(saved_ex_oop(e2));
+ // We check sp also because call bytecodes can generate exceptions
+ // both before and after arguments are popped!
+ if (ex_type2 == ex_type
+ && e2->_jvms->sp() == ex_map->_jvms->sp()) {
+ combine_exception_states(ex_map, e2);
+ return;
+ }
+ }
+
+ // No pre-existing exception of the same type. Chain it on the list.
+ push_exception_state(ex_map);
+}
+
+//-----------------------add_exception_states_from-----------------------------
+void GraphKit::add_exception_states_from(JVMState* jvms) {
+ SafePointNode* ex_map = jvms->map()->next_exception();
+ if (ex_map != NULL) {
+ jvms->map()->set_next_exception(NULL);
+ for (SafePointNode* next_map; ex_map != NULL; ex_map = next_map) {
+ next_map = ex_map->next_exception();
+ ex_map->set_next_exception(NULL);
+ add_exception_state(ex_map);
+ }
+ }
+}
+
+//-----------------------transfer_exceptions_into_jvms-------------------------
+JVMState* GraphKit::transfer_exceptions_into_jvms() {
+ if (map() == NULL) {
+ // We need a JVMS to carry the exceptions, but the map has gone away.
+ // Create a scratch JVMS, cloned from any of the exception states...
+ if (has_exceptions()) {
+ _map = _exceptions;
+ _map = clone_map();
+ _map->set_next_exception(NULL);
+ clear_saved_ex_oop(_map);
+ debug_only(verify_map());
+ } else {
+ // ...or created from scratch
+ JVMState* jvms = new (C) JVMState(_method, NULL);
+ jvms->set_bci(_bci);
+ jvms->set_sp(_sp);
+ jvms->set_map(new (C, TypeFunc::Parms) SafePointNode(TypeFunc::Parms, jvms));
+ set_jvms(jvms);
+ for (uint i = 0; i < map()->req(); i++) map()->init_req(i, top());
+ set_all_memory(top());
+ while (map()->req() < jvms->endoff()) map()->add_req(top());
+ }
+ // (This is a kludge, in case you didn't notice.)
+ set_control(top());
+ }
+ JVMState* jvms = sync_jvms();
+ assert(!jvms->map()->has_exceptions(), "no exceptions on this map yet");
+ jvms->map()->set_next_exception(_exceptions);
+ _exceptions = NULL; // done with this set of exceptions
+ return jvms;
+}
+
+static inline void add_n_reqs(Node* dstphi, Node* srcphi) {
+ assert(is_hidden_merge(dstphi), "must be a special merge node");
+ assert(is_hidden_merge(srcphi), "must be a special merge node");
+ uint limit = srcphi->req();
+ for (uint i = PhiNode::Input; i < limit; i++) {
+ dstphi->add_req(srcphi->in(i));
+ }
+}
+static inline void add_one_req(Node* dstphi, Node* src) {
+ assert(is_hidden_merge(dstphi), "must be a special merge node");
+ assert(!is_hidden_merge(src), "must not be a special merge node");
+ dstphi->add_req(src);
+}
+
+//-----------------------combine_exception_states------------------------------
+// This helper function combines exception states by building phis on a
+// specially marked state-merging region. These regions and phis are
+// untransformed, and can build up gradually. The region is marked by
+// having a control input of its exception map, rather than NULL. Such
+// regions do not appear except in this function, and in use_exception_state.
+void GraphKit::combine_exception_states(SafePointNode* ex_map, SafePointNode* phi_map) {
+ if (failing()) return; // dying anyway...
+ JVMState* ex_jvms = ex_map->_jvms;
+ assert(ex_jvms->same_calls_as(phi_map->_jvms), "consistent call chains");
+ assert(ex_jvms->stkoff() == phi_map->_jvms->stkoff(), "matching locals");
+ assert(ex_jvms->sp() == phi_map->_jvms->sp(), "matching stack sizes");
+ assert(ex_jvms->monoff() == phi_map->_jvms->monoff(), "matching JVMS");
+ assert(ex_map->req() == phi_map->req(), "matching maps");
+ uint tos = ex_jvms->stkoff() + ex_jvms->sp();
+ Node* hidden_merge_mark = root();
+ Node* region = phi_map->control();
+ MergeMemNode* phi_mem = phi_map->merged_memory();
+ MergeMemNode* ex_mem = ex_map->merged_memory();
+ if (region->in(0) != hidden_merge_mark) {
+ // The control input is not (yet) a specially-marked region in phi_map.
+ // Make it so, and build some phis.
+ region = new (C, 2) RegionNode(2);
+ _gvn.set_type(region, Type::CONTROL);
+ region->set_req(0, hidden_merge_mark); // marks an internal ex-state
+ region->init_req(1, phi_map->control());
+ phi_map->set_control(region);
+ Node* io_phi = PhiNode::make(region, phi_map->i_o(), Type::ABIO);
+ record_for_igvn(io_phi);
+ _gvn.set_type(io_phi, Type::ABIO);
+ phi_map->set_i_o(io_phi);
+ for (MergeMemStream mms(phi_mem); mms.next_non_empty(); ) {
+ Node* m = mms.memory();
+ Node* m_phi = PhiNode::make(region, m, Type::MEMORY, mms.adr_type(C));
+ record_for_igvn(m_phi);
+ _gvn.set_type(m_phi, Type::MEMORY);
+ mms.set_memory(m_phi);
+ }
+ }
+
+ // Either or both of phi_map and ex_map might already be converted into phis.
+ Node* ex_control = ex_map->control();
+ // if there is special marking on ex_map also, we add multiple edges from src
+ bool add_multiple = (ex_control->in(0) == hidden_merge_mark);
+ // how wide was the destination phi_map, originally?
+ uint orig_width = region->req();
+
+ if (add_multiple) {
+ add_n_reqs(region, ex_control);
+ add_n_reqs(phi_map->i_o(), ex_map->i_o());
+ } else {
+ // ex_map has no merges, so we just add single edges everywhere
+ add_one_req(region, ex_control);
+ add_one_req(phi_map->i_o(), ex_map->i_o());
+ }
+ for (MergeMemStream mms(phi_mem, ex_mem); mms.next_non_empty2(); ) {
+ if (mms.is_empty()) {
+ // get a copy of the base memory, and patch some inputs into it
+ const TypePtr* adr_type = mms.adr_type(C);
+ Node* phi = mms.force_memory()->as_Phi()->slice_memory(adr_type);
+ assert(phi->as_Phi()->region() == mms.base_memory()->in(0), "");
+ mms.set_memory(phi);
+ // Prepare to append interesting stuff onto the newly sliced phi:
+ while (phi->req() > orig_width) phi->del_req(phi->req()-1);
+ }
+ // Append stuff from ex_map:
+ if (add_multiple) {
+ add_n_reqs(mms.memory(), mms.memory2());
+ } else {
+ add_one_req(mms.memory(), mms.memory2());
+ }
+ }
+ uint limit = ex_map->req();
+ for (uint i = TypeFunc::Parms; i < limit; i++) {
+ // Skip everything in the JVMS after tos. (The ex_oop follows.)
+ if (i == tos) i = ex_jvms->monoff();
+ Node* src = ex_map->in(i);
+ Node* dst = phi_map->in(i);
+ if (src != dst) {
+ PhiNode* phi;
+ if (dst->in(0) != region) {
+ dst = phi = PhiNode::make(region, dst, _gvn.type(dst));
+ record_for_igvn(phi);
+ _gvn.set_type(phi, phi->type());
+ phi_map->set_req(i, dst);
+ // Prepare to append interesting stuff onto the new phi:
+ while (dst->req() > orig_width) dst->del_req(dst->req()-1);
+ } else {
+ assert(dst->is_Phi(), "nobody else uses a hidden region");
+ phi = (PhiNode*)dst;
+ }
+ if (add_multiple && src->in(0) == ex_control) {
+ // Both are phis.
+ add_n_reqs(dst, src);
+ } else {
+ while (dst->req() < region->req()) add_one_req(dst, src);
+ }
+ const Type* srctype = _gvn.type(src);
+ if (phi->type() != srctype) {
+ const Type* dsttype = phi->type()->meet(srctype);
+ if (phi->type() != dsttype) {
+ phi->set_type(dsttype);
+ _gvn.set_type(phi, dsttype);
+ }
+ }
+ }
+ }
+}
+
+//--------------------------use_exception_state--------------------------------
+Node* GraphKit::use_exception_state(SafePointNode* phi_map) {
+ if (failing()) { stop(); return top(); }
+ Node* region = phi_map->control();
+ Node* hidden_merge_mark = root();
+ assert(phi_map->jvms()->map() == phi_map, "sanity: 1-1 relation");
+ Node* ex_oop = clear_saved_ex_oop(phi_map);
+ if (region->in(0) == hidden_merge_mark) {
+ // Special marking for internal ex-states. Process the phis now.
+ region->set_req(0, region); // now it's an ordinary region
+ set_jvms(phi_map->jvms()); // ...so now we can use it as a map
+ // Note: Setting the jvms also sets the bci and sp.
+ set_control(_gvn.transform(region));
+ uint tos = jvms()->stkoff() + sp();
+ for (uint i = 1; i < tos; i++) {
+ Node* x = phi_map->in(i);
+ if (x->in(0) == region) {
+ assert(x->is_Phi(), "expected a special phi");
+ phi_map->set_req(i, _gvn.transform(x));
+ }
+ }
+ for (MergeMemStream mms(merged_memory()); mms.next_non_empty(); ) {
+ Node* x = mms.memory();
+ if (x->in(0) == region) {
+ assert(x->is_Phi(), "nobody else uses a hidden region");
+ mms.set_memory(_gvn.transform(x));
+ }
+ }
+ if (ex_oop->in(0) == region) {
+ assert(ex_oop->is_Phi(), "expected a special phi");
+ ex_oop = _gvn.transform(ex_oop);
+ }
+ } else {
+ set_jvms(phi_map->jvms());
+ }
+
+ assert(!is_hidden_merge(phi_map->control()), "hidden ex. states cleared");
+ assert(!is_hidden_merge(phi_map->i_o()), "hidden ex. states cleared");
+ return ex_oop;
+}
+
+//---------------------------------java_bc-------------------------------------
+Bytecodes::Code GraphKit::java_bc() const {
+ ciMethod* method = this->method();
+ int bci = this->bci();
+ if (method != NULL && bci != InvocationEntryBci)
+ return method->java_code_at_bci(bci);
+ else
+ return Bytecodes::_illegal;
+}
+
+//------------------------------builtin_throw----------------------------------
+void GraphKit::builtin_throw(Deoptimization::DeoptReason reason, Node* arg) {
+ bool must_throw = true;
+
+ if (JvmtiExport::can_post_exceptions()) {
+ // Do not try anything fancy if we're notifying the VM on every throw.
+ // Cf. case Bytecodes::_athrow in parse2.cpp.
+ uncommon_trap(reason, Deoptimization::Action_none,
+ (ciKlass*)NULL, (char*)NULL, must_throw);
+ return;
+ }
+
+ // If this particular condition has not yet happened at this
+ // bytecode, then use the uncommon trap mechanism, and allow for
+ // a future recompilation if several traps occur here.
+ // If the throw is hot, try to use a more complicated inline mechanism
+ // which keeps execution inside the compiled code.
+ bool treat_throw_as_hot = false;
+ ciMethodData* md = method()->method_data();
+
+ if (ProfileTraps) {
+ if (too_many_traps(reason)) {
+ treat_throw_as_hot = true;
+ }
+ // (If there is no MDO at all, assume it is early in
+ // execution, and that any deopts are part of the
+ // startup transient, and don't need to be remembered.)
+
+ // Also, if there is a local exception handler, treat all throws
+ // as hot if there has been at least one in this method.
+ if (C->trap_count(reason) != 0
+ && method()->method_data()->trap_count(reason) != 0
+ && has_ex_handler()) {
+ treat_throw_as_hot = true;
+ }
+ }
+
+ // If this throw happens frequently, an uncommon trap might cause
+ // a performance pothole. If there is a local exception handler,
+ // and if this particular bytecode appears to be deoptimizing often,
+ // let us handle the throw inline, with a preconstructed instance.
+ // Note: If the deopt count has blown up, the uncommon trap
+ // runtime is going to flush this nmethod, no matter what.
+ if (treat_throw_as_hot
+ && (!StackTraceInThrowable || OmitStackTraceInFastThrow)) {
+ // If the throw is local, we use a pre-existing instance and
+ // punt on the backtrace. This would lead to a missing backtrace
+ // (a repeat of 4292742) if the backtrace object is ever asked
+ // for its backtrace.
+ // Fixing this remaining case of 4292742 requires some flavor of
+ // escape analysis. Leave that for the future.
+ ciInstance* ex_obj = NULL;
+ switch (reason) {
+ case Deoptimization::Reason_null_check:
+ ex_obj = env()->NullPointerException_instance();
+ break;
+ case Deoptimization::Reason_div0_check:
+ ex_obj = env()->ArithmeticException_instance();
+ break;
+ case Deoptimization::Reason_range_check:
+ ex_obj = env()->ArrayIndexOutOfBoundsException_instance();
+ break;
+ case Deoptimization::Reason_class_check:
+ if (java_bc() == Bytecodes::_aastore) {
+ ex_obj = env()->ArrayStoreException_instance();
+ } else {
+ ex_obj = env()->ClassCastException_instance();
+ }
+ break;
+ }
+ if (failing()) { stop(); return; } // exception allocation might fail
+ if (ex_obj != NULL) {
+ // Cheat with a preallocated exception object.
+ if (C->log() != NULL)
+ C->log()->elem("hot_throw preallocated='1' reason='%s'",
+ Deoptimization::trap_reason_name(reason));
+ const TypeInstPtr* ex_con = TypeInstPtr::make(ex_obj);
+ Node* ex_node = _gvn.transform(new (C, 1) ConPNode(ex_con));
+
+ // Clear the detail message of the preallocated exception object.
+ // Weblogic sometimes mutates the detail message of exceptions
+ // using reflection.
+ int offset = java_lang_Throwable::get_detailMessage_offset();
+ const TypePtr* adr_typ = ex_con->add_offset(offset);
+
+ Node *adr = basic_plus_adr(ex_node, ex_node, offset);
+ Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), ex_con, T_OBJECT);
+
+ add_exception_state(make_exception_state(ex_node));
+ return;
+ }
+ }
+
+ // %%% Maybe add entry to OptoRuntime which directly throws the exc.?
+ // It won't be much cheaper than bailing to the interp., since we'll
+ // have to pass up all the debug-info, and the runtime will have to
+ // create the stack trace.
+
+ // Usual case: Bail to interpreter.
+ // Reserve the right to recompile if we haven't seen anything yet.
+
+ Deoptimization::DeoptAction action = Deoptimization::Action_maybe_recompile;
+ if (treat_throw_as_hot
+ && (method()->method_data()->trap_recompiled_at(bci())
+ || C->too_many_traps(reason))) {
+ // We cannot afford to take more traps here. Suffer in the interpreter.
+ if (C->log() != NULL)
+ C->log()->elem("hot_throw preallocated='0' reason='%s' mcount='%d'",
+ Deoptimization::trap_reason_name(reason),
+ C->trap_count(reason));
+ action = Deoptimization::Action_none;
+ }
+
+ // "must_throw" prunes the JVM state to include only the stack, if there
+ // are no local exception handlers. This should cut down on register
+ // allocation time and code size, by drastically reducing the number
+ // of in-edges on the call to the uncommon trap.
+
+ uncommon_trap(reason, action, (ciKlass*)NULL, (char*)NULL, must_throw);
+}
+
+
+//----------------------------PreserveJVMState---------------------------------
+PreserveJVMState::PreserveJVMState(GraphKit* kit, bool clone_map) {
+ debug_only(kit->verify_map());
+ _kit = kit;
+ _map = kit->map(); // preserve the map
+ _sp = kit->sp();
+ kit->set_map(clone_map ? kit->clone_map() : NULL);
+#ifdef ASSERT
+ _bci = kit->bci();
+ Parse* parser = kit->is_Parse();
+ int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+ _block = block;
+#endif
+}
+PreserveJVMState::~PreserveJVMState() {
+ GraphKit* kit = _kit;
+#ifdef ASSERT
+ assert(kit->bci() == _bci, "bci must not shift");
+ Parse* parser = kit->is_Parse();
+ int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+ assert(block == _block, "block must not shift");
+#endif
+ kit->set_map(_map);
+ kit->set_sp(_sp);
+}
+
+
+//-----------------------------BuildCutout-------------------------------------
+BuildCutout::BuildCutout(GraphKit* kit, Node* p, float prob, float cnt)
+ : PreserveJVMState(kit)
+{
+ assert(p->is_Con() || p->is_Bool(), "test must be a bool");
+ SafePointNode* outer_map = _map; // preserved map is caller's
+ SafePointNode* inner_map = kit->map();
+ IfNode* iff = kit->create_and_map_if(outer_map->control(), p, prob, cnt);
+ outer_map->set_control(kit->gvn().transform( new (kit->C, 1) IfTrueNode(iff) ));
+ inner_map->set_control(kit->gvn().transform( new (kit->C, 1) IfFalseNode(iff) ));
+}
+BuildCutout::~BuildCutout() {
+ GraphKit* kit = _kit;
+ assert(kit->stopped(), "cutout code must stop, throw, return, etc.");
+}
+
+
+//------------------------------clone_map--------------------------------------
+// Implementation of PreserveJVMState
+//
+// Only clone_map(...) lives here. If this function ends up being used only by
+// the PreserveJVMState class, we may want to get rid of this extra function
+// eventually and do it all there.
+
+SafePointNode* GraphKit::clone_map() {
+ if (map() == NULL) return NULL;
+
+ // Clone the memory edge first
+ Node* mem = MergeMemNode::make(C, map()->memory());
+ gvn().set_type_bottom(mem);
+
+ SafePointNode *clonemap = (SafePointNode*)map()->clone();
+ JVMState* jvms = this->jvms();
+ JVMState* clonejvms = jvms->clone_shallow(C);
+ clonemap->set_memory(mem);
+ clonemap->set_jvms(clonejvms);
+ clonejvms->set_map(clonemap);
+ record_for_igvn(clonemap);
+ gvn().set_type_bottom(clonemap);
+ return clonemap;
+}
+
+
+//-----------------------------set_map_clone-----------------------------------
+void GraphKit::set_map_clone(SafePointNode* m) {
+ _map = m;
+ _map = clone_map();
+ _map->set_next_exception(NULL);
+ debug_only(verify_map());
+}
+
+
+//----------------------------kill_dead_locals---------------------------------
+// Detect any locals which are known to be dead, and force them to top.
+void GraphKit::kill_dead_locals() {
+ // Consult the liveness information for the locals. If any
+ // of them are unused, then they can be replaced by top(). This
+ // should help register allocation time and cut down on the size
+ // of the deoptimization information.
+
+ // This call is made from many of the bytecode handling
+ // subroutines called from the Big Switch in do_one_bytecode.
+ // Every bytecode which might include a slow path is responsible
+ // for killing its dead locals. The more consistent we
+ // are about killing deads, the fewer useless phis will be
+ // constructed for them at various merge points.
+
+ // bci can be -1 (InvocationEntryBci), in which case we consult the entry
+ // liveness for the method.
+
+ if (method() == NULL || method()->code_size() == 0) {
+ // We are building a graph for a call to a native method.
+ // All locals are live.
+ return;
+ }
+
+ ResourceMark rm;
+
+ MethodLivenessResult live_locals = method()->liveness_at_bci(bci());
+
+ int len = (int)live_locals.size();
+ assert(len <= jvms()->loc_size(), "too many live locals");
+ for (int local = 0; local < len; local++) {
+ if (!live_locals.at(local)) {
+ set_local(local, top());
+ }
+ }
+}
+
+#ifdef ASSERT
+//-------------------------dead_locals_are_killed------------------------------
+// Return true if all dead locals are set to top in the map.
+// Used to assert "clean" debug info at various points.
+bool GraphKit::dead_locals_are_killed() {
+ if (method() == NULL || method()->code_size() == 0) {
+ // No locals need to be dead, so all is as it should be.
+ return true;
+ }
+
+ // Make sure somebody called kill_dead_locals upstream.
+ ResourceMark rm;
+ for (JVMState* jvms = this->jvms(); jvms != NULL; jvms = jvms->caller()) {
+ if (jvms->loc_size() == 0) continue; // no locals to consult
+ SafePointNode* map = jvms->map();
+ ciMethod* method = jvms->method();
+ int bci = jvms->bci();
+ if (jvms == this->jvms()) {
+ bci = this->bci(); // it might not yet be synched
+ }
+ MethodLivenessResult live_locals = method->liveness_at_bci(bci);
+ int len = (int)live_locals.size();
+ if (!live_locals.is_valid() || len == 0)
+ // This method is trivial, or is poisoned by a breakpoint.
+ return true;
+ assert(len == jvms->loc_size(), "live map consistent with locals map");
+ for (int local = 0; local < len; local++) {
+ if (!live_locals.at(local) && map->local(jvms, local) != top()) {
+ if (PrintMiscellaneous && (Verbose || WizardMode)) {
+ tty->print_cr("Zombie local %d: ", local);
+ jvms->dump();
+ }
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+#endif //ASSERT
+
+// Helper function for adding JVMState and debug information to node
+void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) {
+ // Add the safepoint edges to the call (or other safepoint).
+
+ // Make sure dead locals are set to top. This
+ // should help register allocation time and cut down on the size
+ // of the deoptimization information.
+ assert(dead_locals_are_killed(), "garbage in debug info before safepoint");
+
+ // Walk the inline list to fill in the correct set of JVMState's
+ // Also fill in the associated edges for each JVMState.
+
+ JVMState* youngest_jvms = sync_jvms();
+
+ // Do we need debug info here? If it is a SafePoint and this method
+ // cannot de-opt, then we do NOT need any debug info.
+ bool full_info = (C->deopt_happens() || call->Opcode() != Op_SafePoint);
+
+ // If we are guaranteed to throw, we can prune everything but the
+ // input to the current bytecode.
+ bool can_prune_locals = false;
+ uint stack_slots_not_pruned = 0;
+ int inputs = 0, depth = 0;
+ if (must_throw) {
+ assert(method() == youngest_jvms->method(), "sanity");
+ if (compute_stack_effects(inputs, depth)) {
+ can_prune_locals = true;
+ stack_slots_not_pruned = inputs;
+ }
+ }
+
+ if (JvmtiExport::can_examine_or_deopt_anywhere()) {
+ // At any safepoint, this method can get breakpointed, which would
+ // then require an immediate deoptimization.
+ full_info = true;
+ can_prune_locals = false; // do not prune locals
+ stack_slots_not_pruned = 0;
+ }
+
+ // do not scribble on the input jvms
+ JVMState* out_jvms = youngest_jvms->clone_deep(C);
+ call->set_jvms(out_jvms); // Start jvms list for call node
+
+ // Presize the call:
+ debug_only(uint non_debug_edges = call->req());
+ call->add_req_batch(top(), youngest_jvms->debug_depth());
+ assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), "");
+
+ // Set up edges so that the call looks like this:
+ // Call [state:] ctl io mem fptr retadr
+ // [parms:] parm0 ... parmN
+ // [root:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
+ // [...mid:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN [...]
+ // [young:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
+ // Note that caller debug info precedes callee debug info.
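+ // For example (illustrative numbers): a single, non-inlined JVMState with
+ // 3 locals, sp == 2 and one monitor (box plus object, as diagrammed above)
+ // contributes 3 + 2 + 2 = 7 debug edges beyond the fixed inputs.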
+
+ // Fill pointer walks backwards from "young:" to "root:" in the diagram above:
+ uint debug_ptr = call->req();
+
+ // Loop over the map input edges associated with jvms, add them
+ // to the call node, & reset all offsets to match call node array.
+ for (JVMState* in_jvms = youngest_jvms; in_jvms != NULL; ) {
+ uint debug_end = debug_ptr;
+ uint debug_start = debug_ptr - in_jvms->debug_size();
+ debug_ptr = debug_start; // back up the ptr
+
+ uint p = debug_start; // walks forward in [debug_start, debug_end)
+ uint j, k, l;
+ SafePointNode* in_map = in_jvms->map();
+ out_jvms->set_map(call);
+
+ if (can_prune_locals) {
+ assert(in_jvms->method() == out_jvms->method(), "sanity");
+ // If the current throw can reach an exception handler in this JVMS,
+ // then we must keep everything live that can reach that handler.
+ // As a quick and dirty approximation, we look for any handlers at all.
+ if (in_jvms->method()->has_exception_handlers()) {
+ can_prune_locals = false;
+ }
+ }
+
+ // Add the Locals
+ k = in_jvms->locoff();
+ l = in_jvms->loc_size();
+ out_jvms->set_locoff(p);
+ if (full_info && !can_prune_locals) {
+ for (j = 0; j < l; j++)
+ call->set_req(p++, in_map->in(k+j));
+ } else {
+ p += l; // already set to top above by add_req_batch
+ }
+
+ // Add the Expression Stack
+ k = in_jvms->stkoff();
+ l = in_jvms->sp();
+ out_jvms->set_stkoff(p);
+ if (full_info && !can_prune_locals) {
+ for (j = 0; j < l; j++)
+ call->set_req(p++, in_map->in(k+j));
+ } else if (can_prune_locals && stack_slots_not_pruned != 0) {
+ // Divide stack into {S0,...,S1}, where S0 is set to top.
+ uint s1 = stack_slots_not_pruned;
+ stack_slots_not_pruned = 0; // for next iteration
+ if (s1 > l) s1 = l;
+ uint s0 = l - s1;
+ p += s0; // skip the tops preinstalled by add_req_batch
+ for (j = s0; j < l; j++)
+ call->set_req(p++, in_map->in(k+j));
+ } else {
+ p += l; // already set to top above by add_req_batch
+ }
+
+ // Add the Monitors
+ k = in_jvms->monoff();
+ l = in_jvms->mon_size();
+ out_jvms->set_monoff(p);
+ for (j = 0; j < l; j++)
+ call->set_req(p++, in_map->in(k+j));
+
+ // Finish the new jvms.
+ out_jvms->set_endoff(p);
+
+ assert(out_jvms->endoff() == debug_end, "fill ptr must match");
+ assert(out_jvms->depth() == in_jvms->depth(), "depth must match");
+ assert(out_jvms->loc_size() == in_jvms->loc_size(), "size must match");
+ assert(out_jvms->mon_size() == in_jvms->mon_size(), "size must match");
+ assert(out_jvms->debug_size() == in_jvms->debug_size(), "size must match");
+
+ // Update the two tail pointers in parallel.
+ out_jvms = out_jvms->caller();
+ in_jvms = in_jvms->caller();
+ }
+
+ assert(debug_ptr == non_debug_edges, "debug info must fit exactly");
+
+ // Test the correctness of JVMState::debug_xxx accessors:
+ assert(call->jvms()->debug_start() == non_debug_edges, "");
+ assert(call->jvms()->debug_end() == call->req(), "");
+ assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, "");
+}
+
+bool GraphKit::compute_stack_effects(int& inputs, int& depth) {
+ Bytecodes::Code code = java_bc();
+ if (code == Bytecodes::_wide) {
+ code = method()->java_code_at_bci(bci() + 1);
+ }
+
+ BasicType rtype = T_ILLEGAL;
+ int rsize = 0;
+
+ if (code != Bytecodes::_illegal) {
+ depth = Bytecodes::depth(code); // checkcast=0, athrow=-1
+ rtype = Bytecodes::result_type(code); // checkcast=P, athrow=V
+ if (rtype < T_CONFLICT)
+ rsize = type2size[rtype];
+ }
+
+ switch (code) {
+ case Bytecodes::_illegal:
+ return false;
+
+ case Bytecodes::_ldc:
+ case Bytecodes::_ldc_w:
+ case Bytecodes::_ldc2_w:
+ inputs = 0;
+ break;
+
+ case Bytecodes::_dup: inputs = 1; break;
+ case Bytecodes::_dup_x1: inputs = 2; break;
+ case Bytecodes::_dup_x2: inputs = 3; break;
+ case Bytecodes::_dup2: inputs = 2; break;
+ case Bytecodes::_dup2_x1: inputs = 3; break;
+ case Bytecodes::_dup2_x2: inputs = 4; break;
+ case Bytecodes::_swap: inputs = 2; break;
+ case Bytecodes::_arraylength: inputs = 1; break;
+
+ case Bytecodes::_getstatic:
+ case Bytecodes::_putstatic:
+ case Bytecodes::_getfield:
+ case Bytecodes::_putfield:
+ {
+ bool is_get = (depth >= 0), is_static = (depth & 1);
+ bool ignore;
+ ciBytecodeStream iter(method());
+ iter.reset_to_bci(bci());
+ iter.next();
+ ciField* field = iter.get_field(ignore);
+ int size = field->type()->size();
+ inputs = (is_static ? 0 : 1);
+ if (is_get) {
+ depth = size - inputs;
+ } else {
+ inputs += size; // putxxx pops the value from the stack
+ depth = - inputs;
+ }
+ }
+ break;
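+
+ // Worked example for the field cases above (illustrative): getfield of a
+ // long pops the receiver and pushes a two-slot value, so inputs == 1 and
+ // depth == 2 - 1 == 1; putfield of the same field pops receiver plus value,
+ // so inputs == 1 + 2 == 3 and depth == -3.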
+
+ case Bytecodes::_invokevirtual:
+ case Bytecodes::_invokespecial:
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokeinterface:
+ {
+ bool is_static = (depth == 0);
+ bool ignore;
+ ciBytecodeStream iter(method());
+ iter.reset_to_bci(bci());
+ iter.next();
+ ciMethod* method = iter.get_method(ignore);
+ inputs = method->arg_size_no_receiver();
+ if (!is_static) inputs += 1;
+ int size = method->return_type()->size();
+ depth = size - inputs;
+ }
+ break;
+
+ case Bytecodes::_multianewarray:
+ {
+ ciBytecodeStream iter(method());
+ iter.reset_to_bci(bci());
+ iter.next();
+ inputs = iter.get_dimensions();
+ assert(rsize == 1, "");
+ depth = rsize - inputs;
+ }
+ break;
+
+ case Bytecodes::_ireturn:
+ case Bytecodes::_lreturn:
+ case Bytecodes::_freturn:
+ case Bytecodes::_dreturn:
+ case Bytecodes::_areturn:
+ assert(rsize == -depth, "");
+ inputs = rsize;
+ break;
+
+ case Bytecodes::_jsr:
+ case Bytecodes::_jsr_w:
+ inputs = 0;
+ depth = 1; // S.B. depth=1, not zero
+ break;
+
+ default:
+ // bytecode produces a typed result
+ inputs = rsize - depth;
+ assert(inputs >= 0, "");
+ break;
+ }
+
+#ifdef ASSERT
+ // spot check
+ int outputs = depth + inputs;
+ assert(outputs >= 0, "sanity");
+ switch (code) {
+ case Bytecodes::_checkcast: assert(inputs == 1 && outputs == 1, ""); break;
+ case Bytecodes::_athrow: assert(inputs == 1 && outputs == 0, ""); break;
+ case Bytecodes::_aload_0: assert(inputs == 0 && outputs == 1, ""); break;
+ case Bytecodes::_return: assert(inputs == 0 && outputs == 0, ""); break;
+ case Bytecodes::_drem: assert(inputs == 4 && outputs == 2, ""); break;
+ }
+#endif //ASSERT
+
+ return true;
+}
+
+
+
+//------------------------------basic_plus_adr---------------------------------
+Node* GraphKit::basic_plus_adr(Node* base, Node* ptr, Node* offset) {
+ // short-circuit a common case
+ if (offset == intcon(0)) return ptr;
+ return _gvn.transform( new (C, 4) AddPNode(base, ptr, offset) );
+}
+
+Node* GraphKit::ConvI2L(Node* offset) {
+ // short-circuit a common case
+ jint offset_con = find_int_con(offset, Type::OffsetBot);
+ if (offset_con != Type::OffsetBot) {
+ return longcon((long) offset_con);
+ }
+ return _gvn.transform( new (C, 2) ConvI2LNode(offset));
+}
+Node* GraphKit::ConvL2I(Node* offset) {
+ // short-circuit a common case
+ jlong offset_con = find_long_con(offset, (jlong)Type::OffsetBot);
+ if (offset_con != (jlong)Type::OffsetBot) {
+ return intcon((int) offset_con);
+ }
+ return _gvn.transform( new (C, 2) ConvL2INode(offset));
+}
+
+//-------------------------load_object_klass-----------------------------------
+Node* GraphKit::load_object_klass(Node* obj) {
+ // Special-case a fresh allocation to avoid building nodes:
+ Node* akls = AllocateNode::Ideal_klass(obj, &_gvn);
+ if (akls != NULL) return akls;
+ Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
+ return _gvn.transform( new (C, 3) LoadKlassNode(0, immutable_memory(), k_adr, TypeInstPtr::KLASS) );
+}
+
+//-------------------------load_array_length-----------------------------------
+Node* GraphKit::load_array_length(Node* array) {
+ // Special-case a fresh allocation to avoid building nodes:
+ Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn);
+ if (alen != NULL) return alen;
+ Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
+ return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
+}
+
+//------------------------------do_null_check----------------------------------
+// Helper function to do a NULL pointer check. The returned value is
+// the incoming address with NULL cast away. You are allowed to use the
+// not-null value only if you are control dependent on the test.
+extern int explicit_null_checks_inserted,
+ explicit_null_checks_elided;
+Node* GraphKit::null_check_common(Node* value, BasicType type,
+ // optional arguments for variations:
+ bool assert_null,
+ Node* *null_control) {
+ assert(!assert_null || null_control == NULL, "not both at once");
+ if (stopped()) return top();
+ if (!GenerateCompilerNullChecks && !assert_null && null_control == NULL) {
+ // For some performance testing, we may wish to suppress null checking.
+ value = cast_not_null(value); // Make it appear to be non-null (4962416).
+ return value;
+ }
+ explicit_null_checks_inserted++;
+
+ // Construct NULL check
+ Node *chk = NULL;
+ switch(type) {
+ case T_LONG : chk = new (C, 3) CmpLNode(value, _gvn.zerocon(T_LONG)); break;
+ case T_INT : chk = new (C, 3) CmpINode( value, _gvn.intcon(0)); break;
+ case T_ARRAY : // fall through
+ type = T_OBJECT; // simplify further tests
+ case T_OBJECT : {
+ const Type *t = _gvn.type( value );
+
+ const TypeInstPtr* tp = t->isa_instptr();
+ if (tp != NULL && !tp->klass()->is_loaded()
+ // Only for do_null_check, not any of its siblings:
+ && !assert_null && null_control == NULL) {
+ // Usually, any field access or invocation on an unloaded oop type
+ // will simply fail to link, since the statically linked class is
+ // likely also to be unloaded. However, in -Xcomp mode, sometimes
+ // the static class is loaded but the sharper oop type is not.
+ // Rather than checking for this obscure case in lots of places,
+ // we simply observe that a null check on an unloaded class
+ // will always be followed by a nonsense operation, so we
+ // can just issue the uncommon trap here.
+ // Our access to the unloaded class will only be correct
+ // after it has been loaded and initialized, which requires
+ // a trip through the interpreter.
+#ifndef PRODUCT
+ if (WizardMode) { tty->print("Null check of unloaded "); tp->klass()->print(); tty->cr(); }
+#endif
+ uncommon_trap(Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ tp->klass(), "!loaded");
+ return top();
+ }
+
+ if (assert_null) {
+ // See if the type is contained in NULL_PTR.
+ // If so, then the value is already null.
+ if (t->higher_equal(TypePtr::NULL_PTR)) {
+ explicit_null_checks_elided++;
+ return value; // Elided null assert quickly!
+ }
+ } else {
+ // See if mixing in the NULL pointer changes type.
+ // If so, then the NULL pointer was not allowed in the original
+ // type. In other words, "value" was not-null.
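+ // (For example, if t is TypeInstPtr::NOTNULL, the meet re-admits the
+ // null pointer and so differs from t; the value is provably non-null
+ // and the explicit check below is skipped.)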
+ if (t->meet(TypePtr::NULL_PTR) != t) {
+ // same as: if (!TypePtr::NULL_PTR->higher_equal(t)) ...
+ explicit_null_checks_elided++;
+ return value; // Elided null check quickly!
+ }
+ }
+ chk = new (C, 3) CmpPNode( value, null() );
+ break;
+ }
+
+ default : ShouldNotReachHere();
+ }
+ assert(chk != NULL, "sanity check");
+ chk = _gvn.transform(chk);
+
+ BoolTest::mask btest = assert_null ? BoolTest::eq : BoolTest::ne;
+ BoolNode *btst = new (C, 2) BoolNode( chk, btest);
+ Node *tst = _gvn.transform( btst );
+
+ //-----------
+ // If peephole optimizations occurred, a prior test existed.
+ // If a prior test existed, maybe it dominates and we can avoid this test.
+ if (tst != btst && type == T_OBJECT) {
+ // At this point we want to scan up the CFG to see if we can
+ // find an identical test (and so avoid this test altogether).
+ Node *cfg = control();
+ int depth = 0;
+ while( depth < 16 ) { // Limit search depth for speed
+ if( cfg->Opcode() == Op_IfTrue &&
+ cfg->in(0)->in(1) == tst ) {
+ // Found prior test. Use "cast_not_null" to construct an identical
+ // CastPP (and hence hash to) as already exists for the prior test.
+ // Return that casted value.
+ if (assert_null) {
+ replace_in_map(value, null());
+ return null(); // do not issue the redundant test
+ }
+ Node *oldcontrol = control();
+ set_control(cfg);
+ Node *res = cast_not_null(value);
+ set_control(oldcontrol);
+ explicit_null_checks_elided++;
+ return res;
+ }
+ cfg = IfNode::up_one_dom(cfg, /*linear_only=*/ true);
+ if (cfg == NULL) break; // Quit at region nodes
+ depth++;
+ }
+ }
+
+ //-----------
+ // Branch to failure if null
+ float ok_prob = PROB_MAX; // a priori estimate: nulls never happen
+ Deoptimization::DeoptReason reason;
+ if (assert_null)
+ reason = Deoptimization::Reason_null_assert;
+ else if (type == T_OBJECT)
+ reason = Deoptimization::Reason_null_check;
+ else
+ reason = Deoptimization::Reason_div0_check;
+
+ // To cause an implicit null check, we set the not-null probability
+ // to the maximum (PROB_MAX). For an explicit check the probability
+ // is set to a smaller value.
+ if (null_control != NULL || too_many_traps(reason)) {
+ // probability is less likely
+ ok_prob = PROB_LIKELY_MAG(3);
+ } else if (!assert_null &&
+ (ImplicitNullCheckThreshold > 0) &&
+ method() != NULL &&
+ (method()->method_data()->trap_count(reason)
+ >= (uint)ImplicitNullCheckThreshold)) {
+ ok_prob = PROB_LIKELY_MAG(3);
+ }
+
+ if (null_control != NULL) {
+ IfNode* iff = create_and_map_if(control(), tst, ok_prob, COUNT_UNKNOWN);
+ Node* null_true = _gvn.transform( new (C, 1) IfFalseNode(iff));
+ set_control( _gvn.transform( new (C, 1) IfTrueNode(iff)));
+ if (null_true == top())
+ explicit_null_checks_elided++;
+ (*null_control) = null_true;
+ } else {
+ BuildCutout unless(this, tst, ok_prob);
+ // Check for optimizer eliding test at parse time
+ if (stopped()) {
+ // Failure not possible; do not bother making uncommon trap.
+ explicit_null_checks_elided++;
+ } else if (assert_null) {
+ uncommon_trap(reason,
+ Deoptimization::Action_make_not_entrant,
+ NULL, "assert_null");
+ } else {
+ builtin_throw(reason);
+ }
+ }
+
+ // Must throw exception, fall-thru not possible?
+ if (stopped()) {
+ return top(); // No result
+ }
+
+ if (assert_null) {
+ // Cast obj to null on this path.
+ replace_in_map(value, zerocon(type));
+ return zerocon(type);
+ }
+
+ // Cast obj to not-null on this path, if there is no null_control.
+ // (If there is a null_control, a non-null value may come back to haunt us.)
+ if (type == T_OBJECT) {
+ Node* cast = cast_not_null(value, false);
+ if (null_control == NULL || (*null_control) == top())
+ replace_in_map(value, cast);
+ value = cast;
+ }
+
+ return value;
+}
+
+
+//------------------------------cast_not_null----------------------------------
+// Cast obj to not-null on this path
+Node* GraphKit::cast_not_null(Node* obj, bool do_replace_in_map) {
+ const Type *t = _gvn.type(obj);
+ const Type *t_not_null = t->join(TypePtr::NOTNULL);
+ // Object is already not-null?
+ if( t == t_not_null ) return obj;
+
+ Node *cast = new (C, 2) CastPPNode(obj,t_not_null);
+ cast->init_req(0, control());
+ cast = _gvn.transform( cast );
+
+ // Scan for instances of 'obj' in the current JVM mapping.
+ // These instances are known to be not-null after the test.
+ if (do_replace_in_map)
+ replace_in_map(obj, cast);
+
+ return cast; // Return casted value
+}
+
+
+//--------------------------replace_in_map-------------------------------------
+void GraphKit::replace_in_map(Node* old, Node* neww) {
+ this->map()->replace_edge(old, neww);
+
+ // Note: This operation potentially replaces any edge
+ // on the map. This includes locals, stack, and monitors
+ // of the current (innermost) JVM state.
+
+ // We can consider replacing in caller maps.
+ // The idea would be that an inlined function's null checks
+ // can be shared with the entire inlining tree.
+ // The expense of doing this is that the PreserveJVMState class
+ // would have to preserve caller states too, with a deep copy.
+}
+
+
+
+//=============================================================================
+//--------------------------------memory---------------------------------------
+Node* GraphKit::memory(uint alias_idx) {
+ MergeMemNode* mem = merged_memory();
+ Node* p = mem->memory_at(alias_idx);
+ _gvn.set_type(p, Type::MEMORY); // must be mapped
+ return p;
+}
+
+//-----------------------------reset_memory------------------------------------
+Node* GraphKit::reset_memory() {
+ Node* mem = map()->memory();
+ // do not use this node for any more parsing!
+ debug_only( map()->set_memory((Node*)NULL) );
+ return _gvn.transform( mem );
+}
+
+//------------------------------set_all_memory---------------------------------
+void GraphKit::set_all_memory(Node* newmem) {
+ Node* mergemem = MergeMemNode::make(C, newmem);
+ gvn().set_type_bottom(mergemem);
+ map()->set_memory(mergemem);
+}
+
+//------------------------------set_all_memory_call----------------------------
+void GraphKit::set_all_memory_call(Node* call) {
+ Node* newmem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+ set_all_memory(newmem);
+}
+
+//=============================================================================
+//
+// parser factory methods for MemNodes
+//
+// These are layered on top of the factory methods in LoadNode and StoreNode,
+// and integrate with the parser's memory state and _gvn engine.
+//
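+// A typical client (cf. gen_stub above) resolves an address to an alias index
+// and lets these helpers splice the access into the current memory state.
+// Illustrative sketch only; 'adr' stands for whatever basic_plus_adr produced:
+//
+//   int alias = Compile::AliasIdxBot;   // gen_stub's "NoAlias"
+//   Node* oop = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, alias, false);
+//   store_to_memory(NULL, adr, null(), T_ADDRESS, alias);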
+
+// factory methods in "int adr_idx"
+Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
+ int adr_idx,
+ bool require_atomic_access) {
+ assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
+ const TypePtr* adr_type = NULL; // debug-mode-only argument
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node* mem = memory(adr_idx);
+ Node* ld;
+ if (require_atomic_access && bt == T_LONG) {
+ ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t);
+ } else {
+ ld = LoadNode::make(C, ctl, mem, adr, adr_type, t, bt);
+ }
+ return _gvn.transform(ld);
+}
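+
+// (Usage sketch, for orientation: increment_counter() below loads a raw-memory
+// int with this factory, roughly
+//    make_load(NULL, counter_addr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
+// the NULL control argument means the load is not pinned to the current
+// control edge.)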
+
+Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
+ int adr_idx,
+ bool require_atomic_access) {
+ assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+ const TypePtr* adr_type = NULL;
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node *mem = memory(adr_idx);
+ Node* st;
+ if (require_atomic_access && bt == T_LONG) {
+ st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val);
+ } else {
+ st = StoreNode::make(C, ctl, mem, adr, adr_type, val, bt);
+ }
+ st = _gvn.transform(st);
+ set_memory(st, adr_idx);
+ // Back-to-back stores can only remove intermediate store with DU info
+ // so push on worklist for optimizer.
+ if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address))
+ record_for_igvn(st);
+
+ return st;
+}
+
+void GraphKit::pre_barrier(Node* ctl,
+ Node* obj,
+ Node* adr,
+ uint adr_idx,
+ Node *val,
+ const Type* val_type,
+ BasicType bt) {
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ set_control(ctl);
+ switch (bs->kind()) {
+
+ case BarrierSet::CardTableModRef:
+ case BarrierSet::CardTableExtension:
+ case BarrierSet::ModRef:
+ break;
+
+ case BarrierSet::Other:
+ default :
+ ShouldNotReachHere();
+
+ }
+}
+
+void GraphKit::post_barrier(Node* ctl,
+ Node* store,
+ Node* obj,
+ Node* adr,
+ uint adr_idx,
+ Node *val,
+ BasicType bt,
+ bool use_precise) {
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ set_control(ctl);
+ switch (bs->kind()) {
+
+ case BarrierSet::CardTableModRef:
+ case BarrierSet::CardTableExtension:
+ write_barrier_post(store, obj, adr, val, use_precise);
+ break;
+
+ case BarrierSet::ModRef:
+ break;
+
+ case BarrierSet::Other:
+ default :
+ ShouldNotReachHere();
+
+ }
+}
+
+Node* GraphKit::store_oop_to_object(Node* ctl,
+ Node* obj,
+ Node* adr,
+ const TypePtr* adr_type,
+ Node *val,
+ const Type* val_type,
+ BasicType bt) {
+ uint adr_idx = C->get_alias_index(adr_type);
+ Node* store;
+ pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
+ store = store_to_memory(control(), adr, val, bt, adr_idx);
+ post_barrier(control(), store, obj, adr, adr_idx, val, bt, false);
+ return store;
+}
+
+Node* GraphKit::store_oop_to_array(Node* ctl,
+ Node* obj,
+ Node* adr,
+ const TypePtr* adr_type,
+ Node *val,
+ const Type* val_type,
+ BasicType bt) {
+ uint adr_idx = C->get_alias_index(adr_type);
+ Node* store;
+ pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
+ store = store_to_memory(control(), adr, val, bt, adr_idx);
+ post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
+ return store;
+}
+
+Node* GraphKit::store_oop_to_unknown(Node* ctl,
+ Node* obj,
+ Node* adr,
+ const TypePtr* adr_type,
+ Node *val,
+ const Type* val_type,
+ BasicType bt) {
+ uint adr_idx = C->get_alias_index(adr_type);
+ Node* store;
+ pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
+ store = store_to_memory(control(), adr, val, bt, adr_idx);
+ post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
+ return store;
+}
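+
+// Note: the three store_oop_to_* helpers above share the same shape,
+//   pre_barrier(); store_to_memory(); post_barrier();
+// and differ only in the 'use_precise' flag handed to post_barrier():
+// false for plain object fields, true for array elements and for stores
+// whose destination kind is unknown.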
+
+
+//-------------------------array_element_address-------------------------
+Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt,
+ const TypeInt* sizetype) {
+ uint shift = exact_log2(type2aelembytes[elembt]);
+ uint header = arrayOopDesc::base_offset_in_bytes(elembt);
+
+ // short-circuit a common case (saves lots of confusing waste motion)
+ jint idx_con = find_int_con(idx, -1);
+ if (idx_con >= 0) {
+ intptr_t offset = header + ((intptr_t)idx_con << shift);
+ return basic_plus_adr(ary, offset);
+ }
+
+ // must be correct type for alignment purposes
+ Node* base = basic_plus_adr(ary, header);
+#ifdef _LP64
+ // The scaled index operand to AddP must be a clean 64-bit value.
+ // Java allows a 32-bit int to be incremented to a negative
+ // value, which appears in a 64-bit register as a large
+ // positive number. Using that large positive number as an
+ // operand in pointer arithmetic has bad consequences.
+ // On the other hand, 32-bit overflow is rare, and the possibility
+ // can often be excluded, if we annotate the ConvI2L node with
+ // a type assertion that its value is known to be a small positive
+ // number. (The prior range check has ensured this.)
+ // This assertion is used by ConvI2LNode::Ideal.
+ int index_max = max_jint - 1; // array size is max_jint, index is one less
+ if (sizetype != NULL) index_max = sizetype->_hi - 1;
+ const TypeLong* lidxtype = TypeLong::make(CONST64(0), index_max, Type::WidenMax);
+ idx = _gvn.transform( new (C, 2) ConvI2LNode(idx, lidxtype) );
+#endif
+ Node* scale = _gvn.transform( new (C, 3) LShiftXNode(idx, intcon(shift)) );
+ return basic_plus_adr(ary, base, scale);
+}
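+
+// Rough arithmetic sketch: the element address computed above is
+//   ary + header + (idx << shift)
+// e.g. for a T_INT array (shift == 2) with, say, a 12-byte header the
+// element at index i lives at ary + 12 + 4*i; the header size itself is
+// platform-dependent and comes from arrayOopDesc::base_offset_in_bytes().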
+
+//-------------------------load_array_element-------------------------
+Node* GraphKit::load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype) {
+ const Type* elemtype = arytype->elem();
+ BasicType elembt = elemtype->array_element_basic_type();
+ Node* adr = array_element_address(ary, idx, elembt, arytype->size());
+ Node* ld = make_load(ctl, adr, elemtype, elembt, arytype);
+ return ld;
+}
+
+//-------------------------set_arguments_for_java_call-------------------------
+// Arguments (pre-popped from the stack) are taken from the JVMS.
+void GraphKit::set_arguments_for_java_call(CallJavaNode* call) {
+ // Add the call arguments:
+ uint nargs = call->method()->arg_size();
+ for (uint i = 0; i < nargs; i++) {
+ Node* arg = argument(i);
+ call->init_req(i + TypeFunc::Parms, arg);
+ }
+}
+
+//---------------------------set_edges_for_java_call---------------------------
+// Connect a newly created call into the current JVMS.
+// A return value node (if any) is returned from set_edges_for_java_call.
+void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw) {
+
+ // Add the predefined inputs:
+ call->init_req( TypeFunc::Control, control() );
+ call->init_req( TypeFunc::I_O , i_o() );
+ call->init_req( TypeFunc::Memory , reset_memory() );
+ call->init_req( TypeFunc::FramePtr, frameptr() );
+ call->init_req( TypeFunc::ReturnAdr, top() );
+
+ add_safepoint_edges(call, must_throw);
+
+ Node* xcall = _gvn.transform(call);
+
+ if (xcall == top()) {
+ set_control(top());
+ return;
+ }
+ assert(xcall == call, "call identity is stable");
+
+ // Re-use the current map to produce the result.
+
+ set_control(_gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Control)));
+ set_i_o( _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::I_O )));
+ set_all_memory_call(xcall);
+
+ //return xcall; // no need, caller already has it
+}
+
+Node* GraphKit::set_results_for_java_call(CallJavaNode* call) {
+ if (stopped()) return top(); // maybe the call folded up?
+
+ // Capture the return value, if any.
+ Node* ret;
+ if (call->method() == NULL ||
+ call->method()->return_type()->basic_type() == T_VOID)
+ ret = top();
+ else ret = _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Parms));
+
+ // Note: Since any out-of-line call can produce an exception,
+ // we always insert an I_O projection from the call into the result.
+
+ make_slow_call_ex(call, env()->Throwable_klass(), false);
+
+ return ret;
+}
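+
+// Taken together, a Java call is typically emitted in three steps:
+// set_arguments_for_java_call() fills in the Parms edges,
+// set_edges_for_java_call() wires control/i_o/memory plus the safepoint
+// debug info, and set_results_for_java_call() captures the return value
+// and the exception state (via make_slow_call_ex).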
+
+//--------------------set_predefined_input_for_runtime_call--------------------
+// Reading and setting the memory state is way conservative here.
+// The real problem is that I am not doing real Type analysis on memory,
+// so I cannot distinguish card mark stores from other stores. Across a GC
+// point the Store Barrier and the card mark memory has to agree. I cannot
+// have a card mark store and its barrier split across the GC point from
+// either above or below. Here I get that to happen by reading ALL of memory.
+// A better answer would be to separate out card marks from other memory.
+// For now, return the input memory state, so that it can be reused
+// after the call, if this call has restricted memory effects.
+Node* GraphKit::set_predefined_input_for_runtime_call(SafePointNode* call) {
+ // Set fixed predefined input arguments
+ Node* memory = reset_memory();
+ call->init_req( TypeFunc::Control, control() );
+ call->init_req( TypeFunc::I_O, top() ); // does no i/o
+ call->init_req( TypeFunc::Memory, memory ); // may gc ptrs
+ call->init_req( TypeFunc::FramePtr, frameptr() );
+ call->init_req( TypeFunc::ReturnAdr, top() );
+ return memory;
+}
+
+//-------------------set_predefined_output_for_runtime_call--------------------
+// Set control and memory (not i_o) from the call.
+// If keep_mem is not NULL, use it for the output state,
+// except for the RawPtr output of the call, if hook_mem is TypeRawPtr::BOTTOM.
+// If hook_mem is NULL, this call produces no memory effects at all.
+// If hook_mem is a Java-visible memory slice (such as arraycopy operands),
+// then only that memory slice is taken from the call.
+// In the last case, we must put an appropriate memory barrier before
+// the call, so as to create the correct anti-dependencies on loads
+// preceding the call.
+void GraphKit::set_predefined_output_for_runtime_call(Node* call,
+ Node* keep_mem,
+ const TypePtr* hook_mem) {
+ // no i/o
+ set_control(_gvn.transform( new (C, 1) ProjNode(call,TypeFunc::Control) ));
+ if (keep_mem) {
+ // First clone the existing memory state
+ set_all_memory(keep_mem);
+ if (hook_mem != NULL) {
+ // Make memory for the call
+ Node* mem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+ // Set the RawPtr memory state only. This covers all the heap top/GC stuff
+ // We also use hook_mem to extract specific effects from arraycopy stubs.
+ set_memory(mem, hook_mem);
+ }
+ // ...else the call has NO memory effects.
+
+ // Make sure the call advertises its memory effects precisely.
+ // This lets us build accurate anti-dependences in gcm.cpp.
+ assert(C->alias_type(call->adr_type()) == C->alias_type(hook_mem),
+ "call node must be constructed correctly");
+ } else {
+ assert(hook_mem == NULL, "");
+ // This is not a "slow path" call; all memory comes from the call.
+ set_all_memory_call(call);
+ }
+}
+
+//------------------------------increment_counter------------------------------
+// for statistics: increment a VM counter by 1
+
+void GraphKit::increment_counter(address counter_addr) {
+ Node* adr1 = makecon(TypeRawPtr::make(counter_addr));
+ increment_counter(adr1);
+}
+
+void GraphKit::increment_counter(Node* counter_addr) {
+ int adr_type = Compile::AliasIdxRaw;
+ Node* cnt = make_load(NULL, counter_addr, TypeInt::INT, T_INT, adr_type);
+ Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(1)));
+ store_to_memory( NULL, counter_addr, incr, T_INT, adr_type );
+}
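+
+// (The counter bump above is a plain load/add/store on the raw alias
+// category, so it is cheap but not atomic; callers pass the address of an
+// int-sized VM counter and are expected to tolerate the occasional lost
+// update, cf. the lock-statistics counter further below.)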
+
+
+//------------------------------uncommon_trap----------------------------------
+// Bail out to the interpreter in mid-method. Implemented by calling the
+// uncommon_trap blob. This helper function inserts a runtime call with the
+// right debug info.
+void GraphKit::uncommon_trap(int trap_request,
+ ciKlass* klass, const char* comment,
+ bool must_throw,
+ bool keep_exact_action) {
+ if (failing()) stop();
+ if (stopped()) return; // trap reachable?
+
+ // Note: If ProfileTraps is true, and if a deopt. actually
+ // occurs here, the runtime will make sure an MDO exists. There is
+ // no need to call method()->build_method_data() at this point.
+
+#ifdef ASSERT
+ if (!must_throw) {
+ // Make sure the stack has at least enough depth to execute
+ // the current bytecode.
+ int inputs, ignore;
+ if (compute_stack_effects(inputs, ignore)) {
+ assert(sp() >= inputs, "must have enough JVMS stack to execute");
+ // It is a frequent error in library_call.cpp to issue an
+ // uncommon trap with the _sp value already popped.
+ }
+ }
+#endif
+
+ Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(trap_request);
+ Deoptimization::DeoptAction action = Deoptimization::trap_request_action(trap_request);
+
+ switch (action) {
+ case Deoptimization::Action_maybe_recompile:
+ case Deoptimization::Action_reinterpret:
+ // Temporary fix for 6529811 to allow virtual calls to be sure they
+ // get the chance to go from mono->bi->mega
+ if (!keep_exact_action &&
+ Deoptimization::trap_request_index(trap_request) < 0 &&
+ too_many_recompiles(reason)) {
+ // This BCI is causing too many recompilations.
+ action = Deoptimization::Action_none;
+ trap_request = Deoptimization::make_trap_request(reason, action);
+ } else {
+ C->set_trap_can_recompile(true);
+ }
+ break;
+ case Deoptimization::Action_make_not_entrant:
+ C->set_trap_can_recompile(true);
+ break;
+#ifdef ASSERT
+ case Deoptimization::Action_none:
+ case Deoptimization::Action_make_not_compilable:
+ break;
+ default:
+ assert(false, "bad action");
+#endif
+ }
+
+ if (TraceOptoParse) {
+ char buf[100];
+ tty->print_cr("Uncommon trap %s at bci:%d",
+ Deoptimization::format_trap_request(buf, sizeof(buf),
+ trap_request), bci());
+ }
+
+ CompileLog* log = C->log();
+ if (log != NULL) {
+ int kid = (klass == NULL)? -1: log->identify(klass);
+ log->begin_elem("uncommon_trap bci='%d'", bci());
+ char buf[100];
+ log->print(" %s", Deoptimization::format_trap_request(buf, sizeof(buf),
+ trap_request));
+ if (kid >= 0) log->print(" klass='%d'", kid);
+ if (comment != NULL) log->print(" comment='%s'", comment);
+ log->end_elem();
+ }
+
+ // Make sure any guarding test views this path as very unlikely
+ Node *i0 = control()->in(0);
+ if (i0 != NULL && i0->is_If()) { // Found a guarding if test?
+ IfNode *iff = i0->as_If();
+ float f = iff->_prob; // Get prob
+ if (control()->Opcode() == Op_IfTrue) {
+ if (f > PROB_UNLIKELY_MAG(4))
+ iff->_prob = PROB_MIN;
+ } else {
+ if (f < PROB_LIKELY_MAG(4))
+ iff->_prob = PROB_MAX;
+ }
+ }
+
+ // Clear out dead values from the debug info.
+ kill_dead_locals();
+
+ // Now insert the uncommon trap subroutine call
+ address call_addr = SharedRuntime::uncommon_trap_blob()->instructions_begin();
+ const TypePtr* no_memory_effects = NULL;
+ // Pass the index of the class to be loaded
+ Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON |
+ (must_throw ? RC_MUST_THROW : 0),
+ OptoRuntime::uncommon_trap_Type(),
+ call_addr, "uncommon_trap", no_memory_effects,
+ intcon(trap_request));
+ assert(call->as_CallStaticJava()->uncommon_trap_request() == trap_request,
+ "must extract request correctly from the graph");
+ assert(trap_request != 0, "zero value reserved by uncommon_trap_request");
+
+ call->set_req(TypeFunc::ReturnAdr, returnadr());
+ // The debug info is the only real input to this call.
+
+ // Halt-and-catch fire here. The above call should never return!
+ HaltNode* halt = new(C, TypeFunc::Parms) HaltNode(control(), frameptr());
+ _gvn.set_type_bottom(halt);
+ root()->add_req(halt);
+
+ stop_and_kill_map();
+}
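+
+// Note on trap_request: as the Deoptimization::trap_request_reason/
+// trap_request_action/trap_request_index accessors above suggest, a
+// trap_request packs the deopt reason, the requested action, and an
+// optional (class) index into a single int, which is then handed to the
+// uncommon_trap blob as its lone integer argument.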
+
+
+//--------------------------just_allocated_object------------------------------
+// Report the object that was just allocated.
+// It must be the case that there are no intervening safepoints.
+// We use this to determine if an object is so "fresh" that
+// it does not require card marks.
+Node* GraphKit::just_allocated_object(Node* current_control) {
+ if (C->recent_alloc_ctl() == current_control)
+ return C->recent_alloc_obj();
+ return NULL;
+}
+
+
+//---------------------------write_barrier_post--------------------------------
+// Insert a write-barrier store. This is to let generational GC work; we have
+// to flag all oop-stores before the next GC point.
+void GraphKit::write_barrier_post(Node* oop_store, Node* obj, Node* adr,
+ Node* val, bool use_precise) {
+ // No store check needed if we're storing a NULL or an old object
+ // (latter case is probably a string constant). The concurrent
+ // mark sweep garbage collector, however, needs to have all nonNull
+ // oop updates flagged via card-marks.
+ if (val != NULL && val->is_Con()) {
+ // must be either an oop or NULL
+ const Type* t = val->bottom_type();
+ if (t == TypePtr::NULL_PTR || t == Type::TOP)
+ // stores of null never (?) need barriers
+ return;
+ ciObject* con = t->is_oopptr()->const_oop();
+ if (con != NULL
+ && con->is_perm()
+ && Universe::heap()->can_elide_permanent_oop_store_barriers())
+ // no store barrier needed, because no old-to-new ref created
+ return;
+ }
+
+ if (use_ReduceInitialCardMarks()
+ && obj == just_allocated_object(control())) {
+ // We can skip marks on a freshly-allocated object.
+ // Keep this code in sync with do_eager_card_mark in runtime.cpp.
+ // That routine eagerly marks the occasional object which is produced
+ // by the slow path, so that we don't have to do it here.
+ return;
+ }
+
+ if (!use_precise) {
+ // All card marks for a (non-array) instance are in one place:
+ adr = obj;
+ }
+ // (Else it's an array (or unknown), and we want more precise card marks.)
+ assert(adr != NULL, "");
+
+ // Get the alias_index for raw card-mark memory
+ int adr_type = Compile::AliasIdxRaw;
+ // Convert the pointer to an int prior to doing math on it
+ Node* cast = _gvn.transform(new (C, 2) CastP2XNode(control(), adr));
+ // Divide by card size
+ assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef,
+ "Only one we handle so far.");
+ CardTableModRefBS* ct =
+ (CardTableModRefBS*)(Universe::heap()->barrier_set());
+ Node *b = _gvn.transform(new (C, 3) URShiftXNode( cast, _gvn.intcon(CardTableModRefBS::card_shift) ));
+ // We store into a byte array, so do not bother to left-shift by zero
+ // Get base of card map
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte),
+ "adjust this code");
+ Node *c = makecon(TypeRawPtr::make((address)ct->byte_map_base));
+ // Combine
+ Node *sb_ctl = control();
+ Node *sb_adr = _gvn.transform(new (C, 4) AddPNode( top()/*no base ptr*/, c, b ));
+ Node *sb_val = _gvn.intcon(0);
+ // Smash zero into card
+ if( !UseConcMarkSweepGC ) {
+ BasicType bt = T_BYTE;
+ store_to_memory(sb_ctl, sb_adr, sb_val, bt, adr_type);
+ } else {
+ // Specialized path for CM store barrier
+ cms_card_mark( sb_ctl, sb_adr, sb_val, oop_store);
+ }
+}
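+
+// In effect the code above computes
+//   card_addr = byte_map_base + (adr >> card_shift)
+// and smashes a zero byte there (or emits a StoreCM for CMS), i.e. one
+// byte of card table covers 2^card_shift bytes of heap.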
+
+// Specialized path for CMS store barrier
+void GraphKit::cms_card_mark(Node* ctl, Node* adr, Node* val, Node *oop_store) {
+ BasicType bt = T_BYTE;
+ int adr_idx = Compile::AliasIdxRaw;
+ Node* mem = memory(adr_idx);
+
+ // The type input is NULL in PRODUCT builds
+ const TypePtr* type = NULL;
+ debug_only(type = C->get_adr_type(adr_idx));
+
+ // Add required edge to oop_store, optimizer does not support precedence edges.
+ // Convert required edge to precedence edge before allocation.
+ Node *store = _gvn.transform( new (C, 5) StoreCMNode(ctl, mem, adr, type, val, oop_store) );
+ set_memory(store, adr_idx);
+
+ // For CMS, back-to-back card-marks can only remove the first one
+ // and this requires DU info. Push on worklist for optimizer.
+ if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address))
+ record_for_igvn(store);
+}
+
+
+void GraphKit::round_double_arguments(ciMethod* dest_method) {
+ // (Note: TypeFunc::make has a cache that makes this fast.)
+ const TypeFunc* tf = TypeFunc::make(dest_method);
+ int nargs = tf->_domain->_cnt - TypeFunc::Parms;
+ for (int j = 0; j < nargs; j++) {
+ const Type *targ = tf->_domain->field_at(j + TypeFunc::Parms);
+ if( targ->basic_type() == T_DOUBLE ) {
+ // If any parameters are doubles, they must be rounded before
+ // the call, dstore_rounding does gvn.transform
+ Node *arg = argument(j);
+ arg = dstore_rounding(arg);
+ set_argument(j, arg);
+ }
+ }
+}
+
+void GraphKit::round_double_result(ciMethod* dest_method) {
+ // A non-strict method may return a double value which has an extended
+ // exponent, but this must not be visible in a caller which is 'strict'.
+ // If a strict caller invokes a non-strict callee, round the double result.
+
+ BasicType result_type = dest_method->return_type()->basic_type();
+ assert( method() != NULL, "must have caller context");
+ if( result_type == T_DOUBLE && method()->is_strict() && !dest_method->is_strict() ) {
+ // Destination method's return value is on top of stack
+ // dstore_rounding() does gvn.transform
+ Node *result = pop_pair();
+ result = dstore_rounding(result);
+ push_pair(result);
+ }
+}
+
+// rounding for strict float precision conformance
+Node* GraphKit::precision_rounding(Node* n) {
+ return UseStrictFP && _method->flags().is_strict()
+ && UseSSE == 0 && Matcher::strict_fp_requires_explicit_rounding
+ ? _gvn.transform( new (C, 2) RoundFloatNode(0, n) )
+ : n;
+}
+
+// rounding for strict double precision conformance
+Node* GraphKit::dprecision_rounding(Node *n) {
+ return UseStrictFP && _method->flags().is_strict()
+ && UseSSE <= 1 && Matcher::strict_fp_requires_explicit_rounding
+ ? _gvn.transform( new (C, 2) RoundDoubleNode(0, n) )
+ : n;
+}
+
+// rounding for non-strict double stores
+Node* GraphKit::dstore_rounding(Node* n) {
+ return Matcher::strict_fp_requires_explicit_rounding
+ && UseSSE <= 1
+ ? _gvn.transform( new (C, 2) RoundDoubleNode(0, n) )
+ : n;
+}
+
+//=============================================================================
+// Generate a fast path/slow path idiom. Graph looks like:
+// [foo] indicates that 'foo' is a parameter
+//
+// [in] NULL
+// \ /
+// CmpP
+// Bool ne
+// If
+// / \
+// True False-<2>
+// / |
+// / cast_not_null
+// Load | | ^
+// [fast_test] | |
+// gvn to opt_test | |
+// / \ | <1>
+// True False |
+// | \\ |
+// [slow_call] \[fast_result]
+// Ctl Val \ \
+// | \ \
+// Catch <1> \ \
+// / \ ^ \ \
+// Ex No_Ex | \ \
+// | \ \ | \ <2> \
+// ... \ [slow_res] | | \ [null_result]
+// \ \--+--+--- | |
+// \ | / \ | /
+// --------Region Phi
+//
+//=============================================================================
+// Code is structured as a series of driver functions all called 'do_XXX' that
+// call a set of helper functions. Helper functions first, then drivers.
+
+//------------------------------null_check_oop---------------------------------
+// Null check oop. Set null-path control into Region in slot 3.
+// Give the cast-not-null result the other (not-null) control. Return the cast.
+Node* GraphKit::null_check_oop(Node* value, Node* *null_control,
+ bool never_see_null) {
+ // Initial NULL check taken path
+ (*null_control) = top();
+ Node* cast = null_check_common(value, T_OBJECT, false, null_control);
+
+ // Generate uncommon_trap:
+ if (never_see_null && (*null_control) != top()) {
+ // If we see an unexpected null at a check-cast we record it and force a
+ // recompile; the offending check-cast will be compiled to handle NULLs.
+ // If we see more than one offending BCI, then all checkcasts in the
+ // method will be compiled to handle NULLs.
+ PreserveJVMState pjvms(this);
+ set_control(*null_control);
+ uncommon_trap(Deoptimization::Reason_null_check,
+ Deoptimization::Action_make_not_entrant);
+ (*null_control) = top(); // NULL path is dead
+ }
+
+ // Cast away null-ness on the result
+ return cast;
+}
+
+//------------------------------opt_iff----------------------------------------
+// Optimize the fast-check IfNode. Set the fast-path region slot 2.
+// Return slow-path control.
+Node* GraphKit::opt_iff(Node* region, Node* iff) {
+ IfNode *opt_iff = _gvn.transform(iff)->as_If();
+
+ // Fast path taken; set region slot 2
+ Node *fast_taken = _gvn.transform( new (C, 1) IfFalseNode(opt_iff) );
+ region->init_req(2,fast_taken); // Capture fast-control
+
+ // Fast path not-taken, i.e. slow path
+ Node *slow_taken = _gvn.transform( new (C, 1) IfTrueNode(opt_iff) );
+ return slow_taken;
+}
+
+//-----------------------------make_runtime_call-------------------------------
+Node* GraphKit::make_runtime_call(int flags,
+ const TypeFunc* call_type, address call_addr,
+ const char* call_name,
+ const TypePtr* adr_type,
+ // The following parms are all optional.
+ // The first NULL ends the list.
+ Node* parm0, Node* parm1,
+ Node* parm2, Node* parm3,
+ Node* parm4, Node* parm5,
+ Node* parm6, Node* parm7) {
+ // Slow-path call
+ int size = call_type->domain()->cnt();
+ bool is_leaf = !(flags & RC_NO_LEAF);
+ bool has_io = (!is_leaf && !(flags & RC_NO_IO));
+ if (call_name == NULL) {
+ assert(!is_leaf, "must supply name for leaf");
+ call_name = OptoRuntime::stub_name(call_addr);
+ }
+ CallNode* call;
+ if (!is_leaf) {
+ call = new(C, size) CallStaticJavaNode(call_type, call_addr, call_name,
+ bci(), adr_type);
+ } else if (flags & RC_NO_FP) {
+ call = new(C, size) CallLeafNoFPNode(call_type, call_addr, call_name, adr_type);
+ } else {
+ call = new(C, size) CallLeafNode(call_type, call_addr, call_name, adr_type);
+ }
+
+ // The following is similar to set_edges_for_java_call,
+ // except that the memory effects of the call are restricted to AliasIdxRaw.
+
+ // Slow path call has no side-effects, uses few values
+ bool wide_in = !(flags & RC_NARROW_MEM);
+ bool wide_out = (C->get_alias_index(adr_type) == Compile::AliasIdxBot);
+
+ Node* prev_mem = NULL;
+ if (wide_in) {
+ prev_mem = set_predefined_input_for_runtime_call(call);
+ } else {
+ assert(!wide_out, "narrow in => narrow out");
+ Node* narrow_mem = memory(adr_type);
+ prev_mem = reset_memory();
+ map()->set_memory(narrow_mem);
+ set_predefined_input_for_runtime_call(call);
+ }
+
+ // Hook each parm in order. Stop looking at the first NULL.
+ if (parm0 != NULL) { call->init_req(TypeFunc::Parms+0, parm0);
+ if (parm1 != NULL) { call->init_req(TypeFunc::Parms+1, parm1);
+ if (parm2 != NULL) { call->init_req(TypeFunc::Parms+2, parm2);
+ if (parm3 != NULL) { call->init_req(TypeFunc::Parms+3, parm3);
+ if (parm4 != NULL) { call->init_req(TypeFunc::Parms+4, parm4);
+ if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
+ if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
+ if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
+ /* close each nested if ===> */ } } } } } } } }
+ assert(call->in(call->req()-1) != NULL, "must initialize all parms");
+
+ if (!is_leaf) {
+ // Non-leaves can block and take safepoints:
+ add_safepoint_edges(call, ((flags & RC_MUST_THROW) != 0));
+ }
+ // Non-leaves can throw exceptions:
+ if (has_io) {
+ call->set_req(TypeFunc::I_O, i_o());
+ }
+
+ if (flags & RC_UNCOMMON) {
+ // Set the count to a tiny probability. Cf. Estimate_Block_Frequency.
+ // (An "if" probability corresponds roughly to an unconditional count.
+ // Sort of.)
+ call->set_cnt(PROB_UNLIKELY_MAG(4));
+ }
+
+ Node* c = _gvn.transform(call);
+ assert(c == call, "cannot disappear");
+
+ if (wide_out) {
+ // Slow path call has full side-effects.
+ set_predefined_output_for_runtime_call(call);
+ } else {
+ // Slow path call has few side-effects, and/or sets few values.
+ set_predefined_output_for_runtime_call(call, prev_mem, adr_type);
+ }
+
+ if (has_io) {
+ set_i_o(_gvn.transform(new (C, 1) ProjNode(call, TypeFunc::I_O)));
+ }
+ return call;
+
+}
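+
+// Usage sketch: uncommon_trap() above is one client, roughly
+//   make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
+//                     OptoRuntime::uncommon_trap_Type(), call_addr,
+//                     "uncommon_trap", NULL /*no_memory_effects*/,
+//                     intcon(trap_request));
+// Leaf calls omit RC_NO_LEAF and then carry no JVM state or I/O edges.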
+
+//------------------------------merge_memory-----------------------------------
+// Merge memory from one path into the current memory state.
+void GraphKit::merge_memory(Node* new_mem, Node* region, int new_path) {
+ for (MergeMemStream mms(merged_memory(), new_mem->as_MergeMem()); mms.next_non_empty2(); ) {
+ Node* old_slice = mms.force_memory();
+ Node* new_slice = mms.memory2();
+ if (old_slice != new_slice) {
+ PhiNode* phi;
+ if (new_slice->is_Phi() && new_slice->as_Phi()->region() == region) {
+ phi = new_slice->as_Phi();
+ #ifdef ASSERT
+ if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region)
+ old_slice = old_slice->in(new_path);
+ // Caller is responsible for ensuring that any pre-existing
+ // phis are already aware of old memory.
+ int old_path = (new_path > 1) ? 1 : 2; // choose old_path != new_path
+ assert(phi->in(old_path) == old_slice, "pre-existing phis OK");
+ #endif
+ mms.set_memory(phi);
+ } else {
+ phi = PhiNode::make(region, old_slice, Type::MEMORY, mms.adr_type(C));
+ _gvn.set_type(phi, Type::MEMORY);
+ phi->set_req(new_path, new_slice);
+ mms.set_memory(_gvn.transform(phi)); // assume it is complete
+ }
+ }
+ }
+}
+
+//------------------------------make_slow_call_ex------------------------------
+// Make the exception handler hookups for the slow call
+void GraphKit::make_slow_call_ex(Node* call, ciInstanceKlass* ex_klass, bool separate_io_proj) {
+ if (stopped()) return;
+
+ // Make a catch node with just two handlers: fall-through and catch-all
+ Node* i_o = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::I_O, separate_io_proj) );
+ Node* catc = _gvn.transform( new (C, 2) CatchNode(control(), i_o, 2) );
+ Node* norm = _gvn.transform( new (C, 1) CatchProjNode(catc, CatchProjNode::fall_through_index, CatchProjNode::no_handler_bci) );
+ Node* excp = _gvn.transform( new (C, 1) CatchProjNode(catc, CatchProjNode::catch_all_index, CatchProjNode::no_handler_bci) );
+
+ { PreserveJVMState pjvms(this);
+ set_control(excp);
+ set_i_o(i_o);
+
+ if (excp != top()) {
+ // Create an exception state also.
+ // Use an exact type if the caller has specified a specific exception.
+ const Type* ex_type = TypeOopPtr::make_from_klass_unique(ex_klass)->cast_to_ptr_type(TypePtr::NotNull);
+ Node* ex_oop = new (C, 2) CreateExNode(ex_type, control(), i_o);
+ add_exception_state(make_exception_state(_gvn.transform(ex_oop)));
+ }
+ }
+
+ // Get the no-exception control from the CatchNode.
+ set_control(norm);
+}
+
+
+//-------------------------------gen_subtype_check-----------------------------
+// Generate a subtyping check. Takes as input the subtype and supertype.
+// Returns 2 values: sets the default control() to the true path and returns
+// the false path. Only reads invariant memory; sets no (visible) memory.
+// The PartialSubtypeCheckNode sets the hidden 1-word cache in the encoding
+// but that's not exposed to the optimizer. This call also doesn't take in an
+// Object; if you wish to check an Object you need to load the Object's class
+// prior to coming here.
+Node* GraphKit::gen_subtype_check(Node* subklass, Node* superklass) {
+ // Fast check for identical types, perhaps identical constants.
+ // The types can even be identical non-constants, in cases
+ // involving Array.newInstance, Object.clone, etc.
+ if (subklass == superklass)
+ return top(); // false path is dead; no test needed.
+
+ if (_gvn.type(superklass)->singleton()) {
+ ciKlass* superk = _gvn.type(superklass)->is_klassptr()->klass();
+ ciKlass* subk = _gvn.type(subklass)->is_klassptr()->klass();
+
+ // In the common case of an exact superklass, try to fold up the
+ // test before generating code. You may ask, why not just generate
+ // the code and then let it fold up? The answer is that the generated
+ // code will necessarily include null checks, which do not always
+ // completely fold away. If they are also needless, then they turn
+ // into a performance loss. Example:
+ // Foo[] fa = blah(); Foo x = fa[0]; fa[1] = x;
+ // Here, the type of 'fa' is often exact, so the store check
+ // of fa[1]=x will fold up, without testing the nullness of x.
+ switch (static_subtype_check(superk, subk)) {
+ case SSC_always_false:
+ {
+ Node* always_fail = control();
+ set_control(top());
+ return always_fail;
+ }
+ case SSC_always_true:
+ return top();
+ case SSC_easy_test:
+ {
+ // Just do a direct pointer compare and be done.
+ Node* cmp = _gvn.transform( new(C, 3) CmpPNode(subklass, superklass) );
+ Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
+ IfNode* iff = create_and_xform_if(control(), bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+ set_control( _gvn.transform( new(C, 1) IfTrueNode (iff) ) );
+ return _gvn.transform( new(C, 1) IfFalseNode(iff) );
+ }
+ case SSC_full_test:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ // %%% Possible further optimization: Even if the superklass is not exact,
+ // if the subklass is the unique subtype of the superklass, the check
+ // will always succeed. We could leave a dependency behind to ensure this.
+
+ // First load the super-klass's check-offset
+ Node *p1 = basic_plus_adr( superklass, superklass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() );
+ Node *chk_off = _gvn.transform( new (C, 3) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) );
+ int cacheoff_con = sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes();
+ bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con);
+
+ // Load from the sub-klass's super-class display list, or a 1-word cache of
+ // the secondary superclass list, or a failing value with a sentinel offset
+ // if the super-klass is an interface or exceptionally deep in the Java
+ // hierarchy and we have to scan the secondary superclass list the hard way.
+ // Worst-case type is a little odd: NULL is allowed as a result (usually
+ // klass loads can never produce a NULL).
+ Node *chk_off_X = ConvI2X(chk_off);
+ Node *p2 = _gvn.transform( new (C, 4) AddPNode(subklass,subklass,chk_off_X) );
+ // For some types like interfaces the following loadKlass is from a 1-word
+ // cache which is mutable so can't use immutable memory. Other
+ // types load from the super-class display table which is immutable.
+ Node *kmem = might_be_cache ? memory(p2) : immutable_memory();
+ Node *nkls = _gvn.transform( new (C, 3) LoadKlassNode( NULL, kmem, p2, _gvn.type(p2)->is_ptr(), TypeKlassPtr::OBJECT_OR_NULL ) );
+
+ // Compile speed common case: ARE a subtype and we canNOT fail
+ if( superklass == nkls )
+ return top(); // false path is dead; no test needed.
+
+ // See if we get an immediate positive hit. Happens roughly 83% of the
+ // time. Test to see if the value loaded just previously from the subklass
+ // is exactly the superklass.
+ Node *cmp1 = _gvn.transform( new (C, 3) CmpPNode( superklass, nkls ) );
+ Node *bol1 = _gvn.transform( new (C, 2) BoolNode( cmp1, BoolTest::eq ) );
+ IfNode *iff1 = create_and_xform_if( control(), bol1, PROB_LIKELY(0.83f), COUNT_UNKNOWN );
+ Node *iftrue1 = _gvn.transform( new (C, 1) IfTrueNode ( iff1 ) );
+ set_control( _gvn.transform( new (C, 1) IfFalseNode( iff1 ) ) );
+
+ // Compile speed common case: Check for being deterministic right now. If
+ // chk_off is a constant and not equal to cacheoff then we are NOT a
+ // subklass. In this case we need exactly the 1 test above and we can
+ // return those results immediately.
+ if (!might_be_cache) {
+ Node* not_subtype_ctrl = control();
+ set_control(iftrue1); // We need exactly the 1 test above
+ return not_subtype_ctrl;
+ }
+
+ // Gather the various success & failures here
+ RegionNode *r_ok_subtype = new (C, 4) RegionNode(4);
+ record_for_igvn(r_ok_subtype);
+ RegionNode *r_not_subtype = new (C, 3) RegionNode(3);
+ record_for_igvn(r_not_subtype);
+
+ r_ok_subtype->init_req(1, iftrue1);
+
+ // Check for immediate negative hit. Happens roughly 11% of the time (which
+ // is roughly 63% of the remaining cases). Test to see if the loaded
+ // check-offset points into the subklass display list or the 1-element
+ // cache. If it points to the display (and NOT the cache) and the display
+ // missed then it's not a subtype.
+ Node *cacheoff = _gvn.intcon(cacheoff_con);
+ Node *cmp2 = _gvn.transform( new (C, 3) CmpINode( chk_off, cacheoff ) );
+ Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmp2, BoolTest::ne ) );
+ IfNode *iff2 = create_and_xform_if( control(), bol2, PROB_LIKELY(0.63f), COUNT_UNKNOWN );
+ r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
+ set_control( _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
+
+ // Check for self. Very rare to get here, but it's taken 1/3 the time.
+ // No performance impact (too rare) but allows sharing of secondary arrays
+ // which has some footprint reduction.
+ Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
+ Node *bol3 = _gvn.transform( new (C, 2) BoolNode( cmp3, BoolTest::eq ) );
+ IfNode *iff3 = create_and_xform_if( control(), bol3, PROB_LIKELY(0.36f), COUNT_UNKNOWN );
+ r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
+ set_control( _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
+
+ // Now do a linear scan of the secondary super-klass array. Again, no real
+ // performance impact (too rare) but it's gotta be done.
+ // (The stub also contains the self-check of subklass == superklass.
+ // Since the code is rarely used, there is no penalty for moving it
+ // out of line, and it can only improve I-cache density.)
+ Node* psc = _gvn.transform(
+ new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );
+
+ Node *cmp4 = _gvn.transform( new (C, 3) CmpPNode( psc, null() ) );
+ Node *bol4 = _gvn.transform( new (C, 2) BoolNode( cmp4, BoolTest::ne ) );
+ IfNode *iff4 = create_and_xform_if( control(), bol4, PROB_FAIR, COUNT_UNKNOWN );
+ r_not_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode (iff4) ) );
+ r_ok_subtype ->init_req(3, _gvn.transform( new (C, 1) IfFalseNode(iff4) ) );
+
+ // Return false path; set default control to true path.
+ set_control( _gvn.transform(r_ok_subtype) );
+ return _gvn.transform(r_not_subtype);
+}
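+
+// Summary of the dynamic check ladder built above:
+//  1) load the word at subklass+chk_off and compare it with superklass
+//     (primary-super display hit, ~83% of the time);
+//  2) if chk_off is not the secondary-super cache offset, a display miss
+//     is a definite failure;
+//  3) subklass == superklass self check;
+//  4) otherwise fall back to the PartialSubtypeCheck stub, which scans
+//     the secondary-super array out of line.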
+
+//----------------------------static_subtype_check-----------------------------
+// Shortcut important common cases when superklass is exact:
+// (0) superklass is java.lang.Object (can occur in reflective code)
+// (1) subklass is already limited to a subtype of superklass => always ok
+// (2) subklass does not overlap with superklass => always fail
+// (3) superklass has NO subtypes and we can check with a simple compare.
+int GraphKit::static_subtype_check(ciKlass* superk, ciKlass* subk) {
+ if (StressReflectiveCode) {
+ return SSC_full_test; // Let caller generate the general case.
+ }
+
+ if (superk == env()->Object_klass()) {
+ return SSC_always_true; // (0) this test cannot fail
+ }
+
+ ciType* superelem = superk;
+ if (superelem->is_array_klass())
+ superelem = superelem->as_array_klass()->base_element_type();
+
+ if (!subk->is_interface()) { // cannot trust static interface types yet
+ if (subk->is_subtype_of(superk)) {
+ return SSC_always_true; // (1) false path dead; no dynamic test needed
+ }
+ if (!(superelem->is_klass() && superelem->as_klass()->is_interface()) &&
+ !superk->is_subtype_of(subk)) {
+ return SSC_always_false;
+ }
+ }
+
+ // If casting to an instance klass, it must have no subtypes
+ if (superk->is_interface()) {
+ // Cannot trust interfaces yet.
+ // %%% S.B. superk->nof_implementors() == 1
+ } else if (superelem->is_instance_klass()) {
+ ciInstanceKlass* ik = superelem->as_instance_klass();
+ if (!ik->has_subklass() && !ik->is_interface()) {
+ if (!ik->is_final()) {
+ // Add a dependency if there is a chance of a later subclass.
+ C->dependencies()->assert_leaf_type(ik);
+ }
+ return SSC_easy_test; // (3) caller can do a simple ptr comparison
+ }
+ } else {
+ // A primitive array type has no subtypes.
+ return SSC_easy_test; // (3) caller can do a simple ptr comparison
+ }
+
+ return SSC_full_test;
+}
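+
+// Concrete examples of the shortcuts above: superk == java.lang.Object
+// folds to SSC_always_true; a final or currently-leaf instance klass (and
+// any primitive array klass) yields SSC_easy_test, i.e. a single pointer
+// compare; interfaces used as the superklass, and instance klasses that
+// already have subclasses, fall back to SSC_full_test.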
+
+// Profile-driven exact type check:
+Node* GraphKit::type_check_receiver(Node* receiver, ciKlass* klass,
+ float prob,
+ Node* *casted_receiver) {
+ const TypeKlassPtr* tklass = TypeKlassPtr::make(klass);
+ Node* recv_klass = load_object_klass(receiver);
+ Node* want_klass = makecon(tklass);
+ Node* cmp = _gvn.transform( new(C, 3) CmpPNode(recv_klass, want_klass) );
+ Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
+ IfNode* iff = create_and_xform_if(control(), bol, prob, COUNT_UNKNOWN);
+ set_control( _gvn.transform( new(C, 1) IfTrueNode (iff) ));
+ Node* fail = _gvn.transform( new(C, 1) IfFalseNode(iff) );
+
+ const TypeOopPtr* recv_xtype = tklass->as_instance_type();
+ assert(recv_xtype->klass_is_exact(), "");
+
+ // Subsume downstream occurrences of receiver with a cast to
+ // recv_xtype, since now we know what the type will be.
+ Node* cast = new(C, 2) CheckCastPPNode(control(), receiver, recv_xtype);
+ (*casted_receiver) = _gvn.transform(cast);
+ // (User must make the replace_in_map call.)
+
+ return fail;
+}
+
+
+//-------------------------------gen_instanceof--------------------------------
+// Generate an instance-of idiom. Used by both the instance-of bytecode
+// and the reflective instance-of call.
+Node* GraphKit::gen_instanceof( Node *subobj, Node* superklass ) {
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+ assert( !stopped(), "dead parse path should be checked in callers" );
+ assert(!TypePtr::NULL_PTR->higher_equal(_gvn.type(superklass)->is_klassptr()),
+ "must check for not-null not-dead klass in callers");
+
+ // Make the merge point
+ enum { _obj_path = 1, _fail_path, _null_path, PATH_LIMIT };
+ RegionNode* region = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ Node* phi = new(C, PATH_LIMIT) PhiNode(region, TypeInt::BOOL);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ // Null check; get casted pointer; set region slot 3
+ Node* null_ctl = top();
+ Node* not_null_obj = null_check_oop(subobj, &null_ctl);
+
+ // If not_null_obj is dead, only null-path is taken
+ if (stopped()) { // Doing instance-of on a NULL?
+ set_control(null_ctl);
+ return intcon(0);
+ }
+ region->init_req(_null_path, null_ctl);
+ phi ->init_req(_null_path, intcon(0)); // Set null path value
+
+ // Load the object's klass
+ Node* obj_klass = load_object_klass(not_null_obj);
+
+ // Generate the subtype check
+ Node* not_subtype_ctrl = gen_subtype_check(obj_klass, superklass);
+
+ // Plug in the success path to the general merge in slot 1.
+ region->init_req(_obj_path, control());
+ phi ->init_req(_obj_path, intcon(1));
+
+ // Plug in the failing path to the general merge in slot 2.
+ region->init_req(_fail_path, not_subtype_ctrl);
+ phi ->init_req(_fail_path, intcon(0));
+
+ // Return final merged results
+ set_control( _gvn.transform(region) );
+ record_for_igvn(region);
+ return _gvn.transform(phi);
+}
+
+//-------------------------------gen_checkcast---------------------------------
+// Generate a checkcast idiom. Used by both the checkcast bytecode and the
+// array store bytecode. Stack must be as-if BEFORE doing the bytecode so the
+// uncommon-trap paths work. Adjust stack after this call.
+// If failure_control is supplied and not null, it is filled in with
+// the control edge for the cast failure. Otherwise, an appropriate
+// uncommon trap or exception is thrown.
+Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
+ Node* *failure_control) {
+ kill_dead_locals(); // Benefit all the uncommon traps
+ const TypeKlassPtr *tk = _gvn.type(superklass)->is_klassptr();
+ const Type *toop = TypeOopPtr::make_from_klass(tk->klass());
+
+ // Fast cutout: Check the case that the cast is vacuously true.
+ // This detects the common cases where the test will short-circuit
+ // away completely. We do this before we perform the null check,
+ // because if the test is going to turn into zero code, we don't
+ // want a residual null check left around. (Causes a slowdown,
+ // for example, in some objArray manipulations, such as a[i]=a[j].)
+ if (tk->singleton()) {
+ const TypeOopPtr* objtp = _gvn.type(obj)->isa_oopptr();
+ if (objtp != NULL && objtp->klass() != NULL) {
+ switch (static_subtype_check(tk->klass(), objtp->klass())) {
+ case SSC_always_true:
+ return obj;
+ case SSC_always_false:
+ // It needs a null check because a null will *pass* the cast check.
+ // A non-null value will always produce an exception.
+ return do_null_assert(obj, T_OBJECT);
+ }
+ }
+ }
+
+ ciProfileData* data = NULL;
+ if (failure_control == NULL) { // use MDO in regular case only
+ assert(java_bc() == Bytecodes::_aastore ||
+ java_bc() == Bytecodes::_checkcast,
+ "interpreter profiles type checks only for these BCs");
+ data = method()->method_data()->bci_to_data(bci());
+ }
+
+ // Make the merge point
+ enum { _obj_path = 1, _null_path, PATH_LIMIT };
+ RegionNode* region = new (C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ Node* phi = new (C, PATH_LIMIT) PhiNode(region, toop);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ // Use null-cast information if it is available
+ bool never_see_null = false;
+ // If we see an unexpected null at a check-cast we record it and force a
+ // recompile; the offending check-cast will be compiled to handle NULLs.
+ // If we see several offending BCIs, then all checkcasts in the
+ // method will be compiled to handle NULLs.
+ if (UncommonNullCast // Cutout for this technique
+ && failure_control == NULL // regular case
+ && obj != null() // And not the -Xcomp stupid case?
+ && !too_many_traps(Deoptimization::Reason_null_check)) {
+ // Finally, check the "null_seen" bit from the interpreter.
+ if (data == NULL || !data->as_BitData()->null_seen()) {
+ never_see_null = true;
+ }
+ }
+
+ // Null check; get casted pointer; set region slot 2
+ Node* null_ctl = top();
+ Node* not_null_obj = null_check_oop(obj, &null_ctl, never_see_null);
+
+ // If not_null_obj is dead, only null-path is taken
+ if (stopped()) { // Doing checkcast on a NULL?
+ set_control(null_ctl);
+ return null();
+ }
+ region->init_req(_null_path, null_ctl);
+ phi ->init_req(_null_path, null()); // Set null path value
+
+ Node* cast_obj = NULL; // the casted version of the object
+
+ // If the profile has seen exactly one type, narrow to that type.
+ // (The subsequent subtype check will always fold up.)
+ if (UseTypeProfile && TypeProfileCasts && data != NULL &&
+ // Counter has never been decremented (due to cast failure).
+ // ...This is a reasonable thing to expect. It is true of
+ // all casts inserted by javac to implement generic types.
+ data->as_CounterData()->count() >= 0 &&
+ !too_many_traps(Deoptimization::Reason_class_check)) {
+ // (No, this isn't a call, but it's enough like a virtual call
+ // to use the same ciMethod accessor to get the profile info...)
+ ciCallProfile profile = method()->call_profile_at_bci(bci());
+ if (profile.count() >= 0 && // no cast failures here
+ profile.has_receiver(0) &&
+ profile.morphism() == 1) {
+ ciKlass* exact_kls = profile.receiver(0);
+ int ssc = static_subtype_check(tk->klass(), exact_kls);
+ if (ssc == SSC_always_true) {
+ // If we narrow the type to match what the type profile sees,
+ // we can then remove the rest of the cast.
+ // This is a win, even if the exact_kls is very specific,
+ // because downstream operations, such as method calls,
+ // will often benefit from the sharper type.
+ Node* exact_obj = not_null_obj; // will get updated in place...
+ Node* slow_ctl = type_check_receiver(exact_obj, exact_kls, 1.0,
+ &exact_obj);
+ { PreserveJVMState pjvms(this);
+ set_control(slow_ctl);
+ uncommon_trap(Deoptimization::Reason_class_check,
+ Deoptimization::Action_maybe_recompile);
+ }
+ if (failure_control != NULL) // failure is now impossible
+ (*failure_control) = top();
+ replace_in_map(not_null_obj, exact_obj);
+ // adjust the type of the phi to the exact klass:
+ phi->raise_bottom_type(_gvn.type(exact_obj)->meet(TypePtr::NULL_PTR));
+ cast_obj = exact_obj;
+ }
+ // assert(cast_obj != NULL)... except maybe the profile lied to us.
+ }
+ }
+
+ if (cast_obj == NULL) {
+ // Load the object's klass
+ Node* obj_klass = load_object_klass(not_null_obj);
+
+ // Generate the subtype check
+ Node* not_subtype_ctrl = gen_subtype_check( obj_klass, superklass );
+
+ // Plug in success path into the merge
+ cast_obj = _gvn.transform(new (C, 2) CheckCastPPNode(control(),
+ not_null_obj, toop));
+ // Failure path ends in uncommon trap (or may be dead - failure impossible)
+ if (failure_control == NULL) {
+ if (not_subtype_ctrl != top()) { // If failure is possible
+ PreserveJVMState pjvms(this);
+ set_control(not_subtype_ctrl);
+ builtin_throw(Deoptimization::Reason_class_check, obj_klass);
+ }
+ } else {
+ (*failure_control) = not_subtype_ctrl;
+ }
+ }
+
+ region->init_req(_obj_path, control());
+ phi ->init_req(_obj_path, cast_obj);
+
+ // A merge of NULL or Casted-NotNull obj
+ Node* res = _gvn.transform(phi);
+
+ // Note I do NOT always 'replace_in_map(obj,result)' here.
+ // if( tk->klass()->can_be_primary_super() )
+ // This means that if I successfully store an Object into an array-of-String
+ // I 'forget' that the Object is really now known to be a String. I have to
+ // do this because we don't have true union types for interfaces - if I store
+ // a Baz into an array-of-Interface and then tell the optimizer it's an
+ // Interface, I forget that it's also a Baz and cannot do Baz-like field
+ // references to it. FIX THIS WHEN UNION TYPES APPEAR!
+ // replace_in_map( obj, res );
+
+ // Return final merged results
+ set_control( _gvn.transform(region) );
+ record_for_igvn(region);
+ return res;
+}
+
+//------------------------------next_monitor-----------------------------------
+// What number should be given to the next monitor?
+int GraphKit::next_monitor() {
+ int current = jvms()->monitor_depth()* C->sync_stack_slots();
+ int next = current + C->sync_stack_slots();
+ // Keep the toplevel high water mark current:
+ if (C->fixed_slots() < next) C->set_fixed_slots(next);
+ return current;
+}
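+
+// (E.g. with one sync stack slot per monitor, the first three monitors get
+// numbers 0, 1, 2 and fixed_slots is raised to at least 3.)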
+
+//------------------------------insert_mem_bar---------------------------------
+// Memory barrier to avoid floating things around
+// The membar serves as a pinch point between both control and all memory slices.
+Node* GraphKit::insert_mem_bar(int opcode, Node* precedent) {
+ MemBarNode* mb = MemBarNode::make(C, opcode, Compile::AliasIdxBot, precedent);
+ mb->init_req(TypeFunc::Control, control());
+ mb->init_req(TypeFunc::Memory, reset_memory());
+ Node* membar = _gvn.transform(mb);
+ set_control(_gvn.transform(new (C, 1) ProjNode(membar,TypeFunc::Control) ));
+ set_all_memory_call(membar);
+ return membar;
+}
+
+//-------------------------insert_mem_bar_volatile----------------------------
+// Memory barrier to avoid floating things around
+// The membar serves as a pinch point between both control and memory(alias_idx).
+// If you want to make a pinch point on all memory slices, do not use this
+// function (even with AliasIdxBot); use insert_mem_bar() instead.
+Node* GraphKit::insert_mem_bar_volatile(int opcode, int alias_idx, Node* precedent) {
+ // When Parse::do_put_xxx updates a volatile field, it appends a series
+ // of MemBarVolatile nodes, one for *each* volatile field alias category.
+ // The first membar is on the same memory slice as the field store opcode.
+ // This forces the membar to follow the store. (Bug 6500685 broke this.)
+ // All the other membars (for other volatile slices, including AliasIdxBot,
+ // which stands for all unknown volatile slices) are control-dependent
+ // on the first membar. This prevents later volatile loads or stores
+ // from sliding up past the just-emitted store.
+
+ MemBarNode* mb = MemBarNode::make(C, opcode, alias_idx, precedent);
+ mb->set_req(TypeFunc::Control,control());
+ if (alias_idx == Compile::AliasIdxBot) {
+ mb->set_req(TypeFunc::Memory, merged_memory()->base_memory());
+ } else {
+ assert(!(opcode == Op_Initialize && alias_idx != Compile::AliasIdxRaw), "fix caller");
+ mb->set_req(TypeFunc::Memory, memory(alias_idx));
+ }
+ Node* membar = _gvn.transform(mb);
+ set_control(_gvn.transform(new (C, 1) ProjNode(membar, TypeFunc::Control)));
+ if (alias_idx == Compile::AliasIdxBot) {
+ merged_memory()->set_base_memory(_gvn.transform(new (C, 1) ProjNode(membar, TypeFunc::Memory)));
+ } else {
+ set_memory(_gvn.transform(new (C, 1) ProjNode(membar, TypeFunc::Memory)),alias_idx);
+ }
+ return membar;
+}
+
+//------------------------------shared_lock------------------------------------
+// Emit locking code.
+FastLockNode* GraphKit::shared_lock(Node* obj) {
+ // bci is either a monitorenter bc or InvocationEntryBci
+ // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
+ assert(SynchronizationEntryBCI == InvocationEntryBci, "");
+
+ if( !GenerateSynchronizationCode )
+ return NULL; // Not locking things?
+ if (stopped()) // Dead monitor?
+ return NULL;
+
+ assert(dead_locals_are_killed(), "should kill locals before sync. point");
+
+ // Box the stack location
+ Node* box = _gvn.transform(new (C, 1) BoxLockNode(next_monitor()));
+ Node* mem = reset_memory();
+
+ FastLockNode * flock = _gvn.transform(new (C, 3) FastLockNode(0, obj, box) )->as_FastLock();
+ if (PrintPreciseBiasedLockingStatistics) {
+ // Create the counters for this fast lock.
+ flock->create_lock_counter(sync_jvms()); // sync_jvms used to get current bci
+ }
+ // Add monitor to debug info for the slow path. If we block inside the
+ // slow path and de-opt, we need the monitor hanging around
+ map()->push_monitor( flock );
+
+ const TypeFunc *tf = LockNode::lock_type();
+ LockNode *lock = new (C, tf->domain()->cnt()) LockNode(C, tf);
+
+ lock->init_req( TypeFunc::Control, control() );
+ lock->init_req( TypeFunc::Memory , mem );
+ lock->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ lock->init_req( TypeFunc::FramePtr, frameptr() );
+ lock->init_req( TypeFunc::ReturnAdr, top() );
+
+ lock->init_req(TypeFunc::Parms + 0, obj);
+ lock->init_req(TypeFunc::Parms + 1, box);
+ lock->init_req(TypeFunc::Parms + 2, flock);
+ add_safepoint_edges(lock);
+
+ lock = _gvn.transform( lock )->as_Lock();
+
+ // lock has no side-effects, sets few values
+ set_predefined_output_for_runtime_call(lock, mem, TypeRawPtr::BOTTOM);
+
+ insert_mem_bar(Op_MemBarAcquire);
+
+ // Add this to the worklist so that the lock can be eliminated
+ record_for_igvn(lock);
+
+#ifndef PRODUCT
+ if (PrintLockStatistics) {
+ // Update the counter for this lock. Don't bother using an atomic
+ // operation since we don't require absolute accuracy.
+ lock->create_lock_counter(map()->jvms());
+ int adr_type = Compile::AliasIdxRaw;
+ Node* counter_addr = makecon(TypeRawPtr::make(lock->counter()->addr()));
+ Node* cnt = make_load(NULL, counter_addr, TypeInt::INT, T_INT, adr_type);
+ Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(1)));
+ store_to_memory(control(), counter_addr, incr, T_INT, adr_type);
+ }
+#endif
+
+ return flock;
+}
+
+
+//------------------------------shared_unlock----------------------------------
+// Emit unlocking code.
+void GraphKit::shared_unlock(Node* box, Node* obj) {
+ // bci is either a monitorenter bc or InvocationEntryBci
+ // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
+ assert(SynchronizationEntryBCI == InvocationEntryBci, "");
+
+ if( !GenerateSynchronizationCode )
+ return;
+ if (stopped()) { // Dead monitor?
+ map()->pop_monitor(); // Kill monitor from debug info
+ return;
+ }
+
+ // Memory barrier to avoid floating things down past the locked region
+ insert_mem_bar(Op_MemBarRelease);
+
+ const TypeFunc *tf = OptoRuntime::complete_monitor_exit_Type();
+ UnlockNode *unlock = new (C, tf->domain()->cnt()) UnlockNode(C, tf);
+ uint raw_idx = Compile::AliasIdxRaw;
+ unlock->init_req( TypeFunc::Control, control() );
+ unlock->init_req( TypeFunc::Memory , memory(raw_idx) );
+ unlock->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ unlock->init_req( TypeFunc::FramePtr, frameptr() );
+ unlock->init_req( TypeFunc::ReturnAdr, top() );
+
+ unlock->init_req(TypeFunc::Parms + 0, obj);
+ unlock->init_req(TypeFunc::Parms + 1, box);
+ unlock = _gvn.transform(unlock)->as_Unlock();
+
+ Node* mem = reset_memory();
+
+ // unlock has no side-effects, sets few values
+ set_predefined_output_for_runtime_call(unlock, mem, TypeRawPtr::BOTTOM);
+
+ // Kill monitor from debug info
+ map()->pop_monitor( );
+}
+
+//-------------------------------get_layout_helper-----------------------------
+// If the given klass is a constant or known to be an array,
+// fetch the constant layout helper value into constant_value
+// and return (Node*)NULL. Otherwise, load the non-constant
+// layout helper value, and return the node which represents it.
+// This two-faced routine is useful because allocation sites
+// almost always feature constant types.
+Node* GraphKit::get_layout_helper(Node* klass_node, jint& constant_value) {
+ const TypeKlassPtr* inst_klass = _gvn.type(klass_node)->isa_klassptr();
+ if (!StressReflectiveCode && inst_klass != NULL) {
+ ciKlass* klass = inst_klass->klass();
+ bool xklass = inst_klass->klass_is_exact();
+ if (xklass || klass->is_array_klass()) {
+ jint lhelper = klass->layout_helper();
+ if (lhelper != Klass::_lh_neutral_value) {
+ constant_value = lhelper;
+ return (Node*) NULL;
+ }
+ }
+ }
+ constant_value = Klass::_lh_neutral_value; // put in a known value
+ Node* lhp = basic_plus_adr(klass_node, klass_node, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
+ return make_load(NULL, lhp, TypeInt::INT, T_INT);
+}
+
+// We just put in an allocate/initialize with a big raw-memory effect.
+// Hook selected additional alias categories on the initialization.
+static void hook_memory_on_init(GraphKit& kit, int alias_idx,
+ MergeMemNode* init_in_merge,
+ Node* init_out_raw) {
+ DEBUG_ONLY(Node* init_in_raw = init_in_merge->base_memory());
+ assert(init_in_merge->memory_at(alias_idx) == init_in_raw, "");
+
+ Node* prevmem = kit.memory(alias_idx);
+ init_in_merge->set_memory_at(alias_idx, prevmem);
+ kit.set_memory(init_out_raw, alias_idx);
+}
+
+//---------------------------set_output_for_allocation-------------------------
+Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
+ const TypeOopPtr* oop_type,
+ bool raw_mem_only) {
+ int rawidx = Compile::AliasIdxRaw;
+ alloc->set_req( TypeFunc::FramePtr, frameptr() );
+ add_safepoint_edges(alloc);
+ Node* allocx = _gvn.transform(alloc);
+ set_control( _gvn.transform(new (C, 1) ProjNode(allocx, TypeFunc::Control) ) );
+ // create memory projection for i_o
+ set_memory ( _gvn.transform( new (C, 1) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx );
+ make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true);
+
+ // create a memory projection as for the normal control path
+ Node* malloc = _gvn.transform(new (C, 1) ProjNode(allocx, TypeFunc::Memory));
+ set_memory(malloc, rawidx);
+
+ // a normal slow-call doesn't change i_o, but an allocation does
+ // we create a separate i_o projection for the normal control path
+ set_i_o(_gvn.transform( new (C, 1) ProjNode(allocx, TypeFunc::I_O, false) ) );
+ Node* rawoop = _gvn.transform( new (C, 1) ProjNode(allocx, TypeFunc::Parms) );
+
+ // put in an initialization barrier
+ InitializeNode* init = insert_mem_bar_volatile(Op_Initialize, rawidx,
+ rawoop)->as_Initialize();
+ assert(alloc->initialization() == init, "2-way macro link must work");
+ assert(init ->allocation() == alloc, "2-way macro link must work");
+ if (ReduceFieldZeroing && !raw_mem_only) {
+ // Extract memory strands which may participate in the new object's
+ // initialization, and source them from the new InitializeNode.
+ // This will allow us to observe initializations when they occur,
+ // and link them properly (as a group) to the InitializeNode.
+ Node* klass_node = alloc->in(AllocateNode::KlassNode);
+ assert(init->in(InitializeNode::Memory) == malloc, "");
+ MergeMemNode* minit_in = MergeMemNode::make(C, malloc);
+ init->set_req(InitializeNode::Memory, minit_in);
+ record_for_igvn(minit_in); // fold it up later, if possible
+ Node* minit_out = memory(rawidx);
+ assert(minit_out->is_Proj() && minit_out->in(0) == init, "");
+ if (oop_type->isa_aryptr()) {
+ const TypePtr* telemref = oop_type->add_offset(Type::OffsetBot);
+ int elemidx = C->get_alias_index(telemref);
+ hook_memory_on_init(*this, elemidx, minit_in, minit_out);
+ } else if (oop_type->isa_instptr()) {
+ ciInstanceKlass* ik = oop_type->klass()->as_instance_klass();
+ for (int i = 0, len = ik->nof_nonstatic_fields(); i < len; i++) {
+ ciField* field = ik->nonstatic_field_at(i);
+ if (field->offset() >= TrackedInitializationLimit)
+ continue; // do not bother to track really large numbers of fields
+ // Find (or create) the alias category for this field:
+ int fieldidx = C->alias_type(field)->index();
+ hook_memory_on_init(*this, fieldidx, minit_in, minit_out);
+ }
+ }
+ }
+
+ // Cast raw oop to the real thing...
+ Node* javaoop = new (C, 2) CheckCastPPNode(control(), rawoop, oop_type);
+ javaoop = _gvn.transform(javaoop);
+ C->set_recent_alloc(control(), javaoop);
+ assert(just_allocated_object(control()) == javaoop, "just allocated");
+
+#ifdef ASSERT
+ { // Verify that the AllocateNode::Ideal_foo recognizers work:
+ Node* kn = alloc->in(AllocateNode::KlassNode);
+ Node* ln = alloc->in(AllocateNode::ALength);
+ assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn,
+ "Ideal_klass works");
+ assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn,
+ "Ideal_klass works");
+ if (alloc->is_AllocateArray()) {
+ assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln,
+ "Ideal_length works");
+ assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln,
+ "Ideal_length works");
+ } else {
+ assert(ln->is_top(), "no length, please");
+ }
+ }
+#endif //ASSERT
+
+ return javaoop;
+}
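+// Shape of the subgraph built above (sketch only; projections are listed by
+// their TypeFunc slot, not by their exact constructor arguments):
+//
+//   AllocateNode
+//     +-- Proj(Control)            --> becomes control()
+//     +-- Proj(Memory)  [io path]  --> raw memory consumed by make_slow_call_ex
+//     +-- Proj(Memory)  = malloc   --> feeds the InitializeNode
+//     +-- Proj(I_O)                --> becomes i_o()
+//     +-- Proj(Parms)   = rawoop   --> Initialize --> CheckCastPP = javaoop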
+
+//---------------------------new_instance--------------------------------------
+// This routine takes a klass_node which may be constant (for a static type)
+// or may be non-constant (for reflective code). It will work equally well
+// for either, and the graph will fold nicely if the optimizer later reduces
+// the type to a constant.
+// The optional arguments are for specialized use by intrinsics:
+// - If 'extra_slow_test' is not null, it is an extra condition for the slow path.
+// - If 'raw_mem_only', do not cast the result to an oop.
+// - If 'return_size_val', report the total object size to the caller.
+Node* GraphKit::new_instance(Node* klass_node,
+ Node* extra_slow_test,
+ bool raw_mem_only, // affect only raw memory
+ Node* *return_size_val) {
+ // Compute size in doublewords
+ // The size is always an integral number of doublewords, represented
+ // as a positive bytewise size stored in the klass's layout_helper.
+ // The layout_helper also encodes (in a low bit) the need for a slow path.
+ jint layout_con = Klass::_lh_neutral_value;
+ Node* layout_val = get_layout_helper(klass_node, layout_con);
+ int layout_is_con = (layout_val == NULL);
+
+ if (extra_slow_test == NULL) extra_slow_test = intcon(0);
+ // Generate the initial go-slow test. It's either ALWAYS (return a
+ // Node for 1) or NEVER (return a NULL) or perhaps (in the reflective
+ // case) a computed value derived from the layout_helper.
+ Node* initial_slow_test = NULL;
+ if (layout_is_con) {
+ assert(!StressReflectiveCode, "stress mode does not use these paths");
+ bool must_go_slow = Klass::layout_helper_needs_slow_path(layout_con);
+ initial_slow_test = must_go_slow? intcon(1): extra_slow_test;
+
+ } else { // reflective case
+ // This reflective path is used by Unsafe.allocateInstance.
+ // (It may be stress-tested by specifying StressReflectiveCode.)
+ // Basically, we want to get into the VM if there's an illegal argument.
+ Node* bit = intcon(Klass::_lh_instance_slow_path_bit);
+ initial_slow_test = _gvn.transform( new (C, 3) AndINode(layout_val, bit) );
+ if (extra_slow_test != intcon(0)) {
+ initial_slow_test = _gvn.transform( new (C, 3) OrINode(initial_slow_test, extra_slow_test) );
+ }
+ // (Macro-expander will further convert this to a Bool, if necessary.)
+ }
+
+ // Find the size in bytes. This is easy; it's the layout_helper.
+ // The size value must be valid even if the slow path is taken.
+ Node* size = NULL;
+ if (layout_is_con) {
+ size = MakeConX(Klass::layout_helper_size_in_bytes(layout_con));
+ } else { // reflective case
+ // This reflective path is used by clone and Unsafe.allocateInstance.
+ size = ConvI2X(layout_val);
+
+ // Clear the low bits to extract layout_helper_size_in_bytes:
+ assert((int)Klass::_lh_instance_slow_path_bit < BytesPerLong, "clear bit");
+ Node* mask = MakeConX(~ (intptr_t)right_n_bits(LogBytesPerLong));
+ size = _gvn.transform( new (C, 3) AndXNode(size, mask) );
+ }
+ if (return_size_val != NULL) {
+ (*return_size_val) = size;
+ }
+
+ // This is a precise notnull oop of the klass.
+ // (Actually, it need not be precise if this is a reflective allocation.)
+ // It's what we cast the result to.
+ const TypeKlassPtr* tklass = _gvn.type(klass_node)->isa_klassptr();
+ if (!tklass) tklass = TypeKlassPtr::OBJECT;
+ const TypeOopPtr* oop_type = tklass->as_instance_type();
+
+ // Now generate allocation code
+ AllocateNode* alloc
+ = new (C, AllocateNode::ParmLimit)
+ AllocateNode(C, AllocateNode::alloc_type(),
+ control(), memory(Compile::AliasIdxRaw), i_o(),
+ size, klass_node,
+ initial_slow_test);
+
+ return set_output_for_allocation(alloc, oop_type, raw_mem_only);
+}
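+// Example use (illustrative sketch; 'kit' and 'klass_node' stand for whatever
+// GraphKit and klass pointer the caller already has):
+//
+//   Node* size = NULL;
+//   Node* obj  = kit.new_instance(klass_node, /*extra_slow_test*/ NULL,
+//                                 /*raw_mem_only*/ false, &size);
+//   // 'obj' is the CheckCastPP'd oop; 'size' reports the object size in bytes.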
+
+//-------------------------------new_array-------------------------------------
+// helper for both newarray and anewarray
+// The 'length' parameter is (obviously) the length of the array.
+// See comments on new_instance for the meaning of the other arguments.
+Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable)
+ Node* length, // number of array elements
+ bool raw_mem_only, // affect only raw memory
+ Node* *return_size_val) {
+ jint layout_con = Klass::_lh_neutral_value;
+ Node* layout_val = get_layout_helper(klass_node, layout_con);
+ int layout_is_con = (layout_val == NULL);
+
+ if (!layout_is_con && !StressReflectiveCode &&
+ !too_many_traps(Deoptimization::Reason_class_check)) {
+ // This is a reflective array creation site.
+ // Optimistically assume that it is a subtype of Object[],
+ // so that we can fold up all the address arithmetic.
+ layout_con = Klass::array_layout_helper(T_OBJECT);
+ Node* cmp_lh = _gvn.transform( new(C, 3) CmpINode(layout_val, intcon(layout_con)) );
+ Node* bol_lh = _gvn.transform( new(C, 2) BoolNode(cmp_lh, BoolTest::eq) );
+ { BuildCutout unless(this, bol_lh, PROB_MAX);
+ uncommon_trap(Deoptimization::Reason_class_check,
+ Deoptimization::Action_maybe_recompile);
+ }
+ layout_val = NULL;
+ layout_is_con = true;
+ }
+
+ // Generate the initial go-slow test. Make sure we do not overflow
+ // if length is huge (near 2Gig) or negative! We do not need
+ // exact double-words here, just a close approximation of needed
+ // double-words. We can't add any offset or rounding bits, lest we
+// take a size of -1 bytes and make it positive. Use an unsigned
+ // compare, so negative sizes look hugely positive.
+ int fast_size_limit = FastAllocateSizeLimit;
+ if (layout_is_con) {
+ assert(!StressReflectiveCode, "stress mode does not use these paths");
+ // Increase the size limit if we have exact knowledge of array type.
+ int log2_esize = Klass::layout_helper_log2_element_size(layout_con);
+ fast_size_limit <<= (LogBytesPerLong - log2_esize);
+ }
+
+ Node* initial_slow_cmp = _gvn.transform( new (C, 3) CmpUNode( length, intcon( fast_size_limit ) ) );
+ Node* initial_slow_test = _gvn.transform( new (C, 2) BoolNode( initial_slow_cmp, BoolTest::gt ) );
+ if (initial_slow_test->is_Bool()) {
+ // Hide it behind a CMoveI, or else PhaseIdealLoop::split_up will get sick.
+ initial_slow_test = initial_slow_test->as_Bool()->as_int_value(&_gvn);
+ }
+
+ // --- Size Computation ---
+ // array_size = round_to_heap(array_header + (length << elem_shift));
+ // where round_to_heap(x) == round_to(x, MinObjAlignmentInBytes)
+ // and round_to(x, y) == ((x + y-1) & ~(y-1))
+ // The rounding mask is strength-reduced, if possible.
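+ // For example (illustration only, assuming MinObjAlignmentInBytes == 8 and a
+ // 16-byte array header): a 10-element array with eshift == 2 needs
+ //   round_to(16 + (10 << 2), 8) == round_to(56, 8) == 56 bytes.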
+ int round_mask = MinObjAlignmentInBytes - 1;
+ Node* header_size = NULL;
+ int header_size_min = arrayOopDesc::base_offset_in_bytes(T_BYTE);
+ // (T_BYTE has the weakest alignment and size restrictions...)
+ if (layout_is_con) {
+ int hsize = Klass::layout_helper_header_size(layout_con);
+ int eshift = Klass::layout_helper_log2_element_size(layout_con);
+ BasicType etype = Klass::layout_helper_element_type(layout_con);
+ if ((round_mask & ~right_n_bits(eshift)) == 0)
+ round_mask = 0; // strength-reduce it if it goes away completely
+ assert((hsize & right_n_bits(eshift)) == 0, "hsize is pre-rounded");
+ assert(header_size_min <= hsize, "generic minimum is smallest");
+ header_size_min = hsize;
+ header_size = intcon(hsize + round_mask);
+ } else {
+ Node* hss = intcon(Klass::_lh_header_size_shift);
+ Node* hsm = intcon(Klass::_lh_header_size_mask);
+ Node* hsize = _gvn.transform( new(C, 3) URShiftINode(layout_val, hss) );
+ hsize = _gvn.transform( new(C, 3) AndINode(hsize, hsm) );
+ Node* mask = intcon(round_mask);
+ header_size = _gvn.transform( new(C, 3) AddINode(hsize, mask) );
+ }
+
+ Node* elem_shift = NULL;
+ if (layout_is_con) {
+ int eshift = Klass::layout_helper_log2_element_size(layout_con);
+ if (eshift != 0)
+ elem_shift = intcon(eshift);
+ } else {
+ // There is no need to mask or shift this value.
+ // The semantics of LShiftINode include an implicit mask to 0x1F.
+ assert(Klass::_lh_log2_element_size_shift == 0, "use shift in place");
+ elem_shift = layout_val;
+ }
+
+ // Transition to native address size for all offset calculations:
+ Node* lengthx = ConvI2X(length);
+ Node* headerx = ConvI2X(header_size);
+#ifdef _LP64
+ { const TypeLong* tllen = _gvn.find_long_type(lengthx);
+ if (tllen != NULL && tllen->_lo < 0) {
+ // Add a manual constraint to a positive range. Cf. array_element_address.
+ jlong size_max = arrayOopDesc::max_array_length(T_BYTE);
+ if (size_max > tllen->_hi) size_max = tllen->_hi;
+ const TypeLong* tlcon = TypeLong::make(CONST64(0), size_max, Type::WidenMin);
+ lengthx = _gvn.transform( new (C, 2) ConvI2LNode(length, tlcon));
+ }
+ }
+#endif
+
+ // Combine header size (plus rounding) and body size. Then round down.
+ // This computation cannot overflow, because it is used only in two
+ // places, one where the length is sharply limited, and the other
+ // after a successful allocation.
+ Node* abody = lengthx;
+ if (elem_shift != NULL)
+ abody = _gvn.transform( new(C, 3) LShiftXNode(lengthx, elem_shift) );
+ Node* size = _gvn.transform( new(C, 3) AddXNode(headerx, abody) );
+ if (round_mask != 0) {
+ Node* mask = MakeConX(~round_mask);
+ size = _gvn.transform( new(C, 3) AndXNode(size, mask) );
+ }
+ // else if round_mask == 0, the size computation is self-rounding
+
+ if (return_size_val != NULL) {
+ // This is the size
+ (*return_size_val) = size;
+ }
+
+ // Now generate allocation code
+ // Create the AllocateArrayNode and its result projections
+ AllocateArrayNode* alloc
+ = new (C, AllocateArrayNode::ParmLimit)
+ AllocateArrayNode(C, AllocateArrayNode::alloc_type(),
+ control(), memory(Compile::AliasIdxRaw), i_o(),
+ size, klass_node,
+ initial_slow_test,
+ length);
+
+ // Cast to correct type. Note that the klass_node may be constant or not,
+ // and in the latter case the actual array type will be inexact also.
+ // (This happens via a non-constant argument to inline_native_newArray.)
+ // In any case, the value of klass_node provides the desired array type.
+ const TypeInt* length_type = _gvn.find_int_type(length);
+ const TypeInt* narrow_length_type = NULL;
+ const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type();
+ if (ary_type->isa_aryptr() && length_type != NULL) {
+ // Try to get a better type than POS for the size
+ ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
+ narrow_length_type = ary_type->is_aryptr()->size();
+ if (narrow_length_type == length_type)
+ narrow_length_type = NULL;
+ }
+
+ Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only);
+
+ // Cast length on remaining path to be positive:
+ if (narrow_length_type != NULL) {
+ Node* ccast = new (C, 2) CastIINode(length, narrow_length_type);
+ ccast->set_req(0, control());
+ _gvn.set_type_bottom(ccast);
+ record_for_igvn(ccast);
+ if (map()->find_edge(length) >= 0) {
+ replace_in_map(length, ccast);
+ }
+ }
+
+ return javaoop;
+}
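+// Example use (illustrative sketch; 'kit', 'klass_node' and 'length' stand for
+// values the caller already has):
+//
+//   Node* size = NULL;
+//   Node* ary  = kit.new_array(klass_node, length,
+//                              /*raw_mem_only*/ false, &size);
+//   // 'ary' is the new array oop; on the fall-through path 'length' has been
+//   // narrowed to a non-negative range in the map (see the CastII above).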
+
+// The following "Ideal_foo" functions are placed here because they recognize
+// the graph shapes created by the functions immediately above.
+
+//---------------------------Ideal_allocation----------------------------------
+// Given an oop pointer or raw pointer, see if it feeds from an AllocateNode.
+AllocateNode* AllocateNode::Ideal_allocation(Node* ptr, PhaseTransform* phase) {
+ if (ptr == NULL) { // reduce dumb test in callers
+ return NULL;
+ }
+ if (ptr->is_CheckCastPP()) { // strip a raw-to-oop cast
+ ptr = ptr->in(1);
+ if (ptr == NULL) return NULL;
+ }
+ if (ptr->is_Proj()) {
+ Node* allo = ptr->in(0);
+ if (allo != NULL && allo->is_Allocate()) {
+ return allo->as_Allocate();
+ }
+ }
+ // Report failure to match.
+ return NULL;
+}
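+// Recognized shape (sketch):
+//
+//   AllocateNode --> Proj(Parms) --> [CheckCastPP] --> ptr
+//
+// i.e. 'ptr' may be either the raw-oop projection itself or the CheckCastPP'd
+// java oop; both trace back to the same AllocateNode.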
+
+// Fancy version which also strips off an offset (and reports it to caller).
+AllocateNode* AllocateNode::Ideal_allocation(Node* ptr, PhaseTransform* phase,
+ intptr_t& offset) {
+ Node* base = AddPNode::Ideal_base_and_offset(ptr, phase, offset);
+ if (base == NULL) return NULL;
+ return Ideal_allocation(base, phase);
+}
+
+// Trace Initialize <- Proj[Parm] <- Allocate
+AllocateNode* InitializeNode::allocation() {
+ Node* rawoop = in(InitializeNode::RawAddress);
+ if (rawoop->is_Proj()) {
+ Node* alloc = rawoop->in(0);
+ if (alloc->is_Allocate()) {
+ return alloc->as_Allocate();
+ }
+ }
+ return NULL;
+}
+
+// Trace Allocate -> Proj[Parm] -> Initialize
+InitializeNode* AllocateNode::initialization() {
+ ProjNode* rawoop = proj_out(AllocateNode::RawAddress);
+ if (rawoop == NULL) return NULL;
+ for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) {
+ Node* init = rawoop->fast_out(i);
+ if (init->is_Initialize()) {
+ assert(init->as_Initialize()->allocation() == this, "2-way link");
+ return init->as_Initialize();
+ }
+ }
+ return NULL;
+}
diff --git a/src/share/vm/opto/graphKit.hpp b/src/share/vm/opto/graphKit.hpp
new file mode 100644
index 000000000..c9ea02625
--- /dev/null
+++ b/src/share/vm/opto/graphKit.hpp
@@ -0,0 +1,720 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class FastLockNode;
+class FastUnlockNode;
+class Parse;
+class RootNode;
+
+//-----------------------------------------------------------------------------
+//----------------------------GraphKit-----------------------------------------
+// Toolkit for building the common sorts of subgraphs.
+// Does not know about bytecode parsing or type-flow results.
+// It is able to create graphs implementing the semantics of most
+// or all bytecodes, so that it can expand intrinsics and calls.
+// It may depend on JVMState structure, but it must not depend
+// on specific bytecode streams.
+class GraphKit : public Phase {
+ friend class PreserveJVMState;
+
+ protected:
+ ciEnv* _env; // Compilation environment
+ PhaseGVN &_gvn; // Some optimizations while parsing
+ SafePointNode* _map; // Parser map from JVM to Nodes
+ SafePointNode* _exceptions;// Parser map(s) for exception state(s)
+ int _sp; // JVM Expression Stack Pointer
+ int _bci; // JVM Bytecode Pointer
+ ciMethod* _method; // JVM Current Method
+
+ private:
+ SafePointNode* map_not_null() const {
+ assert(_map != NULL, "must call stopped() to test for reset compiler map");
+ return _map;
+ }
+
+ public:
+ GraphKit(); // empty constructor
+ GraphKit(JVMState* jvms); // the JVM state on which to operate
+
+#ifdef ASSERT
+ ~GraphKit() {
+ assert(!has_exceptions(), "user must call transfer_exceptions_into_jvms");
+ }
+#endif
+
+ virtual Parse* is_Parse() const { return NULL; }
+
+ ciEnv* env() const { return _env; }
+ PhaseGVN& gvn() const { return _gvn; }
+
+ void record_for_igvn(Node* n) const { C->record_for_igvn(n); } // delegate to Compile
+
+ // Handy well-known nodes:
+ Node* null() const { return zerocon(T_OBJECT); }
+ Node* top() const { return C->top(); }
+ RootNode* root() const { return C->root(); }
+
+ // Create or find a constant node
+ Node* intcon(jint con) const { return _gvn.intcon(con); }
+ Node* longcon(jlong con) const { return _gvn.longcon(con); }
+ Node* makecon(const Type *t) const { return _gvn.makecon(t); }
+ Node* zerocon(BasicType bt) const { return _gvn.zerocon(bt); }
+ // (See also macro MakeConX in type.hpp, which uses intcon or longcon.)
+
+ jint find_int_con(Node* n, jint value_if_unknown) {
+ return _gvn.find_int_con(n, value_if_unknown);
+ }
+ jlong find_long_con(Node* n, jlong value_if_unknown) {
+ return _gvn.find_long_con(n, value_if_unknown);
+ }
+ // (See also macro find_intptr_t_con in type.hpp, which uses one of these.)
+
+ // JVM State accessors:
+ // Parser mapping from JVM indices into Nodes.
+ // Low slots are accessed by the StartNode enum values.
+ // Then come the locals at StartNode::Parms to StartNode::Parms+max_locals();
+ // Then come JVM stack slots.
+ // Finally come the monitors, if any.
+ // See layout accessors in class JVMState.
+
+ SafePointNode* map() const { return _map; }
+ bool has_exceptions() const { return _exceptions != NULL; }
+ JVMState* jvms() const { return map_not_null()->_jvms; }
+ int sp() const { return _sp; }
+ int bci() const { return _bci; }
+ Bytecodes::Code java_bc() const;
+ ciMethod* method() const { return _method; }
+
+ void set_jvms(JVMState* jvms) { set_map(jvms->map());
+ assert(jvms == this->jvms(), "sanity");
+ _sp = jvms->sp();
+ _bci = jvms->bci();
+ _method = jvms->has_method() ? jvms->method() : NULL; }
+ void set_map(SafePointNode* m) { _map = m; debug_only(verify_map()); }
+ void set_sp(int i) { assert(i >= 0, "must be non-negative"); _sp = i; }
+ void clean_stack(int from_sp); // clear garbage beyond from_sp to top
+
+ void inc_sp(int i) { set_sp(sp() + i); }
+ void set_bci(int bci) { _bci = bci; }
+
+ // Make sure jvms has current bci & sp.
+ JVMState* sync_jvms() const;
+#ifdef ASSERT
+ // Make sure JVMS has an updated copy of bci and sp.
+ // Also sanity-check method, depth, and monitor depth.
+ bool jvms_in_sync() const;
+
+ // Make sure the map looks OK.
+ void verify_map() const;
+
+ // Make sure a proposed exception state looks OK.
+ static void verify_exception_state(SafePointNode* ex_map);
+#endif
+
+ // Clone the existing map state. (Implements PreserveJVMState.)
+ SafePointNode* clone_map();
+
+ // Set the map to a clone of the given one.
+ void set_map_clone(SafePointNode* m);
+
+ // Tell if the compilation is failing.
+ bool failing() const { return C->failing(); }
+
+ // Set _map to NULL, signalling a stop to further bytecode execution.
+ // Preserve the map intact for future use, and return it back to the caller.
+ SafePointNode* stop() { SafePointNode* m = map(); set_map(NULL); return m; }
+
+ // Stop, but first smash the map's inputs to NULL, to mark it dead.
+ void stop_and_kill_map();
+
+ // Tell if _map is NULL, or control is top.
+ bool stopped();
+
+ // Tell if this method or any caller method has exception handlers.
+ bool has_ex_handler();
+
+ // Save an exception without blowing stack contents or other JVM state.
+ // (The extra pointer is stuck with add_req on the map, beyond the JVMS.)
+ static void set_saved_ex_oop(SafePointNode* ex_map, Node* ex_oop);
+
+ // Recover a saved exception from its map.
+ static Node* saved_ex_oop(SafePointNode* ex_map);
+
+ // Recover a saved exception from its map, and remove it from the map.
+ static Node* clear_saved_ex_oop(SafePointNode* ex_map);
+
+#ifdef ASSERT
+ // Report whether there is a saved exception oop in the given map.
+ static bool has_saved_ex_oop(SafePointNode* ex_map);
+#endif
+
+ // Push an exception in the canonical position for handlers (stack(0)).
+ void push_ex_oop(Node* ex_oop) {
+ ensure_stack(1); // ensure room to push the exception
+ set_stack(0, ex_oop);
+ set_sp(1);
+ clean_stack(1);
+ }
+
+ // Detach and return an exception state.
+ SafePointNode* pop_exception_state() {
+ SafePointNode* ex_map = _exceptions;
+ if (ex_map != NULL) {
+ _exceptions = ex_map->next_exception();
+ ex_map->set_next_exception(NULL);
+ debug_only(verify_exception_state(ex_map));
+ }
+ return ex_map;
+ }
+
+ // Add an exception, using the given JVM state, without commoning.
+ void push_exception_state(SafePointNode* ex_map) {
+ debug_only(verify_exception_state(ex_map));
+ ex_map->set_next_exception(_exceptions);
+ _exceptions = ex_map;
+ }
+
+ // Turn the current JVM state into an exception state, appending the ex_oop.
+ SafePointNode* make_exception_state(Node* ex_oop);
+
+ // Add an exception, using the given JVM state.
+ // Combine all exceptions with a common exception type into a single state.
+ // (This is done via combine_exception_states.)
+ void add_exception_state(SafePointNode* ex_map);
+
+ // Combine all exceptions of any sort whatever into a single master state.
+ SafePointNode* combine_and_pop_all_exception_states() {
+ if (_exceptions == NULL) return NULL;
+ SafePointNode* phi_map = pop_exception_state();
+ SafePointNode* ex_map;
+ while ((ex_map = pop_exception_state()) != NULL) {
+ combine_exception_states(ex_map, phi_map);
+ }
+ return phi_map;
+ }
+
+ // Combine the two exception states, building phis as necessary.
+ // The second argument is updated to include contributions from the first.
+ void combine_exception_states(SafePointNode* ex_map, SafePointNode* phi_map);
+
+ // Reset the map to the given state. If there are any half-finished phis
+ // in it (created by combine_exception_states), transform them now.
+ // Returns the exception oop. (Caller must call push_ex_oop if required.)
+ Node* use_exception_state(SafePointNode* ex_map);
+
+ // Collect exceptions from a given JVM state into my exception list.
+ void add_exception_states_from(JVMState* jvms);
+
+ // Collect all raised exceptions into the current JVM state.
+ // Clear the current exception list and map, returns the combined states.
+ JVMState* transfer_exceptions_into_jvms();
+
+ // Helper to throw a built-in exception.
+ // Range checks take the offending index.
+ // Cast and array store checks take the offending class.
+ // Others do not take the optional argument.
+ // The JVMS must allow the bytecode to be re-executed
+ // via an uncommon trap.
+ void builtin_throw(Deoptimization::DeoptReason reason, Node* arg = NULL);
+
+ // Helper Functions for adding debug information
+ void kill_dead_locals();
+#ifdef ASSERT
+ bool dead_locals_are_killed();
+#endif
+ // The call may deoptimize. Supply required JVM state as debug info.
+ // If must_throw is true, the call is guaranteed not to return normally.
+ void add_safepoint_edges(SafePointNode* call,
+ bool must_throw = false);
+
+ // How many stack inputs does the current BC consume?
+ // And, how does the stack change after the bytecode?
+ // Returns false if unknown.
+ bool compute_stack_effects(int& inputs, int& depth);
+
+ // Add a fixed offset to a pointer
+ Node* basic_plus_adr(Node* base, Node* ptr, intptr_t offset) {
+ return basic_plus_adr(base, ptr, MakeConX(offset));
+ }
+ Node* basic_plus_adr(Node* base, intptr_t offset) {
+ return basic_plus_adr(base, base, MakeConX(offset));
+ }
+ // Add a variable offset to a pointer
+ Node* basic_plus_adr(Node* base, Node* offset) {
+ return basic_plus_adr(base, base, offset);
+ }
+ Node* basic_plus_adr(Node* base, Node* ptr, Node* offset);
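+  // Example (taken from GraphKit::get_layout_helper in graphKit.cpp): the
+  // address of a klass-relative field is formed as
+  //   Node* lhp = basic_plus_adr(klass_node, klass_node,
+  //                              Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));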
+
+ // Convert between int and long, and size_t.
+ // (See macros ConvI2X, etc., in type.hpp for ConvI2X, etc.)
+ Node* ConvI2L(Node* offset);
+ Node* ConvL2I(Node* offset);
+ // Find out the klass of an object.
+ Node* load_object_klass(Node* object);
+ // Find out the length of an array.
+ Node* load_array_length(Node* array);
+ // Helper function to do a NULL pointer check or ZERO check based on type.
+ Node* null_check_common(Node* value, BasicType type,
+ bool assert_null, Node* *null_control);
+ // Throw an exception if a given value is null.
+ // Return the value cast to not-null.
+ // Be clever about equivalent dominating null checks.
+ Node* do_null_check(Node* value, BasicType type) {
+ return null_check_common(value, type, false, NULL);
+ }
+ // Throw an uncommon trap if a given value is __not__ null.
+ // Return the value cast to null, and be clever about dominating checks.
+ Node* do_null_assert(Node* value, BasicType type) {
+ return null_check_common(value, type, true, NULL);
+ }
+ // Null check oop. Return null-path control into (*null_control).
+ // Return a cast-not-null node which depends on the not-null control.
+ // If never_see_null, use an uncommon trap (*null_control sees a top).
+ // The cast is not valid along the null path; keep a copy of the original.
+ Node* null_check_oop(Node* value, Node* *null_control,
+ bool never_see_null = false);
+
+ // Cast obj to not-null on this path
+ Node* cast_not_null(Node* obj, bool do_replace_in_map = true);
+ // Replace all occurrences of one node by another.
+ void replace_in_map(Node* old, Node* neww);
+
+ void push(Node* n) { map_not_null(); _map->set_stack(_map->_jvms,_sp++,n); }
+ Node* pop() { map_not_null(); return _map->stack(_map->_jvms,--_sp); }
+ Node* peek(int off=0) { map_not_null(); return _map->stack(_map->_jvms, _sp - off - 1); }
+
+ void push_pair(Node* ldval) {
+ push(ldval);
+ push(top()); // the halfword is merely a placeholder
+ }
+ void push_pair_local(int i) {
+ // longs are stored in locals in "push" order
+ push( local(i+0) ); // the real value
+ assert(local(i+1) == top(), "");
+ push(top()); // halfword placeholder
+ }
+ Node* pop_pair() {
+ // the second half is pushed last & popped first; it contains exactly nothing
+ Node* halfword = pop();
+ assert(halfword == top(), "");
+ // the long bits are pushed first & popped last:
+ return pop();
+ }
+ void set_pair_local(int i, Node* lval) {
+ // longs are stored in locals as a value/half pair (like doubles)
+ set_local(i+0, lval);
+ set_local(i+1, top());
+ }
+
+ // Push the node, which may be zero, one, or two words.
+ void push_node(BasicType n_type, Node* n) {
+ int n_size = type2size[n_type];
+ if (n_size == 1) push( n ); // T_INT, ...
+ else if (n_size == 2) push_pair( n ); // T_DOUBLE, T_LONG
+ else { assert(n_size == 0, "must be T_VOID"); }
+ }
+
+ Node* pop_node(BasicType n_type) {
+ int n_size = type2size[n_type];
+ if (n_size == 1) return pop();
+ else if (n_size == 2) return pop_pair();
+ else return NULL;
+ }
+
+ Node* control() const { return map_not_null()->control(); }
+ Node* i_o() const { return map_not_null()->i_o(); }
+ Node* returnadr() const { return map_not_null()->returnadr(); }
+ Node* frameptr() const { return map_not_null()->frameptr(); }
+ Node* local(uint idx) const { map_not_null(); return _map->local( _map->_jvms, idx); }
+ Node* stack(uint idx) const { map_not_null(); return _map->stack( _map->_jvms, idx); }
+ Node* argument(uint idx) const { map_not_null(); return _map->argument( _map->_jvms, idx); }
+ Node* monitor_box(uint idx) const { map_not_null(); return _map->monitor_box(_map->_jvms, idx); }
+ Node* monitor_obj(uint idx) const { map_not_null(); return _map->monitor_obj(_map->_jvms, idx); }
+
+ void set_control (Node* c) { map_not_null()->set_control(c); }
+ void set_i_o (Node* c) { map_not_null()->set_i_o(c); }
+ void set_local(uint idx, Node* c) { map_not_null(); _map->set_local( _map->_jvms, idx, c); }
+ void set_stack(uint idx, Node* c) { map_not_null(); _map->set_stack( _map->_jvms, idx, c); }
+ void set_argument(uint idx, Node* c){ map_not_null(); _map->set_argument(_map->_jvms, idx, c); }
+ void ensure_stack(uint stk_size) { map_not_null(); _map->ensure_stack(_map->_jvms, stk_size); }
+
+ // Access unaliased memory
+ Node* memory(uint alias_idx);
+ Node* memory(const TypePtr *tp) { return memory(C->get_alias_index(tp)); }
+ Node* memory(Node* adr) { return memory(_gvn.type(adr)->is_ptr()); }
+
+ // Access immutable memory
+ Node* immutable_memory() { return C->immutable_memory(); }
+
+ // Set unaliased memory
+ void set_memory(Node* c, uint alias_idx) { merged_memory()->set_memory_at(alias_idx, c); }
+ void set_memory(Node* c, const TypePtr *tp) { set_memory(c,C->get_alias_index(tp)); }
+ void set_memory(Node* c, Node* adr) { set_memory(c,_gvn.type(adr)->is_ptr()); }
+
+ // Get the entire memory state (probably a MergeMemNode), and reset it
+ // (The resetting prevents somebody from using the dangling Node pointer.)
+ Node* reset_memory();
+
+ // Get the entire memory state, asserted to be a MergeMemNode.
+ MergeMemNode* merged_memory() {
+ Node* mem = map_not_null()->memory();
+ assert(mem->is_MergeMem(), "parse memory is always pre-split");
+ return mem->as_MergeMem();
+ }
+
+ // Set the entire memory state; produce a new MergeMemNode.
+ void set_all_memory(Node* newmem);
+
+ // Create a memory projection from the call, then set_all_memory.
+ void set_all_memory_call(Node* call);
+
+ // Create a LoadNode, reading from the parser's memory state.
+ // (Note: require_atomic_access is useful only with T_LONG.)
+ Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
+ bool require_atomic_access = false) {
+ // This version computes alias_index from bottom_type
+ return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(),
+ require_atomic_access);
+ }
+ Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, bool require_atomic_access = false) {
+ // This version computes alias_index from an address type
+ assert(adr_type != NULL, "use other make_load factory");
+ return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type),
+ require_atomic_access);
+ }
+ // This is the base version which is given an alias index.
+ Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, bool require_atomic_access = false);
+
+ // Create & transform a StoreNode and store the effect into the
+ // parser's memory state.
+ Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
+ const TypePtr* adr_type,
+ bool require_atomic_access = false) {
+ // This version computes alias_index from an address type
+ assert(adr_type != NULL, "use other store_to_memory factory");
+ return store_to_memory(ctl, adr, val, bt,
+ C->get_alias_index(adr_type),
+ require_atomic_access);
+ }
+ // This is the base version which is given alias index
+ // Return the new StoreXNode
+ Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
+ int adr_idx,
+ bool require_atomic_access = false);
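+  // Example (sketch adapted from the lock-counter update in graphKit.cpp):
+  //   int   adr_idx = Compile::AliasIdxRaw;
+  //   Node* cnt  = make_load(NULL, counter_addr, TypeInt::INT, T_INT, adr_idx);
+  //   Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(1)));
+  //   store_to_memory(control(), counter_addr, incr, T_INT, adr_idx);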
+
+
+ // All in one pre-barrier, store, post_barrier
+ // Insert a write-barrier'd store. This is to let generational GC
+ // work; we have to flag all oop-stores before the next GC point.
+ //
+ // It comes in 3 flavors of store to an object, array, or unknown.
+ // We use precise card marks for arrays to avoid scanning the entire
+ // array. We use imprecise for objects. We use precise for unknown
+ // since we don't know if we have an array or an object or even
+ // where the object starts.
+ //
+ // If val==NULL, it is taken to be a completely unknown value. QQQ
+
+ Node* store_oop_to_object(Node* ctl,
+ Node* obj, // containing obj
+ Node* adr, // actual address to store val at
+ const TypePtr* adr_type,
+ Node* val,
+ const Type* val_type,
+ BasicType bt);
+
+ Node* store_oop_to_array(Node* ctl,
+ Node* obj, // containing obj
+ Node* adr, // actual address to store val at
+ const TypePtr* adr_type,
+ Node* val,
+ const Type* val_type,
+ BasicType bt);
+
+ // Could be an array or object we don't know at compile time (unsafe ref.)
+ Node* store_oop_to_unknown(Node* ctl,
+ Node* obj, // containing obj
+ Node* adr, // actual address to store val at
+ const TypePtr* adr_type,
+ Node* val,
+ const Type* val_type,
+ BasicType bt);
+
+ // For the few cases where the barriers need special help
+ void pre_barrier(Node* ctl, Node* obj, Node* adr, uint adr_idx,
+ Node* val, const Type* val_type, BasicType bt);
+
+ void post_barrier(Node* ctl, Node* store, Node* obj, Node* adr, uint adr_idx,
+ Node* val, BasicType bt, bool use_precise);
+
+ // Return addressing for an array element.
+ Node* array_element_address(Node* ary, Node* idx, BasicType elembt,
+ // Optional constraint on the array size:
+ const TypeInt* sizetype = NULL);
+
+ // Return a load of array element at idx.
+ Node* load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype);
+
+ // CMS card-marks have an input from the corresponding oop_store
+ void cms_card_mark(Node* ctl, Node* adr, Node* val, Node* oop_store);
+
+ //---------------- Dtrace support --------------------
+ void make_dtrace_method_entry_exit(ciMethod* method, bool is_entry);
+ void make_dtrace_method_entry(ciMethod* method) {
+ make_dtrace_method_entry_exit(method, true);
+ }
+ void make_dtrace_method_exit(ciMethod* method) {
+ make_dtrace_method_entry_exit(method, false);
+ }
+
+ //--------------- stub generation -------------------
+ public:
+ void gen_stub(address C_function,
+ const char *name,
+ int is_fancy_jump,
+ bool pass_tls,
+ bool return_pc);
+
+ //---------- help for generating calls --------------
+
+ // Do a null check on the receiver, which is in argument(0).
+ Node* null_check_receiver(ciMethod* callee) {
+ assert(!callee->is_static(), "must be a virtual method");
+ int nargs = 1 + callee->signature()->size();
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when the primitive is inlined into a method
+ // which handles NullPointerExceptions.
+ Node* receiver = argument(0);
+ _sp += nargs;
+ receiver = do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ return receiver;
+ }
+
+ // Fill in argument edges for the call from argument(0), argument(1), ...
+ // (The next step is to call set_edges_for_java_call.)
+ void set_arguments_for_java_call(CallJavaNode* call);
+
+ // Fill in non-argument edges for the call.
+ // Transform the call, and update the basics: control, i_o, memory.
+ // (The next step is usually to call set_results_for_java_call.)
+ void set_edges_for_java_call(CallJavaNode* call,
+ bool must_throw = false);
+
+ // Finish up a java call that was started by set_edges_for_java_call.
+ // Call add_exception on any throw arising from the call.
+ // Return the call result (transformed).
+ Node* set_results_for_java_call(CallJavaNode* call);
+
+ // Similar to set_edges_for_java_call, but simplified for runtime calls.
+ void set_predefined_output_for_runtime_call(Node* call) {
+ set_predefined_output_for_runtime_call(call, NULL, NULL);
+ }
+ void set_predefined_output_for_runtime_call(Node* call,
+ Node* keep_mem,
+ const TypePtr* hook_mem);
+ Node* set_predefined_input_for_runtime_call(SafePointNode* call);
+
+ // helper functions for statistics
+ void increment_counter(address counter_addr); // increment a debug counter
+ void increment_counter(Node* counter_addr); // increment a debug counter
+
+ // Bail out to the interpreter right now
+ // The optional klass is the one causing the trap.
+ // The optional reason is debug information written to the compile log.
+ // Optional must_throw is the same as with add_safepoint_edges.
+ void uncommon_trap(int trap_request,
+ ciKlass* klass = NULL, const char* reason_string = NULL,
+ bool must_throw = false, bool keep_exact_action = false);
+
+ // Shorthand, to avoid saying "Deoptimization::" so many times.
+ void uncommon_trap(Deoptimization::DeoptReason reason,
+ Deoptimization::DeoptAction action,
+ ciKlass* klass = NULL, const char* reason_string = NULL,
+ bool must_throw = false, bool keep_exact_action = false) {
+ uncommon_trap(Deoptimization::make_trap_request(reason, action),
+ klass, reason_string, must_throw, keep_exact_action);
+ }
+
+ // Report if there were too many traps at the current method and bci.
+ // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
+ // If there is no MDO at all, report no trap unless told to assume it.
+ bool too_many_traps(Deoptimization::DeoptReason reason) {
+ return C->too_many_traps(method(), bci(), reason);
+ }
+
+ // Report if there were too many recompiles at the current method and bci.
+ bool too_many_recompiles(Deoptimization::DeoptReason reason) {
+ return C->too_many_recompiles(method(), bci(), reason);
+ }
+
+ // vanilla/CMS post barrier
+ void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise);
+
+ // Returns the object (if any) which was created the moment before.
+ Node* just_allocated_object(Node* current_control);
+
+ static bool use_ReduceInitialCardMarks() {
+ return (ReduceInitialCardMarks
+ && Universe::heap()->can_elide_tlab_store_barriers());
+ }
+
+ // Helper function to round double arguments before a call
+ void round_double_arguments(ciMethod* dest_method);
+ void round_double_result(ciMethod* dest_method);
+
+ // rounding for strict float precision conformance
+ Node* precision_rounding(Node* n);
+
+ // rounding for strict double precision conformance
+ Node* dprecision_rounding(Node* n);
+
+ // rounding for non-strict double stores
+ Node* dstore_rounding(Node* n);
+
+ // Helper functions for fast/slow path codes
+ Node* opt_iff(Node* region, Node* iff);
+ Node* make_runtime_call(int flags,
+ const TypeFunc* call_type, address call_addr,
+ const char* call_name,
+ const TypePtr* adr_type, // NULL if no memory effects
+ Node* parm0 = NULL, Node* parm1 = NULL,
+ Node* parm2 = NULL, Node* parm3 = NULL,
+ Node* parm4 = NULL, Node* parm5 = NULL,
+ Node* parm6 = NULL, Node* parm7 = NULL);
+ enum { // flag values for make_runtime_call
+ RC_NO_FP = 1, // CallLeafNoFPNode
+ RC_NO_IO = 2, // do not hook IO edges
+ RC_NO_LEAF = 4, // CallStaticJavaNode
+ RC_MUST_THROW = 8, // flag passed to add_safepoint_edges
+ RC_NARROW_MEM = 16, // input memory is same as output
+ RC_UNCOMMON = 32, // freq. expected to be like uncommon trap
+ RC_LEAF = 0 // null value: no flags set
+ };
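+  // Example (illustrative sketch only; the TypeFunc, entry address and
+  // parameters below are hypothetical and must describe a real runtime stub):
+  //   make_runtime_call(RC_LEAF | RC_NO_IO,
+  //                     some_stub_Type(), some_stub_entry, "some_stub",
+  //                     NULL,        // NULL adr_type: no memory effects
+  //                     parm0, parm1);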
+
+ // merge in all memory slices from new_mem, along the given path
+ void merge_memory(Node* new_mem, Node* region, int new_path);
+ void make_slow_call_ex(Node* call, ciInstanceKlass* ex_klass, bool separate_io_proj);
+
+ // Helper functions to build synchronizations
+ int next_monitor();
+ Node* insert_mem_bar(int opcode, Node* precedent = NULL);
+ Node* insert_mem_bar_volatile(int opcode, int alias_idx, Node* precedent = NULL);
+ // Optional 'precedent' is appended as an extra edge, to force ordering.
+ FastLockNode* shared_lock(Node* obj);
+ void shared_unlock(Node* box, Node* obj);
+
+ // helper functions for the fast path/slow path idioms
+ Node* fast_and_slow(Node* in, const Type *result_type, Node* null_result,
+                     IfNode* fast_test, Node* fast_result,
+                     address slow_call, const TypeFunc *slow_call_type,
+                     Node* slow_arg, klassOop ex_klass, Node* slow_result);
+
+ // Generate an instance-of idiom. Used by both the instance-of bytecode
+ // and the reflective instance-of call.
+ Node* gen_instanceof( Node *subobj, Node* superkls );
+
+ // Generate a check-cast idiom. Used by both the check-cast bytecode
+ // and the array-store bytecode
+ Node* gen_checkcast( Node *subobj, Node* superkls,
+ Node* *failure_control = NULL );
+
+ // Generate a subtyping check. Takes as input the subtype and supertype.
+ // Returns 2 values: sets the default control() to the true path and
+ // returns the false path. Only reads from constant memory taken from the
+ // default memory; does not write anything. It also doesn't take in an
+ // Object; if you wish to check an Object you need to load the Object's
+ // class prior to coming here.
+ Node* gen_subtype_check(Node* subklass, Node* superklass);
+
+ // Static parse-time type checking logic for gen_subtype_check:
+ enum { SSC_always_false, SSC_always_true, SSC_easy_test, SSC_full_test };
+ int static_subtype_check(ciKlass* superk, ciKlass* subk);
+
+ // Exact type check used for predicted calls and casts.
+ // Rewrites (*casted_receiver) to be casted to the stronger type.
+ // (Caller is responsible for doing replace_in_map.)
+ Node* type_check_receiver(Node* receiver, ciKlass* klass, float prob,
+ Node* *casted_receiver);
+
+ // implementation of object creation
+ Node* set_output_for_allocation(AllocateNode* alloc,
+ const TypeOopPtr* oop_type,
+ bool raw_mem_only);
+ Node* get_layout_helper(Node* klass_node, jint& constant_value);
+ Node* new_instance(Node* klass_node,
+ Node* slow_test = NULL,
+ bool raw_mem_only = false,
+ Node* *return_size_val = NULL);
+ Node* new_array(Node* klass_node, Node* count_val,
+ bool raw_mem_only = false, Node* *return_size_val = NULL);
+
+ // Handy for making control flow
+ IfNode* create_and_map_if(Node* ctrl, Node* tst, float prob, float cnt) {
+ IfNode* iff = new (C, 2) IfNode(ctrl, tst, prob, cnt);// New IfNode's
+ _gvn.set_type(iff, iff->Value(&_gvn)); // Value may be known at parse-time
+ // Place 'if' on worklist if it will be in graph
+ if (!tst->is_Con()) record_for_igvn(iff); // Range-check and Null-check removal is later
+ return iff;
+ }
+
+ IfNode* create_and_xform_if(Node* ctrl, Node* tst, float prob, float cnt) {
+ IfNode* iff = new (C, 2) IfNode(ctrl, tst, prob, cnt);// New IfNode's
+ _gvn.transform(iff); // Value may be known at parse-time
+ // Place 'if' on worklist if it will be in graph
+ if (!tst->is_Con()) record_for_igvn(iff); // Range-check and Null-check removal is later
+ return iff;
+ }
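+  // Example (sketch; 'a' and 'b' are hypothetical int values, and the caller
+  // still creates whatever projections of the IfNode it needs):
+  //   Node* cmp = _gvn.transform( new (C, 3) CmpINode(a, b) );
+  //   Node* bol = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::eq) );
+  //   IfNode* iff = create_and_xform_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);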
+};
+
+// Helper class to support building of control flow branches. Upon
+// creation the map and sp at bci are cloned and restored upon de-
+// struction. Typical use:
+//
+// { PreserveJVMState pjvms(this);
+// // code of new branch
+// }
+// // here the JVM state at bci is established
+
+class PreserveJVMState: public StackObj {
+ protected:
+ GraphKit* _kit;
+#ifdef ASSERT
+ int _block; // PO of current block, if a Parse
+ int _bci;
+#endif
+ SafePointNode* _map;
+ uint _sp;
+
+ public:
+ PreserveJVMState(GraphKit* kit, bool clone_map = true);
+ ~PreserveJVMState();
+};
+
+// Helper class to build cutouts of the form if (p) ; else {x...}.
+// The code {x...} must not fall through.
+// The kit's main flow of control is set to the "then" continuation of if(p).
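+// Typical use (as in GraphKit::new_array in graphKit.cpp):
+//
+//   { BuildCutout unless(this, bol_lh, PROB_MAX);
+//     uncommon_trap(Deoptimization::Reason_class_check,
+//                   Deoptimization::Action_maybe_recompile);
+//   }
+//   // here the "then" path of bol_lh continues as the main flow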
+class BuildCutout: public PreserveJVMState {
+ public:
+ BuildCutout(GraphKit* kit, Node* p, float prob, float cnt = COUNT_UNKNOWN);
+ ~BuildCutout();
+};
diff --git a/src/share/vm/opto/idealGraphPrinter.cpp b/src/share/vm/opto/idealGraphPrinter.cpp
new file mode 100644
index 000000000..e65cc72b4
--- /dev/null
+++ b/src/share/vm/opto/idealGraphPrinter.cpp
@@ -0,0 +1,1919 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_idealGraphPrinter.cpp.incl"
+
+#ifndef PRODUCT
+
+// Constants
+// Keep consistent with Java constants
+const char *IdealGraphPrinter::INDENT = " ";
+const char *IdealGraphPrinter::TOP_ELEMENT = "graphDocument";
+const char *IdealGraphPrinter::GROUP_ELEMENT = "group";
+const char *IdealGraphPrinter::GRAPH_ELEMENT = "graph";
+const char *IdealGraphPrinter::PROPERTIES_ELEMENT = "properties";
+const char *IdealGraphPrinter::EDGES_ELEMENT = "edges";
+const char *IdealGraphPrinter::PROPERTY_ELEMENT = "p";
+const char *IdealGraphPrinter::EDGE_ELEMENT = "edge";
+const char *IdealGraphPrinter::NODE_ELEMENT = "node";
+const char *IdealGraphPrinter::NODES_ELEMENT = "nodes";
+const char *IdealGraphPrinter::REMOVE_EDGE_ELEMENT = "removeEdge";
+const char *IdealGraphPrinter::REMOVE_NODE_ELEMENT = "removeNode";
+const char *IdealGraphPrinter::METHOD_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::METHOD_IS_PUBLIC_PROPERTY = "public";
+const char *IdealGraphPrinter::METHOD_IS_STATIC_PROPERTY = "static";
+const char *IdealGraphPrinter::TRUE_VALUE = "true";
+const char *IdealGraphPrinter::NODE_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::EDGE_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::NODE_ID_PROPERTY = "id";
+const char *IdealGraphPrinter::FROM_PROPERTY = "from";
+const char *IdealGraphPrinter::TO_PROPERTY = "to";
+const char *IdealGraphPrinter::PROPERTY_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::GRAPH_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::INDEX_PROPERTY = "index";
+const char *IdealGraphPrinter::METHOD_ELEMENT = "method";
+const char *IdealGraphPrinter::INLINE_ELEMENT = "inline";
+const char *IdealGraphPrinter::BYTECODES_ELEMENT = "bytecodes";
+const char *IdealGraphPrinter::METHOD_BCI_PROPERTY = "bci";
+const char *IdealGraphPrinter::METHOD_SHORT_NAME_PROPERTY = "shortName";
+const char *IdealGraphPrinter::CONTROL_FLOW_ELEMENT = "controlFlow";
+const char *IdealGraphPrinter::BLOCK_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::BLOCK_DOMINATOR_PROPERTY = "dom";
+const char *IdealGraphPrinter::BLOCK_ELEMENT = "block";
+const char *IdealGraphPrinter::SUCCESSORS_ELEMENT = "successors";
+const char *IdealGraphPrinter::SUCCESSOR_ELEMENT = "successor";
+const char *IdealGraphPrinter::ASSEMBLY_ELEMENT = "assembly";
+
+int IdealGraphPrinter::_file_count = 0;
+
+IdealGraphPrinter *IdealGraphPrinter::printer() {
+ if (PrintIdealGraphLevel == 0) return NULL;
+
+ JavaThread *thread = JavaThread::current();
+ if (!thread->is_Compiler_thread()) return NULL;
+
+ CompilerThread *compiler_thread = (CompilerThread *)thread;
+ if (compiler_thread->ideal_graph_printer() == NULL) {
+ IdealGraphPrinter *printer = new IdealGraphPrinter();
+ compiler_thread->set_ideal_graph_printer(printer);
+ }
+
+ return compiler_thread->ideal_graph_printer();
+}
+
+void IdealGraphPrinter::clean_up() {
+ JavaThread *p;
+ for (p = Threads::first(); p; p = p->next()) {
+ if (p->is_Compiler_thread()) {
+ CompilerThread *c = (CompilerThread *)p;
+ IdealGraphPrinter *printer = c->ideal_graph_printer();
+ if (printer) {
+ delete printer;
+ }
+ c->set_ideal_graph_printer(NULL);
+ }
+ }
+}
+
+// Constructor, either file or network output
+IdealGraphPrinter::IdealGraphPrinter() {
+
+ _traverse_outs = false;
+ _should_send_method = true;
+ _output = NULL;
+ buffer[0] = 0;
+ _depth = 0;
+ _current_method = NULL;
+ assert(!_current_method, "current method must be initialized to NULL");
+ _arena = new Arena();
+
+ _stream = new (ResourceObj::C_HEAP) networkStream();
+
+ if (PrintIdealGraphFile != NULL) {
+ ThreadCritical tc;
+ // User wants all output to go to files
+ if (_file_count != 0) {
+ ResourceMark rm;
+ stringStream st;
+ const char* dot = strrchr(PrintIdealGraphFile, '.');
+ if (dot) {
+ st.write(PrintIdealGraphFile, dot - PrintIdealGraphFile);
+ st.print("%d%s", _file_count, dot);
+ } else {
+ st.print("%s%d", PrintIdealGraphFile, _file_count);
+ }
+ _output = new (ResourceObj::C_HEAP) fileStream(st.as_string());
+ } else {
+ _output = new (ResourceObj::C_HEAP) fileStream(PrintIdealGraphFile);
+ }
+ _file_count++;
+ } else {
+ // Try to connect to visualizer
+ if (_stream->connect(PrintIdealGraphAddress, PrintIdealGraphPort)) {
+ char c = 0;
+ _stream->read(&c, 1);
+ if (c != 'y') {
+ tty->print_cr("Client available, but does not want to receive data!");
+ _stream->close();
+ delete _stream;
+ _stream = NULL;
+ return;
+ }
+ _output = _stream;
+ } else {
+ // It would be nice if we could shut down cleanly but it should
+ // be an error if we can't connect to the visualizer.
+ fatal2("Couldn't connect to visualizer at %s:%d", PrintIdealGraphAddress, PrintIdealGraphPort);
+ }
+ }
+
+ start_element(TOP_ELEMENT);
+}
+
+// Destructor, close file or network stream
+IdealGraphPrinter::~IdealGraphPrinter() {
+
+ end_element(TOP_ELEMENT);
+
+ if (_stream) {
+ delete _stream;
+ if (_stream == _output) {
+ _output = NULL;
+ }
+ _stream = NULL;
+ }
+
+ if (_output) {
+ delete _output;
+ _output = NULL;
+ }
+}
+
+void IdealGraphPrinter::print_ifg(PhaseIFG* ifg) {
+
+ // Code to print an interference graph to tty, currently not used
+
+ /*
+ if (!_current_method) return;
+ // Remove neighbor colors
+
+ for (uint i = 0; i < ifg._maxlrg; i++) {
+
+ IndexSet *s = ifg.neighbors(i);
+ IndexSetIterator elements(s);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ tty->print_cr("Edge between %d and %d\n", i, neighbor);
+ }
+ }
+
+
+ for (uint i = 0; i < ifg._maxlrg; i++) {
+ LRG &l = ifg.lrgs(i);
+ if (l._def) {
+ OptoReg::Name name = l.reg();
+ tty->print("OptoReg::dump: ");
+ OptoReg::dump(name);
+ tty->print_cr("");
+ tty->print_cr("name=%d\n", name);
+ if (name) {
+ if (OptoReg::is_stack(name)) {
+ tty->print_cr("Stack number %d\n", OptoReg::reg2stack(name));
+
+ } else if (!OptoReg::is_valid(name)) {
+ tty->print_cr("BAD!!!");
+ } else {
+
+ if (OptoReg::is_reg(name)) {
+ tty->print_cr(OptoReg::regname(name));
+ } else {
+ int x = 0;
+ }
+ }
+ int x = 0;
+ }
+
+ if (l._def == NodeSentinel) {
+ tty->print("multiple mapping from %d: ", i);
+ for (int j=0; j<l._defs->length(); j++) {
+ tty->print("%d ", l._defs->at(j)->_idx);
+ }
+ tty->print_cr("");
+ } else {
+ tty->print_cr("mapping between %d and %d\n", i, l._def->_idx);
+ }
+ }
+ }*/
+}
+
+void IdealGraphPrinter::print_method(ciMethod *method, int bci, InlineTree *tree) {
+
+ Properties properties;
+ stringStream str;
+ method->print_name(&str);
+
+ stringStream shortStr;
+ method->print_short_name(&shortStr);
+
+
+ properties.add(new Property(METHOD_NAME_PROPERTY, str.as_string()));
+ properties.add(new Property(METHOD_SHORT_NAME_PROPERTY, shortStr.as_string()));
+ properties.add(new Property(METHOD_BCI_PROPERTY, bci));
+ start_element(METHOD_ELEMENT, &properties);
+
+ start_element(BYTECODES_ELEMENT);
+ output()->print_cr("<![CDATA[");
+ method->print_codes_on(output());
+ output()->print_cr("]]>");
+ end_element(BYTECODES_ELEMENT);
+
+ start_element(INLINE_ELEMENT);
+ if (tree != NULL) {
+ GrowableArray<InlineTree *> subtrees = tree->subtrees();
+ for (int i = 0; i < subtrees.length(); i++) {
+ print_inline_tree(subtrees.at(i));
+ }
+ }
+ end_element(INLINE_ELEMENT);
+
+ end_element(METHOD_ELEMENT);
+ output()->flush();
+}
+
+void IdealGraphPrinter::print_inline_tree(InlineTree *tree) {
+
+ if (tree == NULL) return;
+
+ ciMethod *method = tree->method();
+ print_method(tree->method(), tree->caller_bci(), tree);
+
+}
+
+void IdealGraphPrinter::clear_nodes() {
+ // for (int i = 0; i < _nodes.length(); i++) {
+ // _nodes.at(i)->clear_node();
+ // }
+}
+
+void IdealGraphPrinter::print_inlining(Compile* compile) {
+
+ // Print inline tree
+ if (_should_send_method) {
+ InlineTree *inlineTree = compile->ilt();
+ if (inlineTree != NULL) {
+ print_inline_tree(inlineTree);
+ } else {
+ // print this method only
+ }
+ }
+}
+
+// Has to be called whenever a method is compiled
+void IdealGraphPrinter::begin_method(Compile* compile) {
+
+ ciMethod *method = compile->method();
+ assert(_output, "output stream must exist!");
+ assert(method, "null methods are not allowed!");
+ assert(!_current_method, "current method must be null!");
+
+ _arena->destruct_contents();
+
+ start_element(GROUP_ELEMENT);
+
+ // Print properties
+ Properties properties;
+
+ // Add method name
+ stringStream strStream;
+ method->print_name(&strStream);
+ properties.add(new Property(METHOD_NAME_PROPERTY, strStream.as_string()));
+
+ if (method->flags().is_public()) {
+ properties.add(new Property(METHOD_IS_PUBLIC_PROPERTY, TRUE_VALUE));
+ }
+
+ if (method->flags().is_static()) {
+ properties.add(new Property(METHOD_IS_STATIC_PROPERTY, TRUE_VALUE));
+ }
+
+ properties.print(this);
+
+ if (_stream) {
+ char answer = 0;
+ _stream->flush();
+ int result = _stream->read(&answer, 1);
+ _should_send_method = (answer == 'y');
+ }
+
+ this->_nodes = GrowableArray<NodeDescription *>(_arena, 2, 0, NULL);
+ this->_edges = GrowableArray< EdgeDescription * >(_arena, 2, 0, NULL);
+
+
+ this->_current_method = method;
+
+
+
+ _output->flush();
+}
+
+// Has to be called whenever a method has finished compilation
+void IdealGraphPrinter::end_method() {
+
+// if (finish && !in_method) return;
+
+ nmethod* method = (nmethod*)this->_current_method->code();
+
+ start_element(ASSEMBLY_ELEMENT);
+ // Disassembler::decode(method, _output);
+ end_element(ASSEMBLY_ELEMENT);
+
+
+ end_element(GROUP_ELEMENT);
+ _current_method = NULL;
+ _output->flush();
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ delete desc;
+ _nodes.at_put(i, NULL);
+ }
+ }
+ this->_nodes.clear();
+
+
+ for (int i = 0; i < _edges.length(); i++) {
+ // for (int j=0; j<_edges.at(i)->length(); j++) {
+ EdgeDescription *conn = _edges.at(i);
+ conn->print(this);
+ if (conn) {
+ delete conn;
+ _edges.at_put(i, NULL);
+ }
+ //}
+ //_edges.at(i)->clear();
+ //delete _edges.at(i);
+ //_edges.at_put(i, NULL);
+ }
+ this->_edges.clear();
+
+// in_method = false;
+}
+
+// Outputs an XML start element
+void IdealGraphPrinter::start_element(const char *s, Properties *properties /* = NULL */, bool print_indent /* = false */, bool print_return /* = true */) {
+
+ start_element_helper(s, properties, false, print_indent, print_return);
+ _depth++;
+
+}
+
+// Outputs an XML start element without body
+void IdealGraphPrinter::simple_element(const char *s, Properties *properties /* = NULL */, bool print_indent /* = false */) {
+ start_element_helper(s, properties, true, print_indent, true);
+}
+
+// Outputs an XML start element. If outputEnd is true, the element has no body.
+void IdealGraphPrinter::start_element_helper(const char *s, Properties *properties, bool outputEnd, bool print_indent /* = false */, bool print_return /* = true */) {
+
+ assert(_output, "output stream must exist!");
+
+ if (print_indent) this->print_indent();
+ _output->print("<");
+ _output->print(s);
+ if (properties) properties->print_as_attributes(this);
+
+ if (outputEnd) {
+ _output->print("/");
+ }
+
+ _output->print(">");
+ if (print_return) _output->print_cr("");
+
+}
+
+// Print indent
+void IdealGraphPrinter::print_indent() {
+ for (int i = 0; i < _depth; i++) {
+ _output->print(INDENT);
+ }
+}
+
+// Outputs an XML end element
+void IdealGraphPrinter::end_element(const char *s, bool print_indent /* = true */, bool print_return /* = true */) {
+
+ assert(_output, "output stream must exist!");
+
+ _depth--;
+
+ if (print_indent) this->print_indent();
+ _output->print("</");
+ _output->print(s);
+ _output->print(">");
+ if (print_return) _output->print_cr("");
+
+}
+
+bool IdealGraphPrinter::traverse_outs() {
+ return _traverse_outs;
+}
+
+void IdealGraphPrinter::set_traverse_outs(bool b) {
+ _traverse_outs = b;
+}
+
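+// Iterative depth-first traversal starting at the given node: every node
+// reachable through inputs (and also through outputs when _traverse_outs is
+// set) is visited exactly once and handed to pre_node().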
+void IdealGraphPrinter::walk(Node *start) {
+
+
+ VectorSet visited(Thread::current()->resource_area());
+ GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
+ nodeStack.push(start);
+ visited.test_set(start->_idx);
+ while(nodeStack.length() > 0) {
+
+ Node *n = nodeStack.pop();
+ IdealGraphPrinter::pre_node(n, this);
+
+ if (_traverse_outs) {
+ for (DUIterator i = n->outs(); n->has_out(i); i++) {
+ Node* p = n->out(i);
+ if (!visited.test_set(p->_idx)) {
+ nodeStack.push(p);
+ }
+ }
+ }
+
+ for ( uint i = 0; i < n->len(); i++ ) {
+ if ( n->in(i) ) {
+ if (!visited.test_set(n->in(i)->_idx)) {
+ nodeStack.push(n->in(i));
+ }
+ }
+ }
+ }
+}
+
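+// compress(), eval() and link() below are the LINK/EVAL forest helpers of
+// the Lengauer-Tarjan dominator algorithm in its simple form (path
+// compression along the ancestor chain, no balancing); they are driven by
+// build_dominators() below.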
+void IdealGraphPrinter::compress(int index, GrowableArray<Block>* blocks) {
+ Block *block = blocks->adr_at(index);
+
+ int ancestor = block->ancestor();
+ assert(ancestor != -1, "");
+
+ Block *ancestor_block = blocks->adr_at(ancestor);
+ if (ancestor_block->ancestor() != -1) {
+ compress(ancestor, blocks);
+
+ int label = block->label();
+ Block *label_block = blocks->adr_at(label);
+
+ int ancestor_label = ancestor_block->label();
+ Block *ancestor_label_block = blocks->adr_at(ancestor_label);
+ if (ancestor_label_block->semi() < label_block->semi()) {
+ block->set_label(ancestor_label);
+ }
+
+ block->set_ancestor(ancestor_block->ancestor());
+ }
+}
+
+int IdealGraphPrinter::eval(int index, GrowableArray<Block>* blocks) {
+ Block *block = blocks->adr_at(index);
+ if (block->ancestor() == -1) {
+ return index;
+ } else {
+ compress(index, blocks);
+ return block->label();
+ }
+}
+
+void IdealGraphPrinter::link(int index1, int index2, GrowableArray<Block>* blocks) {
+ Block *block2 = blocks->adr_at(index2);
+ block2->set_ancestor(index1);
+}
+
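+// Computes immediate dominators for the blocks discovered by build_blocks():
+// a DFS pass numbers the blocks and records parents and predecessors, a
+// reverse pass computes semi-dominators with eval()/link() and the per-block
+// buckets, and a final forward pass resolves relative dominators (unreached
+// blocks default to block 0).
+//
+// Illustrative example (not taken from any particular compile): for the
+// diamond CFG
+//   B0 -> B1, B0 -> B2, B1 -> B3, B2 -> B3
+// the result is dominator(B1) = dominator(B2) = dominator(B3) = B0, because
+// neither B1 nor B2 lies on every path from B0 to B3.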
+void IdealGraphPrinter::build_dominators(GrowableArray<Block>* blocks) {
+
+ if (blocks->length() == 0) return;
+
+ GrowableArray<int> stack;
+ stack.append(0);
+
+ GrowableArray<Block *> array;
+
+ assert(blocks->length() > 0, "");
+ blocks->adr_at(0)->set_dominator(0);
+
+ int n = 0;
+ while(!stack.is_empty()) {
+ int index = stack.pop();
+ Block *block = blocks->adr_at(index);
+ block->set_semi(n);
+ array.append(block);
+ n = n + 1;
+ for (int i = 0; i < block->succs()->length(); i++) {
+ int succ_index = block->succs()->at(i);
+ Block *succ = blocks->adr_at(succ_index);
+ if (succ->semi() == -1) {
+ succ->set_parent(index);
+ stack.push(succ_index);
+ }
+ succ->add_pred(index);
+ }
+ }
+
+ for (int i=n-1; i>0; i--) {
+ Block *block = array.at(i);
+ int block_index = block->index();
+ for (int j=0; j<block->pred()->length(); j++) {
+ int pred_index = block->pred()->at(j);
+ int cur_index = eval(pred_index, blocks);
+
+ Block *cur_block = blocks->adr_at(cur_index);
+ if (cur_block->semi() < block->semi()) {
+ block->set_semi(cur_block->semi());
+ }
+ }
+
+ int semi_index = block->semi();
+ Block *semi_block = array.at(semi_index);
+ semi_block->add_to_bucket(block_index);
+
+ link(block->parent(), block_index, blocks);
+ Block *parent_block = blocks->adr_at(block->parent());
+
+ for (int j=0; j<parent_block->bucket()->length(); j++) {
+ int cur_index = parent_block->bucket()->at(j);
+ int new_index = eval(cur_index, blocks);
+ Block *cur_block = blocks->adr_at(cur_index);
+ Block *new_block = blocks->adr_at(new_index);
+ int dom = block->parent();
+
+ if (new_block->semi() < cur_block->semi()) {
+ dom = new_index;
+ }
+
+ cur_block->set_dominator(dom);
+ }
+
+ parent_block->clear_bucket();
+ }
+
+ for (int i=1; i < n; i++) {
+
+ Block *block = array.at(i);
+ int block_index = block->index();
+
+ int semi_index = block->semi();
+ Block *semi_block = array.at(semi_index);
+
+ if (block->dominator() != semi_block->index()) {
+ int new_dom = blocks->adr_at(block->dominator())->dominator();
+ block->set_dominator(new_dom);
+ }
+ }
+
+ for (int i = 0; i < blocks->length(); i++) {
+ if (blocks->adr_at(i)->dominator() == -1) {
+ blocks->adr_at(i)->set_dominator(0);
+ }
+ }
+
+ // Build dominates array
+ for (int i=1; i < blocks->length(); i++) {
+ Block *block = blocks->adr_at(i);
+ int dominator = block->dominator();
+ Block *dom_block = blocks->adr_at(dominator);
+ dom_block->add_dominates(i);
+ dom_block->add_child(i);
+
+ while(dominator != 0) {
+ dominator = dom_block->dominator();
+ dom_block = blocks->adr_at(dominator);
+ dom_block->add_child(i);
+ }
+ }
+}
+
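+// Fills the table so that common_dominator[a][b] is the index of the nearest
+// block dominating both a and b (their lowest common ancestor in the
+// dominator tree), by recursing over the dominates()/children() lists built
+// by build_dominators().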
+void IdealGraphPrinter::build_common_dominator(int **common_dominator, int index, GrowableArray<Block>* blocks) {
+
+ common_dominator[index][index] = index;
+ Block *block = blocks->adr_at(index);
+ for (int i = 0; i < block->dominates()->length(); i++) {
+ Block *dominated = blocks->adr_at(block->dominates()->at(i));
+
+ for (int j=0; j<dominated->children()->length(); j++) {
+ Block *child = blocks->adr_at(dominated->children()->at(j));
+ common_dominator[index][child->index()] = common_dominator[child->index()][index] = index;
+
+ for (int k=0; k<i; k++) {
+ Block *other_dominated = blocks->adr_at(block->dominates()->at(k));
+ common_dominator[child->index()][other_dominated->index()] = common_dominator[other_dominated->index()][child->index()] = index;
+
+ for (int l=0 ; l<other_dominated->children()->length(); l++) {
+ Block *other_child = blocks->adr_at(other_dominated->children()->at(l));
+ common_dominator[child->index()][other_child->index()] = common_dominator[other_child->index()][child->index()] = index;
+ }
+ }
+ }
+
+ build_common_dominator(common_dominator, dominated->index(), blocks);
+ }
+}
+
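+// Places every node that build_blocks() left without a block: Phis go to
+// their region's block and projections to their parent's block; the rest are
+// worklist-scheduled into the common dominator of the blocks of their uses
+// (a use through a Phi counts against the corresponding region predecessor).
+// Nodes that still cannot be placed fall back to a parent's block or block 0.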
+void IdealGraphPrinter::schedule_latest(int **common_dominator, GrowableArray<Block>* blocks) {
+
+ int queue_size = _nodes.length() + 1;
+ NodeDescription **queue = NEW_RESOURCE_ARRAY(NodeDescription *, queue_size);
+ int queue_start = 0;
+ int queue_end = 0;
+ Arena *a = new Arena();
+ VectorSet on_queue(a);
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ desc->init_succs();
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ for (uint j=0; j<desc->node()->len(); j++) {
+ Node *n = desc->node()->in(j);
+ if (n) {
+ NodeDescription *other_desc = _nodes.at(n->_idx);
+ other_desc->add_succ(desc);
+ }
+ }
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc && desc->block_index() == -1) {
+
+ // Put Phi into same block as region
+ if (desc->node()->is_Phi() && desc->node()->in(0) && _nodes.at(desc->node()->in(0)->_idx)->block_index() != -1) {
+ int index = _nodes.at(desc->node()->in(0)->_idx)->block_index();
+ desc->set_block_index(index);
+ blocks->adr_at(index)->add_node(desc);
+
+ // Put Projections to same block as parent
+ } else if (desc->node()->is_block_proj() && _nodes.at(desc->node()->is_block_proj()->_idx)->block_index() != -1) {
+ int index = _nodes.at(desc->node()->is_block_proj()->_idx)->block_index();
+ desc->set_block_index(index);
+ blocks->adr_at(index)->add_node(desc);
+ } else {
+ queue[queue_end] = desc;
+ queue_end++;
+ on_queue.set(desc->node()->_idx);
+ }
+ }
+ }
+
+
+ int z = 0;
+ while(queue_start != queue_end && z < 10000) {
+
+ NodeDescription *desc = queue[queue_start];
+ queue_start = (queue_start + 1) % queue_size;
+ on_queue >>= desc->node()->_idx;
+
+ Node* node = desc->node();
+
+ int block_index = -1;
+ if (desc->succs()->length() != 0) {
+ for (int i = 0; i < desc->succs()->length(); i++) {
+ NodeDescription *cur_desc = desc->succs()->at(i);
+ if (cur_desc != desc) {
+ if (cur_desc->succs()->length() == 0) {
+
+ // Ignore nodes with 0 successors
+
+ } else if (cur_desc->block_index() == -1) {
+
+ // Let this node schedule first
+ block_index = -1;
+ break;
+
+ } else if (cur_desc->node()->is_Phi()){
+
+ // Special treatment for Phi functions
+ PhiNode *phi = cur_desc->node()->as_Phi();
+ assert(phi->in(0) && phi->in(0)->is_Region(), "Must have region node in first input");
+ RegionNode *region = phi->in(0)->as_Region();
+
+ for (uint j=1; j<phi->len(); j++) {
+ Node *cur_phi_input = phi->in(j);
+ if (cur_phi_input == desc->node() && region->in(j)) {
+ NodeDescription *cur_region_input = _nodes.at(region->in(j)->_idx);
+ if (cur_region_input->block_index() == -1) {
+
+ // Let this node schedule first
+ block_index = -1;
+ break;
+ } else {
+ if (block_index == -1) {
+ block_index = cur_region_input->block_index();
+ } else {
+ block_index = common_dominator[block_index][cur_region_input->block_index()];
+ }
+ }
+ }
+ }
+
+ } else {
+ if (block_index == -1) {
+ block_index = cur_desc->block_index();
+ } else {
+ block_index = common_dominator[block_index][cur_desc->block_index()];
+ }
+ }
+ }
+ }
+ }
+
+ if (block_index == -1) {
+ queue[queue_end] = desc;
+ queue_end = (queue_end + 1) % queue_size;
+ on_queue.set(desc->node()->_idx);
+ z++;
+ } else {
+ assert(desc->block_index() == -1, "");
+ desc->set_block_index(block_index);
+ blocks->adr_at(block_index)->add_node(desc);
+ z = 0;
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc && desc->block_index() == -1) {
+
+ //if (desc->node()->is_Proj() || desc->node()->is_Con()) {
+ Node *parent = desc->node()->in(0);
+ uint cur = 1;
+ while(!parent && cur < desc->node()->len()) {
+ parent = desc->node()->in(cur);
+ cur++;
+ }
+
+ if (parent && _nodes.at(parent->_idx)->block_index() != -1) {
+ int index = _nodes.at(parent->_idx)->block_index();
+ desc->set_block_index(index);
+ blocks->adr_at(index)->add_node(desc);
+ } else {
+ desc->set_block_index(0);
+ blocks->adr_at(0)->add_node(desc);
+ //ShouldNotReachHere();
+ }
+ //}
+ /*
+ if (desc->node()->is_block_proj() && _nodes.at(desc->node()->is_block_proj()->_idx)->block_index() != -1) {
+ int index = _nodes.at(desc->node()->is_block_proj()->_idx)->block_index();
+ desc->set_block_index(index);
+ blocks->adr_at(index)->add_node(desc);
+ } */
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ desc->clear_succs();
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ int block_index = desc->block_index();
+
+ assert(block_index >= 0 && block_index < blocks->length(), "Block index must be in range");
+ assert(blocks->adr_at(block_index)->nodes()->contains(desc), "Node must be child of block");
+ }
+ }
+ a->destruct_contents();
+}
+
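+// Reconstructs a CFG over the printed nodes: walks control projections to
+// form blocks and record their successors, runs build_dominators() and
+// build_common_dominator(), schedules the remaining nodes with
+// schedule_latest(), and finally emits the CONTROL_FLOW_ELEMENT section with
+// one BLOCK_ELEMENT (successors plus member nodes) per discovered block.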
+void IdealGraphPrinter::build_blocks(Node *root) {
+
+ Arena *a = new Arena();
+ Node_Stack stack(a, 100);
+
+ VectorSet visited(a);
+ stack.push(root, 0);
+ GrowableArray<Block> blocks(a, 2, 0, Block(0));
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ if (_nodes.at(i)) _nodes.at(i)->set_block_index(-1);
+ }
+
+
+ // Order nodes such that node index is equal to idx
+ for (int i = 0; i < _nodes.length(); i++) {
+
+ if (_nodes.at(i)) {
+ NodeDescription *node = _nodes.at(i);
+ int index = node->node()->_idx;
+ if (index != i) {
+ _nodes.at_grow(index);
+ NodeDescription *tmp = _nodes.at(index);
+ *(_nodes.adr_at(index)) = node;
+ *(_nodes.adr_at(i)) = tmp;
+ i--;
+ }
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *node = _nodes.at(i);
+ if (node) {
+ assert(node->node()->_idx == (uint)i, "");
+ }
+ }
+
+ while(stack.is_nonempty()) {
+
+ //Node *n = stack.node();
+ //int index = stack.index();
+ Node *proj = stack.node();//n->in(index);
+ const Node *parent = proj->is_block_proj();
+ if (parent == NULL) {
+ parent = proj;
+ }
+
+ if (!visited.test_set(parent->_idx)) {
+
+ NodeDescription *end_desc = _nodes.at(parent->_idx);
+ int block_index = blocks.length();
+ Block block(block_index);
+ blocks.append(block);
+ Block *b = blocks.adr_at(block_index);
+ b->set_start(end_desc);
+ // assert(end_desc->block_index() == -1, "");
+ end_desc->set_block_index(block_index);
+ b->add_node(end_desc);
+
+ // Skip any control-pinned middle'in stuff
+ Node *p = proj;
+ NodeDescription *start_desc = NULL;
+ do {
+ proj = p; // Update pointer to last Control
+ if (p->in(0) == NULL) {
+ start_desc = end_desc;
+ break;
+ }
+ p = p->in(0); // Move control forward
+ start_desc = _nodes.at(p->_idx);
+ assert(start_desc, "");
+
+ if (start_desc != end_desc && start_desc->block_index() == -1) {
+ assert(start_desc->block_index() == -1, "");
+ assert(block_index < blocks.length(), "");
+ start_desc->set_block_index(block_index);
+ b->add_node(start_desc);
+ }
+ } while( !p->is_block_proj() &&
+ !p->is_block_start() );
+
+ for (uint i = 0; i < start_desc->node()->len(); i++) {
+
+ Node *pred_node = start_desc->node()->in(i);
+
+
+ if (pred_node && pred_node != start_desc->node()) {
+ const Node *cur_parent = pred_node->is_block_proj();
+ if (cur_parent != NULL) {
+ pred_node = (Node *)cur_parent;
+ }
+
+ NodeDescription *pred_node_desc = _nodes.at(pred_node->_idx);
+ if (pred_node_desc->block_index() != -1) {
+ blocks.adr_at(pred_node_desc->block_index())->add_succ(block_index);
+ }
+ }
+ }
+
+ for (DUIterator_Fast dmax, i = end_desc->node()->fast_outs(dmax); i < dmax; i++) {
+ Node* cur_succ = end_desc->node()->fast_out(i);
+ NodeDescription *cur_succ_desc = _nodes.at(cur_succ->_idx);
+
+ DUIterator_Fast dmax2, i2 = cur_succ->fast_outs(dmax2);
+ if (cur_succ->is_block_proj() && i2 < dmax2 && !cur_succ->is_Root()) {
+
+ for (; i2<dmax2; i2++) {
+ Node *cur_succ2 = cur_succ->fast_out(i2);
+ if (cur_succ2) {
+ cur_succ_desc = _nodes.at(cur_succ2->_idx);
+ if (cur_succ_desc == NULL) {
+ // dead node so skip it
+ continue;
+ }
+ if (cur_succ2 != end_desc->node() && cur_succ_desc->block_index() != -1) {
+ b->add_succ(cur_succ_desc->block_index());
+ }
+ }
+ }
+
+ } else {
+
+ if (cur_succ != end_desc->node() && cur_succ_desc && cur_succ_desc->block_index() != -1) {
+ b->add_succ(cur_succ_desc->block_index());
+ }
+ }
+ }
+
+
+ int num_preds = p->len();
+ int bottom = -1;
+ if (p->is_Region() || p->is_Phi()) {
+ bottom = 0;
+ }
+
+ int pushed = 0;
+ for (int i=num_preds - 1; i > bottom; i--) {
+ if (p->in(i) != NULL && p->in(i) != p) {
+ stack.push(p->in(i), 0);
+ pushed++;
+ }
+ }
+
+ if (pushed == 0 && p->is_Root() && !_matcher) {
+ // Special case when backedges to root are not yet built
+ for (int i = 0; i < _nodes.length(); i++) {
+ if (_nodes.at(i) && _nodes.at(i)->node()->is_SafePoint() && _nodes.at(i)->node()->outcnt() == 0) {
+ stack.push(_nodes.at(i)->node(), 0);
+ }
+ }
+ }
+
+ } else {
+ stack.pop();
+ }
+ }
+
+ build_dominators(&blocks);
+
+ int **common_dominator = NEW_RESOURCE_ARRAY(int *, blocks.length());
+ for (int i = 0; i < blocks.length(); i++) {
+ int *cur = NEW_RESOURCE_ARRAY(int, blocks.length());
+ common_dominator[i] = cur;
+
+ for (int j=0; j<blocks.length(); j++) {
+ cur[j] = 0;
+ }
+ }
+
+ for (int i = 0; i < blocks.length(); i++) {
+ blocks.adr_at(i)->add_child(blocks.adr_at(i)->index());
+ }
+ build_common_dominator(common_dominator, 0, &blocks);
+
+ schedule_latest(common_dominator, &blocks);
+
+ start_element(CONTROL_FLOW_ELEMENT);
+
+ for (int i = 0; i < blocks.length(); i++) {
+ Block *block = blocks.adr_at(i);
+
+ Properties props;
+ props.add(new Property(BLOCK_NAME_PROPERTY, i));
+ props.add(new Property(BLOCK_DOMINATOR_PROPERTY, block->dominator()));
+ start_element(BLOCK_ELEMENT, &props);
+
+ if (block->succs()->length() > 0) {
+ start_element(SUCCESSORS_ELEMENT);
+ for (int j=0; j<block->succs()->length(); j++) {
+ int cur_index = block->succs()->at(j);
+ if (cur_index != 0 /* the start block must not have inputs */) {
+ Properties properties;
+ properties.add(new Property(BLOCK_NAME_PROPERTY, cur_index));
+ simple_element(SUCCESSOR_ELEMENT, &properties);
+ }
+ }
+ end_element(SUCCESSORS_ELEMENT);
+ }
+
+ start_element(NODES_ELEMENT);
+
+ for (int j=0; j<block->nodes()->length(); j++) {
+ NodeDescription *n = block->nodes()->at(j);
+ Properties properties;
+ properties.add(new Property(NODE_ID_PROPERTY, n->id()));
+ simple_element(NODE_ELEMENT, &properties);
+ }
+
+ end_element(NODES_ELEMENT);
+
+ end_element(BLOCK_ELEMENT);
+ }
+
+
+ end_element(CONTROL_FLOW_ELEMENT);
+
+ a->destruct_contents();
+}
+
+void IdealGraphPrinter::print_method(Compile* compile, const char *name, int level, bool clear_nodes) {
+ print(compile, name, (Node *)compile->root(), level, clear_nodes);
+}
+
+// Print current ideal graph
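+// Walks the graph reachable from the given node, diffs the resulting node
+// and edge descriptions against the previous snapshot, and emits a new graph
+// element only if something changed; descriptions that are no longer valid
+// are printed as removals and then dropped from the caches.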
+void IdealGraphPrinter::print(Compile* compile, const char *name, Node *node, int level, bool clear_nodes) {
+
+// if (finish && !in_method) return;
+ if (!_current_method || !_should_send_method || level > PrintIdealGraphLevel) return;
+
+ assert(_current_method, "newMethod has to be called first!");
+
+ _clear_nodes = clear_nodes;
+
+ // Warning, unsafe cast?
+ _chaitin = (PhaseChaitin *)compile->regalloc();
+ _matcher = compile->matcher();
+
+
+ // Update nodes
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ desc->set_state(Invalid);
+ }
+ }
+ Node *n = node;
+ walk(n);
+
+ // Update edges
+ for (int i = 0; i < _edges.length(); i++) {
+ _edges.at(i)->set_state(Invalid);
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc && desc->state() != Invalid) {
+
+ int to = desc->id();
+ uint len = desc->node()->len();
+ for (uint j=0; j<len; j++) {
+ Node *n = desc->node()->in(j);
+
+ if (n) {
+
+
+ intptr_t from = (intptr_t)n;
+
+ // Assert from node is valid
+ /*
+ bool ok = false;
+ for (int k=0; k<_nodes.length(); k++) {
+ NodeDescription *desc = _nodes.at(k);
+ if (desc && desc->id() == from) {
+ assert(desc->state() != Invalid, "");
+ ok = true;
+ }
+ }
+ assert(ok, "");*/
+
+ uint index = j;
+ if (index >= desc->node()->req()) {
+ index = desc->node()->req();
+ }
+
+ print_edge(from, to, index);
+ }
+ }
+ }
+ }
+
+ bool is_different = false;
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc && desc->state() != Valid) {
+ is_different = true;
+ break;
+ }
+ }
+
+ if (!is_different) {
+ for (int i = 0; i < _edges.length(); i++) {
+ EdgeDescription *conn = _edges.at(i);
+ if (conn && conn->state() != Valid) {
+ is_different = true;
+ break;
+ }
+ }
+ }
+
+ // No changes -> do not print graph
+ if (!is_different) return;
+
+ Properties properties;
+ properties.add(new Property(GRAPH_NAME_PROPERTY, (const char *)name));
+ start_element(GRAPH_ELEMENT, &properties);
+
+ start_element(NODES_ELEMENT);
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ desc->print(this);
+ if (desc->state() == Invalid) {
+ delete desc;
+ _nodes.at_put(i, NULL);
+ } else {
+ desc->set_state(Valid);
+ }
+ }
+ }
+ end_element(NODES_ELEMENT);
+
+ build_blocks(node);
+
+ start_element(EDGES_ELEMENT);
+ for (int i = 0; i < _edges.length(); i++) {
+ EdgeDescription *conn = _edges.at(i);
+
+ // Assert from and to nodes are valid
+ /*
+ if (!conn->state() == Invalid) {
+ bool ok1 = false;
+ bool ok2 = false;
+ for (int j=0; j<_nodes.length(); j++) {
+ NodeDescription *desc = _nodes.at(j);
+ if (desc && desc->id() == conn->from()) {
+ ok1 = true;
+ }
+
+ if (desc && desc->id() == conn->to()) {
+ ok2 = true;
+ }
+ }
+
+ assert(ok1, "from node not found!");
+ assert(ok2, "to node not found!");
+ }*/
+
+ conn->print(this);
+ if (conn->state() == Invalid) {
+ _edges.remove_at(i);
+ delete conn;
+ i--;
+ }
+ }
+
+ end_element(EDGES_ELEMENT);
+
+ end_element(GRAPH_ELEMENT);
+
+ _output->flush();
+}
+
+// Print edge
+void IdealGraphPrinter::print_edge(int from, int to, int index) {
+
+ EdgeDescription *conn = new EdgeDescription(from, to, index);
+ for (int i = 0; i < _edges.length(); i++) {
+ if (_edges.at(i)->equals(conn)) {
+ conn->set_state(Valid);
+ delete _edges.at(i);
+ _edges.at_put(i, conn);
+ return;
+ }
+ }
+
+ _edges.append(conn);
+}
+
+extern const char *NodeClassNames[];
+
+// Create node description
+IdealGraphPrinter::NodeDescription *IdealGraphPrinter::create_node_description(Node* node) {
+
+#ifndef PRODUCT
+ node->_in_dump_cnt++;
+ NodeDescription *desc = new NodeDescription(node);
+ desc->properties()->add(new Property(NODE_NAME_PROPERTY, (const char *)node->Name()));
+
+ const Type *t = node->bottom_type();
+ desc->properties()->add(new Property("type", (const char *)Type::msg[t->base()]));
+
+ desc->properties()->add(new Property("idx", node->_idx));
+#ifdef ASSERT
+ desc->properties()->add(new Property("debug_idx", node->_debug_idx));
+#endif
+
+
+ const jushort flags = node->flags();
+ if (flags & Node::Flag_is_Copy) {
+ desc->properties()->add(new Property("is_copy", "true"));
+ }
+ if (flags & Node::Flag_is_Call) {
+ desc->properties()->add(new Property("is_call", "true"));
+ }
+ if (flags & Node::Flag_rematerialize) {
+ desc->properties()->add(new Property("rematerialize", "true"));
+ }
+ if (flags & Node::Flag_needs_anti_dependence_check) {
+ desc->properties()->add(new Property("needs_anti_dependence_check", "true"));
+ }
+ if (flags & Node::Flag_is_macro) {
+ desc->properties()->add(new Property("is_macro", "true"));
+ }
+ if (flags & Node::Flag_is_Con) {
+ desc->properties()->add(new Property("is_con", "true"));
+ }
+ if (flags & Node::Flag_is_cisc_alternate) {
+ desc->properties()->add(new Property("is_cisc_alternate", "true"));
+ }
+ if (flags & Node::Flag_is_Branch) {
+ desc->properties()->add(new Property("is_branch", "true"));
+ }
+ if (flags & Node::Flag_is_block_start) {
+ desc->properties()->add(new Property("is_block_start", "true"));
+ }
+ if (flags & Node::Flag_is_Goto) {
+ desc->properties()->add(new Property("is_goto", "true"));
+ }
+ if (flags & Node::Flag_is_dead_loop_safe) {
+ desc->properties()->add(new Property("is_dead_loop_safe", "true"));
+ }
+ if (flags & Node::Flag_may_be_short_branch) {
+ desc->properties()->add(new Property("may_be_short_branch", "true"));
+ }
+ if (flags & Node::Flag_is_safepoint_node) {
+ desc->properties()->add(new Property("is_safepoint_node", "true"));
+ }
+ if (flags & Node::Flag_is_pc_relative) {
+ desc->properties()->add(new Property("is_pc_relative", "true"));
+ }
+
+ if (_matcher) {
+ if (_matcher->is_shared(desc->node())) {
+ desc->properties()->add(new Property("is_shared", "true"));
+ } else {
+ desc->properties()->add(new Property("is_shared", "false"));
+ }
+
+ if (_matcher->is_dontcare(desc->node())) {
+ desc->properties()->add(new Property("is_dontcare", "true"));
+ } else {
+ desc->properties()->add(new Property("is_dontcare", "false"));
+ }
+ }
+
+ if (node->is_Proj()) {
+ desc->properties()->add(new Property("con", (int)node->as_Proj()->_con));
+ }
+
+ if (node->is_Mach()) {
+ desc->properties()->add(new Property("idealOpcode", (const char *)NodeClassNames[node->as_Mach()->ideal_Opcode()]));
+ }
+
+
+
+
+
+ outputStream *oldTty = tty;
+ buffer[0] = 0;
+ stringStream s2(buffer, sizeof(buffer) - 1);
+
+ node->dump_spec(&s2);
+ assert(s2.size() < sizeof(buffer), "size in range");
+ desc->properties()->add(new Property("dump_spec", buffer));
+
+ if (node->is_block_proj()) {
+ desc->properties()->add(new Property("is_block_proj", "true"));
+ }
+
+ if (node->is_block_start()) {
+ desc->properties()->add(new Property("is_block_start", "true"));
+ }
+
+ const char *short_name = "short_name";
+ if (strcmp(node->Name(), "Parm") == 0 && node->as_Proj()->_con >= TypeFunc::Parms) {
+ int index = node->as_Proj()->_con - TypeFunc::Parms;
+ if (index >= 10) {
+ desc->properties()->add(new Property(short_name, "PA"));
+ } else {
+ sprintf(buffer, "P%d", index);
+ desc->properties()->add(new Property(short_name, buffer));
+ }
+ } else if (strcmp(node->Name(), "IfTrue") == 0) {
+ desc->properties()->add(new Property(short_name, "T"));
+ } else if (strcmp(node->Name(), "IfFalse") == 0) {
+ desc->properties()->add(new Property(short_name, "F"));
+ } else if ((node->is_Con() && node->is_Type()) || node->is_Proj()) {
+
+ if (t->base() == Type::Int && t->is_int()->is_con()) {
+ const TypeInt *typeInt = t->is_int();
+ assert(typeInt->is_con(), "must be constant");
+ jint value = typeInt->get_con();
+
+ // max. 2 chars allowed
+ if (value >= -9 && value <= 99) {
+ sprintf(buffer, "%d", value);
+ desc->properties()->add(new Property(short_name, buffer));
+ } else {
+ desc->properties()->add(new Property(short_name, "I"));
+ }
+ } else if (t == Type::TOP) {
+ desc->properties()->add(new Property(short_name, "^"));
+ } else if (t->base() == Type::Long && t->is_long()->is_con()) {
+ const TypeLong *typeLong = t->is_long();
+ assert(typeLong->is_con(), "must be constant");
+ jlong value = typeLong->get_con();
+
+ // max. 2 chars allowed
+ if (value >= -9 && value <= 99) {
+ sprintf(buffer, "%d", (int)value); // value is a jlong but is known to fit in an int here (-9..99)
+ desc->properties()->add(new Property(short_name, buffer));
+ } else {
+ desc->properties()->add(new Property(short_name, "L"));
+ }
+ } else if (t->base() == Type::KlassPtr) {
+ const TypeKlassPtr *typeKlass = t->is_klassptr();
+ desc->properties()->add(new Property(short_name, "CP"));
+ } else if (t->base() == Type::Control) {
+ desc->properties()->add(new Property(short_name, "C"));
+ } else if (t->base() == Type::Memory) {
+ desc->properties()->add(new Property(short_name, "M"));
+ } else if (t->base() == Type::Abio) {
+ desc->properties()->add(new Property(short_name, "IO"));
+ } else if (t->base() == Type::Return_Address) {
+ desc->properties()->add(new Property(short_name, "RA"));
+ } else if (t->base() == Type::AnyPtr) {
+ desc->properties()->add(new Property(short_name, "P"));
+ } else if (t->base() == Type::RawPtr) {
+ desc->properties()->add(new Property(short_name, "RP"));
+ } else if (t->base() == Type::AryPtr) {
+ desc->properties()->add(new Property(short_name, "AP"));
+ }
+ }
+
+ if (node->is_SafePoint()) {
+ SafePointNode *safePointNode = node->as_SafePoint();
+ if (safePointNode->jvms()) {
+ stringStream bciStream;
+ bciStream.print("%d ", safePointNode->jvms()->bci());
+ JVMState *caller = safePointNode->jvms()->caller();
+ while(caller) {
+ bciStream.print("%d ", caller->bci());
+
+ caller = caller->caller();
+ }
+ desc->properties()->add(new Property("bci", bciStream.as_string()));
+ }
+ }
+
+ if (_chaitin && _chaitin != (PhaseChaitin *)0xdeadbeef) {
+ buffer[0] = 0;
+ _chaitin->dump_register(node, buffer);
+ desc->properties()->add(new Property("reg", buffer));
+ desc->properties()->add(new Property("lrg", _chaitin->n2lidx(node)));
+ }
+
+
+ node->_in_dump_cnt--;
+ return desc;
+#else
+ return NULL;
+#endif
+}
+
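+// Called by walk() for every node reached. Builds a fresh description and
+// compares it with the one recorded for the same node index during an
+// earlier print: an identical description is marked Valid (unchanged), a
+// matching id with different properties is marked New (changed), and the old
+// entry is released.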
+void IdealGraphPrinter::pre_node(Node* node, void *env) {
+
+ IdealGraphPrinter *printer = (IdealGraphPrinter *)env;
+
+ NodeDescription *newDesc = printer->create_node_description(node);
+
+ if (printer->_clear_nodes) {
+
+ printer->_nodes.append(newDesc);
+ } else {
+
+ NodeDescription *desc = printer->_nodes.at_grow(node->_idx, NULL);
+
+ if (desc && desc->equals(newDesc)) {
+ //desc->set_state(Valid);
+ //desc->set_node(node);
+ delete desc;
+ printer->_nodes.at_put(node->_idx, NULL);
+ newDesc->set_state(Valid);
+ //printer->_nodes.at_put(node->_idx, newDesc);
+ } else {
+
+ if (desc && desc->id() == newDesc->id()) {
+ delete desc;
+ printer->_nodes.at_put(node->_idx, NULL);
+ newDesc->set_state(New);
+
+ }
+
+ //if (desc) {
+ // delete desc;
+ //}
+
+ //printer->_nodes.at_put(node->_idx, newDesc);
+ }
+
+ printer->_nodes.append(newDesc);
+ }
+}
+
+void IdealGraphPrinter::post_node(Node* node, void *env) {
+}
+
+outputStream *IdealGraphPrinter::output() {
+ return _output;
+}
+
+IdealGraphPrinter::Description::Description() {
+ _state = New;
+}
+
+void IdealGraphPrinter::Description::print(IdealGraphPrinter *printer) {
+ if (_state == Invalid) {
+ print_removed(printer);
+ } else if (_state == New) {
+ print_changed(printer);
+ }
+}
+
+void IdealGraphPrinter::Description::set_state(State s) {
+ _state = s;
+}
+
+IdealGraphPrinter::State IdealGraphPrinter::Description::state() {
+ return _state;
+}
+
+void IdealGraphPrinter::Block::set_proj(NodeDescription *n) {
+ _proj = n;
+}
+
+void IdealGraphPrinter::Block::set_start(NodeDescription *n) {
+ _start = n;
+}
+
+int IdealGraphPrinter::Block::semi() {
+ return _semi;
+}
+
+int IdealGraphPrinter::Block::parent() {
+ return _parent;
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::bucket() {
+ return &_bucket;
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::children() {
+ return &_children;
+}
+
+void IdealGraphPrinter::Block::add_child(int i) {
+ _children.append(i);
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::dominates() {
+ return &_dominates;
+}
+
+void IdealGraphPrinter::Block::add_dominates(int i) {
+ _dominates.append(i);
+}
+
+void IdealGraphPrinter::Block::add_to_bucket(int i) {
+ _bucket.append(i);
+}
+
+void IdealGraphPrinter::Block::clear_bucket() {
+ _bucket.clear();
+}
+
+void IdealGraphPrinter::Block::set_dominator(int i) {
+ _dominator = i;
+}
+
+void IdealGraphPrinter::Block::set_label(int i) {
+ _label = i;
+}
+
+int IdealGraphPrinter::Block::label() {
+ return _label;
+}
+
+int IdealGraphPrinter::Block::ancestor() {
+ return _ancestor;
+}
+
+void IdealGraphPrinter::Block::set_ancestor(int i) {
+ _ancestor = i;
+}
+
+int IdealGraphPrinter::Block::dominator() {
+ return _dominator;
+}
+
+int IdealGraphPrinter::Block::index() {
+ return _index;
+}
+
+void IdealGraphPrinter::Block::set_parent(int i) {
+ _parent = i;
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::pred() {
+ return &_pred;
+}
+
+void IdealGraphPrinter::Block::set_semi(int i) {
+ _semi = i;
+}
+
+IdealGraphPrinter::Block::Block() {
+}
+
+IdealGraphPrinter::Block::Block(int index) {
+ _index = index;
+ _label = index;
+ _semi = -1;
+ _ancestor = -1;
+ _dominator = -1;
+}
+
+void IdealGraphPrinter::Block::add_pred(int i) {
+ _pred.append(i);
+}
+
+IdealGraphPrinter::NodeDescription *IdealGraphPrinter::Block::proj() {
+ return _proj;
+}
+
+IdealGraphPrinter::NodeDescription *IdealGraphPrinter::Block::start() {
+ return _start;
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::succs() {
+ return &_succs;
+}
+
+void IdealGraphPrinter::Block::add_succ(int index) {
+
+ if (!_succs.contains(index)) {
+ _succs.append(index);
+ }
+}
+
+
+void IdealGraphPrinter::Block::add_node(NodeDescription *n) {
+ if (!_nodes.contains(n)) {
+ _nodes.append(n);
+ }
+}
+
+GrowableArray<IdealGraphPrinter::NodeDescription *>* IdealGraphPrinter::Block::nodes() {
+ return &_nodes;
+}
+
+int IdealGraphPrinter::NodeDescription::count = 0;
+
+IdealGraphPrinter::NodeDescription::NodeDescription(Node* node) : _node(node) {
+ _id = (intptr_t)(node);
+ _block_index = -1;
+}
+
+IdealGraphPrinter::NodeDescription::~NodeDescription() {
+ _properties.clean();
+}
+
+// void IdealGraphPrinter::NodeDescription::set_node(Node* node) {
+// //this->_node = node;
+// }
+
+int IdealGraphPrinter::NodeDescription::block_index() {
+ return _block_index;
+}
+
+
+GrowableArray<IdealGraphPrinter::NodeDescription *>* IdealGraphPrinter::NodeDescription::succs() {
+ return &_succs;
+}
+
+void IdealGraphPrinter::NodeDescription::clear_succs() {
+ _succs.clear();
+}
+
+void IdealGraphPrinter::NodeDescription::init_succs() {
+ _succs = GrowableArray<NodeDescription *>();
+}
+
+void IdealGraphPrinter::NodeDescription::add_succ(NodeDescription *desc) {
+ _succs.append(desc);
+}
+
+void IdealGraphPrinter::NodeDescription::set_block_index(int i) {
+ _block_index = i;
+}
+
+bool IdealGraphPrinter::NodeDescription::equals(NodeDescription *desc) {
+ if (desc == NULL) return false;
+ if (desc->id() != id()) return false;
+ return properties()->equals(desc->properties());
+}
+
+Node* IdealGraphPrinter::NodeDescription::node() {
+ return _node;
+}
+
+IdealGraphPrinter::Properties* IdealGraphPrinter::NodeDescription::properties() {
+ return &_properties;
+}
+
+uint IdealGraphPrinter::NodeDescription::id() {
+ return _id;
+}
+
+void IdealGraphPrinter::NodeDescription::print_changed(IdealGraphPrinter *printer) {
+
+
+ Properties properties;
+ properties.add(new Property(NODE_ID_PROPERTY, id()));
+ printer->start_element(NODE_ELEMENT, &properties);
+
+ this->properties()->print(printer);
+
+
+ printer->end_element(NODE_ELEMENT);
+}
+
+void IdealGraphPrinter::NodeDescription::print_removed(IdealGraphPrinter *printer) {
+
+ Properties properties;
+ properties.add(new Property(NODE_ID_PROPERTY, id()));
+ printer->simple_element(REMOVE_NODE_ELEMENT, &properties);
+}
+
+IdealGraphPrinter::EdgeDescription::EdgeDescription(int from, int to, int index) {
+ this->_from = from;
+ this->_to = to;
+ this->_index = index;
+}
+
+IdealGraphPrinter::EdgeDescription::~EdgeDescription() {
+}
+
+int IdealGraphPrinter::EdgeDescription::from() {
+ return _from;
+}
+
+int IdealGraphPrinter::EdgeDescription::to() {
+ return _to;
+}
+
+void IdealGraphPrinter::EdgeDescription::print_changed(IdealGraphPrinter *printer) {
+
+ Properties properties;
+ properties.add(new Property(INDEX_PROPERTY, _index));
+ properties.add(new Property(FROM_PROPERTY, _from));
+ properties.add(new Property(TO_PROPERTY, _to));
+ printer->simple_element(EDGE_ELEMENT, &properties);
+}
+
+void IdealGraphPrinter::EdgeDescription::print_removed(IdealGraphPrinter *printer) {
+
+ Properties properties;
+ properties.add(new Property(INDEX_PROPERTY, _index));
+ properties.add(new Property(FROM_PROPERTY, _from));
+ properties.add(new Property(TO_PROPERTY, _to));
+ printer->simple_element(REMOVE_EDGE_ELEMENT, &properties);
+}
+
+bool IdealGraphPrinter::EdgeDescription::equals(IdealGraphPrinter::EdgeDescription *desc) {
+ if (desc == NULL) return false;
+ return (_from == desc->_from && _to == desc->_to && _index == desc->_index);
+}
+
+IdealGraphPrinter::Properties::Properties() : list(new (ResourceObj::C_HEAP) GrowableArray<Property *>(2, 0, NULL, true)) {
+}
+
+IdealGraphPrinter::Properties::~Properties() {
+ clean();
+ delete list;
+}
+
+void IdealGraphPrinter::Properties::add(Property *p) {
+ assert(p != NULL, "Property not NULL");
+ list->append(p);
+}
+
+void IdealGraphPrinter::Properties::print(IdealGraphPrinter *printer) {
+ printer->start_element(PROPERTIES_ELEMENT);
+
+ for (int i = 0; i < list->length(); i++) {
+ list->at(i)->print(printer);
+ }
+
+ printer->end_element(PROPERTIES_ELEMENT);
+}
+
+void IdealGraphPrinter::Properties::clean() {
+ for (int i = 0; i < list->length(); i++) {
+ delete list->at(i);
+ list->at_put(i, NULL);
+ }
+ list->clear();
+ assert(list->length() == 0, "List cleared");
+}
+
+void IdealGraphPrinter::Properties::remove(const char *name) {
+ for (int i = 0; i < list->length(); i++) {
+ if (strcmp(list->at(i)->name(), name) == 0) {
+ delete list->at(i);
+ list->remove_at(i);
+ i--;
+ }
+ }
+}
+
+void IdealGraphPrinter::Properties::print_as_attributes(IdealGraphPrinter *printer) {
+
+ for (int i = 0; i < list->length(); i++) {
+ assert(list->at(i) != NULL, "Property not null!");
+ printer->output()->print(" ");
+ list->at(i)->print_as_attribute(printer);
+ }
+}
+
+bool IdealGraphPrinter::Properties::equals(Properties* p) {
+ if (p->list->length() != this->list->length()) return false;
+
+ for (int i = 0; i < list->length(); i++) {
+ assert(list->at(i) != NULL, "Property not null!");
+ if (!list->at(i)->equals(p->list->at(i))) return false;
+ }
+
+ return true;
+}
+
+IdealGraphPrinter::Property::Property() {
+ _name = NULL;
+ _value = NULL;
+}
+
+const char *IdealGraphPrinter::Property::name() {
+ return _name;
+}
+
+IdealGraphPrinter::Property::Property(const Property* p) {
+
+ this->_name = NULL;
+ this->_value = NULL;
+
+ if (p->_name != NULL) {
+ _name = dup(p->_name);
+ }
+
+ if (p->_value) {
+ _value = dup(p->_value);
+ }
+}
+
+IdealGraphPrinter::Property::~Property() {
+
+ clean();
+}
+
+IdealGraphPrinter::Property::Property(const char *name, const char *value) {
+
+ assert(name, "Name must not be null!");
+ assert(value, "Value must not be null!");
+
+ _name = dup(name);
+ _value = dup(value);
+}
+
+IdealGraphPrinter::Property::Property(const char *name, int intValue) {
+ _name = dup(name);
+
+ stringStream stream;
+ stream.print("%d", intValue);
+ _value = dup(stream.as_string());
+}
+
+void IdealGraphPrinter::Property::clean() {
+ if (_name) {
+ delete[] _name; // allocated with new char[] in Property::dup()
+ _name = NULL;
+ }
+
+ if (_value) {
+ delete[] _value; // allocated with new char[] in Property::dup()
+ _value = NULL;
+ }
+}
+
+
+bool IdealGraphPrinter::Property::is_null() {
+ return _name == NULL;
+}
+
+void IdealGraphPrinter::Property::print(IdealGraphPrinter *printer) {
+
+ assert(!is_null(), "null properties cannot be printed!");
+ Properties properties;
+ properties.add(new Property(PROPERTY_NAME_PROPERTY, _name));
+ printer->start_element(PROPERTY_ELEMENT, &properties, false, false);
+ printer->print_xml(_value);
+ printer->end_element(PROPERTY_ELEMENT, false, true);
+}
+
+void IdealGraphPrinter::Property::print_as_attribute(IdealGraphPrinter *printer) {
+
+ printer->output()->print(_name);
+ printer->output()->print("=\"");
+ printer->print_xml(_value);
+ printer->output()->print("\"");
+}
+
+
+bool IdealGraphPrinter::Property::equals(Property* p) {
+
+ if (is_null() && p->is_null()) return true;
+ if (is_null()) return false;
+ if (p->is_null()) return false;
+
+ int cmp1 = strcmp(p->_name, _name);
+ if (cmp1 != 0) return false;
+
+ int cmp2 = strcmp(p->_value, _value);
+ if (cmp2 != 0) return false;
+
+ return true;
+}
+
+void IdealGraphPrinter::print_xml(const char *value) {
+ size_t len = strlen(value);
+
+ char buf[2];
+ buf[1] = 0;
+ for (size_t i = 0; i < len; i++) {
+ char c = value[i];
+
+ switch(c) {
+ case '<':
+ output()->print("&lt;");
+ break;
+
+ case '>':
+ output()->print("&gt;");
+ break;
+
+ // '&' and '"' must be escaped as well, since values are also emitted
+ // inside double-quoted XML attributes (see Property::print_as_attribute).
+ case '&':
+ output()->print("&amp;");
+ break;
+
+ case '"':
+ output()->print("&quot;");
+ break;
+
+ default:
+ buf[0] = c;
+ output()->print(buf);
+ break;
+ }
+ }
+}
+
+#endif
diff --git a/src/share/vm/opto/idealGraphPrinter.hpp b/src/share/vm/opto/idealGraphPrinter.hpp
new file mode 100644
index 000000000..b73493e19
--- /dev/null
+++ b/src/share/vm/opto/idealGraphPrinter.hpp
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#ifndef PRODUCT
+
+class Compile;
+class PhaseIFG;
+class PhaseChaitin;
+class Matcher;
+class Node;
+class InlineTree;
+class ciMethod;
+
+class IdealGraphPrinter
+{
+private:
+
+ enum State
+ {
+ Invalid,
+ Valid,
+ New
+ };
+
+private:
+
+ static const char *INDENT;
+ static const char *TOP_ELEMENT;
+ static const char *GROUP_ELEMENT;
+ static const char *GRAPH_ELEMENT;
+ static const char *PROPERTIES_ELEMENT;
+ static const char *EDGES_ELEMENT;
+ static const char *PROPERTY_ELEMENT;
+ static const char *EDGE_ELEMENT;
+ static const char *NODE_ELEMENT;
+ static const char *NODES_ELEMENT;
+ static const char *CONTROL_FLOW_ELEMENT;
+ static const char *REMOVE_EDGE_ELEMENT;
+ static const char *REMOVE_NODE_ELEMENT;
+ static const char *METHOD_NAME_PROPERTY;
+ static const char *BLOCK_NAME_PROPERTY;
+ static const char *BLOCK_DOMINATOR_PROPERTY;
+ static const char *BLOCK_ELEMENT;
+ static const char *SUCCESSORS_ELEMENT;
+ static const char *SUCCESSOR_ELEMENT;
+ static const char *METHOD_IS_PUBLIC_PROPERTY;
+ static const char *METHOD_IS_STATIC_PROPERTY;
+ static const char *TRUE_VALUE;
+ static const char *NODE_NAME_PROPERTY;
+ static const char *EDGE_NAME_PROPERTY;
+ static const char *NODE_ID_PROPERTY;
+ static const char *FROM_PROPERTY;
+ static const char *TO_PROPERTY;
+ static const char *PROPERTY_NAME_PROPERTY;
+ static const char *GRAPH_NAME_PROPERTY;
+ static const char *INDEX_PROPERTY;
+ static const char *METHOD_ELEMENT;
+ static const char *INLINE_ELEMENT;
+ static const char *BYTECODES_ELEMENT;
+ static const char *METHOD_BCI_PROPERTY;
+ static const char *METHOD_SHORT_NAME_PROPERTY;
+ static const char *ASSEMBLY_ELEMENT;
+
+ class Property {
+
+ private:
+
+ const char *_name;
+ const char *_value;
+
+ public:
+
+ Property();
+ Property(const Property* p);
+ ~Property();
+ Property(const char *name, const char *value);
+ Property(const char *name, int value);
+ bool equals(Property* p);
+ void print(IdealGraphPrinter *printer);
+ void print_as_attribute(IdealGraphPrinter *printer);
+ bool is_null();
+ void clean();
+ const char *name();
+
+ static const char* dup(const char *str) {
+ char * copy = new char[strlen(str)+1];
+ strcpy(copy, str);
+ return copy;
+ }
+
+ };
+
+ class Properties {
+
+ private:
+
+ GrowableArray<Property *> *list;
+
+ public:
+
+ Properties();
+ ~Properties();
+ void add(Property *p);
+ void remove(const char *name);
+ bool equals(Properties* p);
+ void print(IdealGraphPrinter *printer);
+ void print_as_attributes(IdealGraphPrinter *printer);
+ void clean();
+
+ };
+
+
+ class Description {
+
+ private:
+
+ State _state;
+
+ public:
+
+ Description();
+
+ State state();
+ void set_state(State s);
+ void print(IdealGraphPrinter *printer);
+ virtual void print_changed(IdealGraphPrinter *printer) = 0;
+ virtual void print_removed(IdealGraphPrinter *printer) = 0;
+
+ };
+
+ class NodeDescription : public Description{
+
+ public:
+
+ static int count;
+
+ private:
+
+ GrowableArray<NodeDescription *> _succs;
+ int _block_index;
+ uintptr_t _id;
+ Properties _properties;
+ Node* _node;
+
+ public:
+
+ NodeDescription(Node* node);
+ ~NodeDescription();
+ Node* node();
+
+ // void set_node(Node* node);
+ GrowableArray<NodeDescription *>* succs();
+ void init_succs();
+ void clear_succs();
+ void add_succ(NodeDescription *desc);
+ int block_index();
+ void set_block_index(int i);
+ Properties* properties();
+ virtual void print_changed(IdealGraphPrinter *printer);
+ virtual void print_removed(IdealGraphPrinter *printer);
+ bool equals(NodeDescription *desc);
+ uint id();
+
+ };
+
+ class Block {
+
+ private:
+
+ NodeDescription *_start;
+ NodeDescription *_proj;
+ GrowableArray<int> _succs;
+ GrowableArray<NodeDescription *> _nodes;
+ GrowableArray<int> _dominates;
+ GrowableArray<int> _children;
+ int _semi;
+ int _parent;
+ GrowableArray<int> _pred;
+ GrowableArray<int> _bucket;
+ int _index;
+ int _dominator;
+ int _ancestor;
+ int _label;
+
+ public:
+
+ Block();
+ Block(int index);
+
+ void add_node(NodeDescription *n);
+ GrowableArray<NodeDescription *>* nodes();
+ GrowableArray<int>* children();
+ void add_child(int i);
+ void add_succ(int index);
+ GrowableArray<int>* succs();
+ GrowableArray<int>* dominates();
+ void add_dominates(int i);
+ NodeDescription *start();
+ NodeDescription *proj();
+ void set_start(NodeDescription *n);
+ void set_proj(NodeDescription *n);
+
+ int label();
+ void set_label(int i);
+ int ancestor();
+ void set_ancestor(int i);
+ int index();
+ int dominator();
+ void set_dominator(int i);
+ int parent();
+ void set_parent(int i);
+ int semi();
+ GrowableArray<int>* bucket();
+ void add_to_bucket(int i);
+ void clear_bucket();
+ GrowableArray<int>* pred();
+ void set_semi(int i);
+ void add_pred(int i);
+
+ };
+
+ class EdgeDescription : public Description {
+
+ private:
+
+ int _from;
+ int _to;
+ int _index;
+ public:
+
+ EdgeDescription(int from, int to, int index);
+ ~EdgeDescription();
+
+ virtual void print_changed(IdealGraphPrinter *printer);
+ virtual void print_removed(IdealGraphPrinter *printer);
+ bool equals(EdgeDescription *desc);
+ int from();
+ int to();
+ };
+
+
+ static int _file_count;
+ networkStream *_stream;
+ outputStream *_output;
+ ciMethod *_current_method;
+ GrowableArray<NodeDescription *> _nodes;
+ GrowableArray<EdgeDescription *> _edges;
+ int _depth;
+ Arena *_arena;
+ char buffer[128];
+ bool _should_send_method;
+ PhaseChaitin* _chaitin;
+ bool _clear_nodes;
+ Matcher* _matcher;
+ bool _traverse_outs;
+
+ void start_element_helper(const char *name, Properties *properties, bool endElement, bool print_indent = false, bool print_return = true);
+ NodeDescription *create_node_description(Node* node);
+
+ static void pre_node(Node* node, void *env);
+ static void post_node(Node* node, void *env);
+
+ void schedule_latest(int **common_dominator, GrowableArray<Block>* blocks);
+ void build_common_dominator(int **common_dominator, int index, GrowableArray<Block>* blocks);
+ void compress(int index, GrowableArray<Block>* blocks);
+ int eval(int index, GrowableArray<Block>* blocks);
+ void link(int index1, int index2, GrowableArray<Block>* blocks);
+ void build_dominators(GrowableArray<Block>* blocks);
+ void build_blocks(Node *node);
+ void walk(Node *n);
+ void start_element(const char *name, Properties *properties = NULL, bool print_indent = false, bool print_return = true);
+ void simple_element(const char *name, Properties *properties = NULL, bool print_indent = false);
+ void end_element(const char *name, bool print_indent = false, bool print_return = true);
+ void print_edge(int from, int to, int index);
+ void print_indent();
+ void print_method(ciMethod *method, int bci, InlineTree *tree);
+ void print_inline_tree(InlineTree *tree);
+ void clear_nodes();
+
+ IdealGraphPrinter();
+ ~IdealGraphPrinter();
+
+public:
+
+ static void clean_up();
+ static IdealGraphPrinter *printer();
+
+ bool traverse_outs();
+ void set_traverse_outs(bool b);
+ void print_ifg(PhaseIFG* ifg);
+ outputStream *output();
+ void print_inlining(Compile* compile);
+ void begin_method(Compile* compile);
+ void end_method();
+ void print_method(Compile* compile, const char *name, int level=1, bool clear_nodes = false);
+ void print(Compile* compile, const char *name, Node *root, int level=1, bool clear_nodes = false);
+ void print_xml(const char *name);
+
+
+};
+
+#endif
diff --git a/src/share/vm/opto/idealKit.cpp b/src/share/vm/opto/idealKit.cpp
new file mode 100644
index 000000000..ae65319f0
--- /dev/null
+++ b/src/share/vm/opto/idealKit.cpp
@@ -0,0 +1,503 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_idealKit.cpp.incl"
+
+// Static initialization
+
+// This declares the position where vars are kept in the cvstate
+// For some degree of consistency we use the TypeFunc enum to
+// soak up spots in the inputs even though we only use early Control
+// and Memory slots. (So far.)
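+//
+// The resulting cvstate layout (sketch, see new_cvstate() and goto_()):
+//   in(TypeFunc::Control)  - current control
+//   in(TypeFunc::Memory)   - current memory state (a MergeMem)
+//   in(first_var + i)      - current value of the i-th declared variable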
+const uint IdealKit::first_var = TypeFunc::Parms + 1;
+
+//----------------------------IdealKit-----------------------------------------
+IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms) :
+ _gvn(gvn), C(gvn.C) {
+ _initial_ctrl = control;
+ _initial_memory = mem;
+ _delay_all_transforms = delay_all_transforms;
+ _var_ct = 0;
+ _cvstate = NULL;
+ // We can go memory state free or else we need the entire memory state
+ assert(mem == NULL || mem->Opcode() == Op_MergeMem, "memory must be pre-split");
+ int init_size = 5;
+ _pending_cvstates = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
+ _delay_transform = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
+ DEBUG_ONLY(_state = new (C->node_arena()) GrowableArray<int>(C->node_arena(), init_size, 0, 0));
+}
+
+//-------------------------------if_then-------------------------------------
+// Create:   if(left relop right)
+//               /    \
+//         iffalse    iftrue
+// Push the iffalse cvstate onto the stack. The iftrue becomes the current cvstate.
+void IdealKit::if_then(Node* left, BoolTest::mask relop,
+ Node* right, float prob, float cnt, bool push_new_state) {
+ assert((state() & (BlockS|LoopS|IfThenS|ElseS)), "bad state for new If");
+ Node* bol;
+ if (left->bottom_type()->isa_ptr() == NULL) {
+ if (left->bottom_type()->isa_int() != NULL) {
+ bol = Bool(CmpI(left, right), relop);
+ } else {
+ assert(left->bottom_type()->isa_long() != NULL, "what else?");
+ bol = Bool(CmpL(left, right), relop);
+ }
+
+ } else {
+ bol = Bool(CmpP(left, right), relop);
+ }
+ // Delay gvn.transform on if-nodes until construction is finished
+ // to prevent a constant bool input from discarding a control output.
+ IfNode* iff = delay_transform(new (C, 2) IfNode(ctrl(), bol, prob, cnt))->as_If();
+ Node* then = IfTrue(iff);
+ Node* elsen = IfFalse(iff);
+ Node* else_cvstate = copy_cvstate();
+ else_cvstate->set_req(TypeFunc::Control, elsen);
+ _pending_cvstates->push(else_cvstate);
+ DEBUG_ONLY(if (push_new_state) _state->push(IfThenS));
+ set_ctrl(then);
+}
+
+//-------------------------------else_-------------------------------------
+// Pop the else cvstate off the stack, and push the (current) then cvstate.
+// The else cvstate becomes the current cvstate.
+void IdealKit::else_() {
+ assert(state() == IfThenS, "bad state for new Else");
+ Node* else_cvstate = _pending_cvstates->pop();
+ DEBUG_ONLY(_state->pop());
+ // save current (then) cvstate for later use at endif
+ _pending_cvstates->push(_cvstate);
+ DEBUG_ONLY(_state->push(ElseS));
+ _cvstate = else_cvstate;
+}
+
+//-------------------------------end_if-------------------------------------
+// Merge the "then" and "else" cvstates.
+//
+// The if_then() pushed the current state for later use
+// as the initial state for a future "else" clause. The
+// current state then became the initial state for the
+// then clause. If an "else" clause was encountered, it will
+// pop the top state and use it for its initial state.
+// It will also push the current state (the state at the end of
+// the "then" clause) for later use at the end_if.
+//
+// At the endif, the states are:
+// 1) else exists  a) current state is end of "else" clause
+//                 b) top stack state is end of "then" clause
+//
+// 2) no else:     a) current state is end of "then" clause
+//                 b) top stack state is from the "if_then" which
+//                    would have been the initial state of the else.
+//
+// Merging the states is accomplished by:
+// 1) make a label for the merge
+// 2) terminate the current state with a goto to the label
+// 3) pop the top state from the stack and make it the
+// current state
+// 4) bind the label at the current state. Binding a label
+// terminates the current state with a goto to the
+// label and makes the label's state the current state.
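+// (A short usage sketch follows the end_if() implementation below.)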
+//
+void IdealKit::end_if() {
+ assert(state() & (IfThenS|ElseS), "bad state for new Endif");
+ Node* lab = make_label(1);
+
+ // Node* join_state = _pending_cvstates->pop();
+ /* merging, join */
+ goto_(lab);
+ _cvstate = _pending_cvstates->pop();
+
+ bind(lab);
+ DEBUG_ONLY(_state->pop());
+}
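+
+// A minimal usage sketch (hypothetical caller: `kit` is an IdealKit after
+// declares_done(), `a` and `b` are previously built Node* values, and
+// `prob`/`cnt` stand for the usual branch probability/count arguments):
+//
+//   kit.if_then(a, BoolTest::gt, b, prob, cnt); {
+//     // ... statements for the "then" side ...
+//   } kit.else_(); {
+//     // ... statements for the "else" side ...
+//   } kit.end_if();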
+
+//-------------------------------loop-------------------------------------
+// Create the loop head portion (*) of:
+// *    iv = init
+// * top: (region node)
+// *    if (iv relop limit) {
+//        loop body
+//        i = i + 1
+//        goto top
+// *    } else // exits loop
+//
+// Pushes the loop top cvstate first, then the else (loop exit) cvstate
+// onto the stack.
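+// (A short usage sketch follows the end_loop() implementation below.)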
+void IdealKit::loop(IdealVariable& iv, Node* init, BoolTest::mask relop, Node* limit, float prob, float cnt) {
+ assert((state() & (BlockS|LoopS|IfThenS|ElseS)), "bad state for new loop");
+ set(iv, init);
+ Node* head = make_label(1);
+ bind(head);
+ _pending_cvstates->push(head); // push for use at end_loop
+ _cvstate = copy_cvstate();
+ if_then(value(iv), relop, limit, prob, cnt, false /* no new state */);
+ DEBUG_ONLY(_state->push(LoopS));
+ assert(ctrl()->is_IfTrue(), "true branch stays in loop");
+ assert(_pending_cvstates->top()->in(TypeFunc::Control)->is_IfFalse(), "false branch exits loop");
+}
+
+//-------------------------------end_loop-------------------------------------
+// Creates the goto top label.
+// Expects the else (loop exit) cvstate to be on top of the
+// stack, and the loop top cvstate to be 2nd.
+void IdealKit::end_loop() {
+ assert((state() == LoopS), "bad state for new end_loop");
+ Node* exit = _pending_cvstates->pop();
+ Node* head = _pending_cvstates->pop();
+ goto_(head);
+ clear(head);
+ DEBUG_ONLY(_state->pop());
+ _cvstate = exit;
+}
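+
+// A minimal usage sketch (hypothetical caller: `iv` is a declared
+// IdealVariable, `init` and `limit` are Node* values, `prob`/`cnt` as above):
+//
+//   kit.loop(iv, init, BoolTest::lt, limit, prob, cnt); {
+//     // ... loop body: typically reads kit.value(iv) and advances the
+//     //     induction variable with kit.set(iv, ...) ...
+//   } kit.end_loop();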
+
+//-------------------------------make_label-------------------------------------
+// Creates a label. The number of goto's
+// must be specified (which should be 1 less than
+// the number of predecessors.)
+Node* IdealKit::make_label(int goto_ct) {
+ assert(_cvstate != NULL, "must declare variables before labels");
+ Node* lab = new_cvstate();
+ int sz = 1 + goto_ct + 1 /* fall thru */;
+ Node* reg = delay_transform(new (C, sz) RegionNode(sz));
+ lab->init_req(TypeFunc::Control, reg);
+ return lab;
+}
+
+//-------------------------------bind-------------------------------------
+// Bind a label at the current cvstate by simulating
+// a goto to the label.
+void IdealKit::bind(Node* lab) {
+ goto_(lab, true /* bind */);
+ _cvstate = lab;
+}
+
+//-------------------------------goto_-------------------------------------
+// Make the current cvstate a predecessor of the label,
+// creating phi's to merge values. If bind is true and
+// this is not the last control edge, then ensure that
+// all live values have phis created. Used to create phis
+// at loop-top regions.
+void IdealKit::goto_(Node* lab, bool bind) {
+ Node* reg = lab->in(TypeFunc::Control);
+ // find next empty slot in region
+ uint slot = 1;
+ while (slot < reg->req() && reg->in(slot) != NULL) slot++;
+ assert(slot < reg->req(), "too many gotos");
+ // If this is the last predecessor, then don't force phi creation
+ if (slot == reg->req() - 1) bind = false;
+ reg->init_req(slot, ctrl());
+ assert(first_var + _var_ct == _cvstate->req(), "bad _cvstate size");
+ for (uint i = first_var; i < _cvstate->req(); i++) {
+
+ // l is the value of var reaching the label. It could be a single value
+ // reaching the label, or a phi that merges multiple values reaching
+ // the label. The latter is true if the label's input in(i) is
+ // a phi whose control input is the region node for the label.
+
+ Node* l = lab->in(i);
+ // Get the current value of the var
+ Node* m = _cvstate->in(i);
+ // If the var went unused no need for a phi
+ if (m == NULL) {
+ continue;
+ } else if (l == NULL || m == l) {
+ // Only one unique value "m" is known to reach this label so a phi
+ // is not yet necessary unless:
+ // the label is being bound and all predecessors have not been seen,
+ // in which case "bind" will be true.
+ if (bind) {
+ m = promote_to_phi(m, reg);
+ }
+ // Record the phi/value used for this var in the label's cvstate
+ lab->set_req(i, m);
+ } else {
+ // More than one value for the variable reaches this label, so
+ // create a phi if one does not already exist.
+ if (!was_promoted_to_phi(l, reg)) {
+ l = promote_to_phi(l, reg);
+ lab->set_req(i, l);
+ }
+ // Record the var's value from the current state in the phi
+ l->set_req(slot, m);
+ }
+ }
+ do_memory_merge(_cvstate, lab);
+ stop();
+}
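+
+// Illustrative sketch (assumes a label with two incoming gotos): if the first
+// goto_ reaches the label with variable slot i holding value "a" and the
+// second reaches it with value "b", the first goto_ records "a" directly in
+// the label's cvstate and the second promotes it to a phi on the label's region:
+//   lab->in(i) == a                      // after the first goto_
+//   lab->in(i) == Phi(region, a, b)      // after the second goto_ (promote_to_phi)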
+
+//-----------------------------promote_to_phi-----------------------------------
+Node* IdealKit::promote_to_phi(Node* n, Node* reg) {
+ assert(!was_promoted_to_phi(n, reg), "n already promoted to phi on this region");
+ // Get a conservative type for the phi
+ const BasicType bt = n->bottom_type()->basic_type();
+ const Type* ct = Type::get_const_basic_type(bt);
+ return delay_transform(PhiNode::make(reg, n, ct));
+}
+
+//-----------------------------declares_done-----------------------------------
+void IdealKit::declares_done() {
+ _cvstate = new_cvstate(); // initialize current cvstate
+ set_ctrl(_initial_ctrl); // initialize control in current cvstate
+ set_all_memory(_initial_memory);// initialize memory in current cvstate
+ DEBUG_ONLY(_state->push(BlockS));
+}
+
+//-----------------------------transform-----------------------------------
+Node* IdealKit::transform(Node* n) {
+ if (_delay_all_transforms) {
+ return delay_transform(n);
+ } else {
+ return gvn().transform(n);
+ }
+}
+
+//-----------------------------delay_transform-----------------------------------
+Node* IdealKit::delay_transform(Node* n) {
+ gvn().set_type(n, n->bottom_type());
+ _delay_transform->push(n);
+ return n;
+}
+
+//-----------------------------new_cvstate-----------------------------------
+Node* IdealKit::new_cvstate() {
+ uint sz = _var_ct + first_var;
+ return new (C, sz) Node(sz);
+}
+
+//-----------------------------copy_cvstate-----------------------------------
+Node* IdealKit::copy_cvstate() {
+ Node* ns = new_cvstate();
+ for (uint i = 0; i < ns->req(); i++) ns->init_req(i, _cvstate->in(i));
+ // We must clone memory since it will be updated as we do stores.
+ ns->set_req(TypeFunc::Memory, MergeMemNode::make(C, ns->in(TypeFunc::Memory)));
+ return ns;
+}
+
+//-----------------------------clear-----------------------------------
+void IdealKit::clear(Node* m) {
+ for (uint i = 0; i < m->req(); i++) m->set_req(i, NULL);
+}
+
+//-----------------------------drain_delay_transform----------------------------
+void IdealKit::drain_delay_transform() {
+ while (_delay_transform->length() > 0) {
+ Node* n = _delay_transform->pop();
+ gvn().transform(n);
+ if (!gvn().is_IterGVN()) {
+ C->record_for_igvn(n);
+ }
+ }
+}
+
+//-----------------------------IdealVariable----------------------------
+IdealVariable::IdealVariable(IdealKit &k) {
+ k.declare(this);
+}
+
+Node* IdealKit::memory(uint alias_idx) {
+ MergeMemNode* mem = merged_memory();
+ Node* p = mem->memory_at(alias_idx);
+ _gvn.set_type(p, Type::MEMORY); // must be mapped
+ return p;
+}
+
+void IdealKit::set_memory(Node* mem, uint alias_idx) {
+ merged_memory()->set_memory_at(alias_idx, mem);
+}
+
+//----------------------------- load ----------------------------
+Node* IdealKit::load(Node* ctl,
+ Node* adr,
+ const Type* t,
+ BasicType bt,
+ int adr_idx,
+ bool require_atomic_access) {
+
+ assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
+ const TypePtr* adr_type = NULL; // debug-mode-only argument
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node* mem = memory(adr_idx);
+ Node* ld;
+ if (require_atomic_access && bt == T_LONG) {
+ ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t);
+ } else {
+ ld = LoadNode::make(C, ctl, mem, adr, adr_type, t, bt);
+ }
+ return transform(ld);
+}
+
+Node* IdealKit::store(Node* ctl, Node* adr, Node *val, BasicType bt,
+ int adr_idx,
+ bool require_atomic_access) {
+ assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+ const TypePtr* adr_type = NULL;
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node *mem = memory(adr_idx);
+ Node* st;
+ if (require_atomic_access && bt == T_LONG) {
+ st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val);
+ } else {
+ st = StoreNode::make(C, ctl, mem, adr, adr_type, val, bt);
+ }
+ st = transform(st);
+ set_memory(st, adr_idx);
+
+ return st;
+}
+
+// Card mark store. Must be ordered so that it will come after the store of
+// the oop.
+Node* IdealKit::storeCM(Node* ctl, Node* adr, Node *val, Node* oop_store,
+ BasicType bt,
+ int adr_idx) {
+ assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+ const TypePtr* adr_type = NULL;
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node *mem = memory(adr_idx);
+
+ // Add a required edge to oop_store; the optimizer does not support precedence edges.
+ // The required edge is converted to a precedence edge before allocation.
+ Node* st = new (C, 5) StoreCMNode(ctl, mem, adr, adr_type, val, oop_store);
+
+ st = transform(st);
+ set_memory(st, adr_idx);
+
+ return st;
+}
+
+//---------------------------- do_memory_merge --------------------------------
+// The memory from one merging cvstate needs to be merged with the memory for another
+// join cvstate. If the join cvstate doesn't have a merged memory yet then we
+// can just copy the state from the merging cvstate.
+
+// Merge one slow path into the rest of memory.
+void IdealKit::do_memory_merge(Node* merging, Node* join) {
+
+ // Get the region for the join state
+ Node* join_region = join->in(TypeFunc::Control);
+ assert(join_region != NULL, "join region must exist");
+ if (join->in(TypeFunc::Memory) == NULL ) {
+ join->set_req(TypeFunc::Memory, merging->in(TypeFunc::Memory));
+ return;
+ }
+
+ // The control flow for merging must have already been attached to the join region
+ // we need its index for the phis.
+ uint slot;
+ for (slot = 1; slot < join_region->req() ; slot ++ ) {
+ if (join_region->in(slot) == merging->in(TypeFunc::Control)) break;
+ }
+ assert(slot != join_region->req(), "edge must already exist");
+
+ MergeMemNode* join_m = join->in(TypeFunc::Memory)->as_MergeMem();
+ MergeMemNode* merging_m = merging->in(TypeFunc::Memory)->as_MergeMem();
+
+ // join_m should be an ancestor mergemem of merging
+ // Slow path memory comes from the current map (which is from a slow call)
+ // Fast path/null path memory comes from the call's input
+
+ // Merge the other fast-memory inputs with the new slow-default memory.
+ // for (MergeMemStream mms(merged_memory(), fast_mem->as_MergeMem()); mms.next_non_empty2(); ) {
+ for (MergeMemStream mms(join_m, merging_m); mms.next_non_empty2(); ) {
+ Node* join_slice = mms.force_memory();
+ Node* merging_slice = mms.memory2();
+ if (join_slice != merging_slice) {
+ PhiNode* phi;
+ // bool new_phi = false;
+ // Is the phi for this slice one that we created for this join region or simply
+ // one we copied? If it is ours, reuse it and just add the merging slice below;
+ // otherwise create a new phi.
+ if (join_slice->is_Phi() && join_slice->as_Phi()->region() == join_region) {
+ phi = join_slice->as_Phi();
+ } else {
+ // create the phi with join_slice supplying memory for all of the
+ // control edges to the join region
+ phi = PhiNode::make(join_region, join_slice, Type::MEMORY, mms.adr_type(C));
+ phi = (PhiNode*) delay_transform(phi);
+ // gvn().set_type(phi, Type::MEMORY);
+ // new_phi = true;
+ }
+ // Now update the phi with the memory slice from the merging cvstate
+ phi->set_req(slot, merging_slice/* slow_path, slow_slice */);
+ // this updates join_m with the phi
+ mms.set_memory(phi);
+ }
+ }
+}
+
+
+//----------------------------- make_leaf_call ----------------------------
+// Trivial runtime leaf call
+void IdealKit::make_leaf_call(const TypeFunc *slow_call_type,
+ address slow_call,
+ const char *leaf_name,
+ Node* parm0,
+ Node* parm1,
+ Node* parm2) {
+
+ // We only handle taking in RawMem and modifying RawMem
+ const TypePtr* adr_type = TypeRawPtr::BOTTOM;
+ uint adr_idx = C->get_alias_index(adr_type);
+
+ // Clone initial memory
+ MergeMemNode* cloned_mem = MergeMemNode::make(C, merged_memory());
+
+ // Slow-path leaf call
+ int size = slow_call_type->domain()->cnt();
+ CallNode *call = (CallNode*)new (C, size) CallLeafNode( slow_call_type, slow_call, leaf_name, adr_type);
+
+ // Set fixed predefined input arguments
+ call->init_req( TypeFunc::Control, ctrl() );
+ call->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ // Narrow memory as only memory input
+ call->init_req( TypeFunc::Memory , memory(adr_idx));
+ call->init_req( TypeFunc::FramePtr, top() /* frameptr() */ );
+ call->init_req( TypeFunc::ReturnAdr, top() );
+
+ if (parm0 != NULL) call->init_req(TypeFunc::Parms+0, parm0);
+ if (parm1 != NULL) call->init_req(TypeFunc::Parms+1, parm1);
+ if (parm2 != NULL) call->init_req(TypeFunc::Parms+2, parm2);
+
+ // Node *c = _gvn.transform(call);
+ call = (CallNode *) _gvn.transform(call);
+ Node *c = call; // dbx gets confused with call call->dump()
+
+ // Slow leaf call has no side-effects, sets few values
+
+ set_ctrl(transform( new (C, 1) ProjNode(call,TypeFunc::Control) ));
+
+ // Set the incoming clone of memory as current memory
+ set_all_memory(cloned_mem);
+
+ // Make memory for the call
+ Node* mem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+
+ // Set the RawPtr memory state only.
+ set_memory(mem, adr_idx);
+
+ assert(C->alias_type(call->adr_type()) == C->alias_type(adr_type),
+ "call node must be constructed correctly");
+}
diff --git a/src/share/vm/opto/idealKit.hpp b/src/share/vm/opto/idealKit.hpp
new file mode 100644
index 000000000..5ccdb77b3
--- /dev/null
+++ b/src/share/vm/opto/idealKit.hpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//-----------------------------------------------------------------------------
+//----------------------------IdealKit-----------------------------------------
+// Set of utilities for creating control flow and scalar SSA data flow.
+// Control:
+// if_then(left, relop, right)
+// else_ (optional)
+// end_if
+// loop(iv variable, initial, relop, limit)
+// - sets iv to initial for first trip
+// - exits when relation on limit is true
+// - the values of initial and limit should be loop invariant
+// - no increment, must be explicitly coded
+// - final value of iv is available after end_loop (until dead())
+// end_loop
+// make_label(number of gotos)
+// goto_(label)
+// bind(label)
+// Data:
+// ConI(integer constant) - create an integer constant
+// set(variable, value) - assignment
+// value(variable) - reference value
+// dead(variable) - variable's value is no longer live
+// increment(variable, value) - increment variable by value
+// simple operations: AddI, SubI, AndI, LShiftI, etc.
+// Example:
+// Node* limit = ??
+// IdealVariable i(kit), j(kit);
+// declares_done();
+// Node* exit = make_label(1); // 1 goto
+// set(j, ConI(0));
+// loop(i, ConI(0), BoolTest::lt, limit); {
+// if_then(value(i), BoolTest::gt, ConI(5)) {
+// set(j, ConI(1));
+// goto_(exit); dead(i);
+// } end_if();
+// increment(i, ConI(1));
+// } end_loop(); dead(i);
+// bind(exit);
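+//
+// A second, purely illustrative sketch (same assumed kit and helpers as the
+// example above) showing the if/else form; end_if() merges the two arms and
+// creates phis for any variable set on both sides:
+//   IdealVariable flag(kit);
+//   declares_done();
+//   set(flag, ConI(0));
+//   if_then(value(flag), BoolTest::eq, ConI(0)); {
+//     set(flag, ConI(1));
+//   } else_(); {
+//     set(flag, ConI(2));
+//   } end_if();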
+//
+// See string_indexOf for a more complete example.
+
+class IdealKit;
+
+// Variable definition for IdealKit
+class IdealVariable: public StackObj {
+ friend class IdealKit;
+ private:
+ int _id;
+ void set_id(int id) { _id = id; }
+ public:
+ IdealVariable(IdealKit &k);
+ int id() { assert(has_id(),"uninitialized id"); return _id; }
+ bool has_id() { return _id >= 0; }
+};
+
+class IdealKit: public StackObj {
+ friend class IdealVariable;
+ // The main state (called a cvstate for Control and Variables)
+ // contains both the current values of the variables and the
+ // current set of predecessor control edges. The variable values
+ // are managed via a Node [in(1)..in(_var_ct)], and the predecessor
+ // control edges managed via a RegionNode. The in(0) of the Node
+ // for variables points to the RegionNode for the control edges.
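+ //
+ // Illustrative layout of one cvstate Node (a sketch; only the slots the kit
+ // actually uses are shown, with first_var == TypeFunc::Parms + 1):
+ //   in(TypeFunc::Control)  -> RegionNode merging predecessor control edges
+ //   in(TypeFunc::Memory)   -> memory state (accessed via merged_memory())
+ //   in(first_var + v.id()) -> current value of IdealVariable v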
+ protected:
+ Compile * const C;
+ PhaseGVN &_gvn;
+ GrowableArray<Node*>* _pending_cvstates; // stack of cvstates
+ GrowableArray<Node*>* _delay_transform; // delay invoking gvn.transform until drain
+ Node* _cvstate; // current cvstate (control, memory and variables)
+ uint _var_ct; // number of variables
+ bool _delay_all_transforms; // flag forcing all transforms to be delayed
+ Node* _initial_ctrl; // saves initial control until variables declared
+ Node* _initial_memory; // saves initial memory until variables declared
+
+ PhaseGVN& gvn() const { return _gvn; }
+ // Create a new cvstate filled with nulls
+ Node* new_cvstate(); // Create a new cvstate
+ Node* cvstate() { return _cvstate; } // current cvstate
+ Node* copy_cvstate(); // copy current cvstate
+ void set_ctrl(Node* ctrl) { _cvstate->set_req(TypeFunc::Control, ctrl); }
+
+ // Should this assert this is a MergeMem???
+ void set_all_memory(Node* mem){ _cvstate->set_req(TypeFunc::Memory, mem); }
+ void set_memory(Node* mem, uint alias_idx );
+ void do_memory_merge(Node* merging, Node* join);
+ void clear(Node* m); // clear a cvstate
+ void stop() { clear(_cvstate); } // clear current cvstate
+ Node* delay_transform(Node* n);
+ Node* transform(Node* n); // gvn.transform or push node on delay list
+ Node* promote_to_phi(Node* n, Node* reg);// Promote "n" to a phi on region "reg"
+ bool was_promoted_to_phi(Node* n, Node* reg) {
+ return (n->is_Phi() && n->in(0) == reg);
+ }
+ void declare(IdealVariable* v) { v->set_id(_var_ct++); }
+ // This declares the position where vars are kept in the cvstate
+ // For some degree of consistency we use the TypeFunc enum to
+ // soak up spots in the inputs even though we only use early Control
+ // and Memory slots. (So far.)
+ static const uint first_var; // = TypeFunc::Parms + 1;
+
+#ifdef ASSERT
+ enum State { NullS=0, BlockS=1, LoopS=2, IfThenS=4, ElseS=8, EndifS= 16 };
+ GrowableArray<int>* _state;
+ State state() { return (State)(_state->top()); }
+#endif
+
+ // Users should not care about individual slices, only MergeMem, so no access for them.
+ Node* memory(uint alias_idx);
+
+ public:
+ IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false);
+ ~IdealKit() {
+ stop();
+ drain_delay_transform();
+ }
+ // Control
+ Node* ctrl() { return _cvstate->in(TypeFunc::Control); }
+ Node* top() { return C->top(); }
+ MergeMemNode* merged_memory() { return _cvstate->in(TypeFunc::Memory)->as_MergeMem(); }
+ void set(IdealVariable& v, Node* rhs) { _cvstate->set_req(first_var + v.id(), rhs); }
+ Node* value(IdealVariable& v) { return _cvstate->in(first_var + v.id()); }
+ void dead(IdealVariable& v) { set(v, (Node*)NULL); }
+ void if_then(Node* left, BoolTest::mask relop, Node* right,
+ float prob = PROB_FAIR, float cnt = COUNT_UNKNOWN,
+ bool push_new_state = true);
+ void else_();
+ void end_if();
+ void loop(IdealVariable& iv, Node* init, BoolTest::mask cmp, Node* limit,
+ float prob = PROB_LIKELY(0.9), float cnt = COUNT_UNKNOWN);
+ void end_loop();
+ Node* make_label(int goto_ct);
+ void bind(Node* lab);
+ void goto_(Node* lab, bool bind = false);
+ void declares_done();
+ void drain_delay_transform();
+
+ Node* IfTrue(IfNode* iff) { return transform(new (C,1) IfTrueNode(iff)); }
+ Node* IfFalse(IfNode* iff) { return transform(new (C,1) IfFalseNode(iff)); }
+
+ // Data
+ Node* ConI(jint k) { return (Node*)gvn().intcon(k); }
+ Node* makecon(const Type *t) const { return _gvn.makecon(t); }
+
+ Node* AddI(Node* l, Node* r) { return transform(new (C,3) AddINode(l, r)); }
+ Node* SubI(Node* l, Node* r) { return transform(new (C,3) SubINode(l, r)); }
+ Node* AndI(Node* l, Node* r) { return transform(new (C,3) AndINode(l, r)); }
+ Node* MaxI(Node* l, Node* r) { return transform(new (C,3) MaxINode(l, r)); }
+ Node* LShiftI(Node* l, Node* r) { return transform(new (C,3) LShiftINode(l, r)); }
+ Node* CmpI(Node* l, Node* r) { return transform(new (C,3) CmpINode(l, r)); }
+ Node* Bool(Node* cmp, BoolTest::mask relop) { return transform(new (C,2) BoolNode(cmp, relop)); }
+ void increment(IdealVariable& v, Node* j) { set(v, AddI(value(v), j)); }
+ void decrement(IdealVariable& v, Node* j) { set(v, SubI(value(v), j)); }
+
+ Node* CmpL(Node* l, Node* r) { return transform(new (C,3) CmpLNode(l, r)); }
+
+ // TLS
+ Node* thread() { return gvn().transform(new (C, 1) ThreadLocalNode()); }
+
+ // Pointers
+ Node* AddP(Node *base, Node *ptr, Node *off) { return transform(new (C,4) AddPNode(base, ptr, off)); }
+ Node* CmpP(Node* l, Node* r) { return transform(new (C,3) CmpPNode(l, r)); }
+#ifdef _LP64
+ Node* XorX(Node* l, Node* r) { return transform(new (C,3) XorLNode(l, r)); }
+#else // _LP64
+ Node* XorX(Node* l, Node* r) { return transform(new (C,3) XorINode(l, r)); }
+#endif // _LP64
+ Node* URShiftX(Node* l, Node* r) { return transform(new (C,3) URShiftXNode(l, r)); }
+ Node* ConX(jint k) { return (Node*)gvn().MakeConX(k); }
+ Node* CastPX(Node* ctl, Node* p) { return transform(new (C,2) CastP2XNode(ctl, p)); }
+ // Add a fixed offset to a pointer
+ Node* basic_plus_adr(Node* base, Node* ptr, intptr_t offset);
+
+ // Memory operations
+
+ // This is the base version which is given an alias index.
+ Node* load(Node* ctl,
+ Node* adr,
+ const Type* t,
+ BasicType bt,
+ int adr_idx,
+ bool require_atomic_access = false);
+
+ // Return the new StoreXNode
+ Node* store(Node* ctl,
+ Node* adr,
+ Node* val,
+ BasicType bt,
+ int adr_idx,
+ bool require_atomic_access = false);
+
+ // Store a card mark ordered after store_oop
+ Node* storeCM(Node* ctl,
+ Node* adr,
+ Node* val,
+ Node* oop_store,
+ BasicType bt,
+ int adr_idx);
+
+ // Trivial call
+ void make_leaf_call(const TypeFunc *slow_call_type,
+ address slow_call,
+ const char *leaf_name,
+ Node* parm0,
+ Node* parm1 = NULL,
+ Node* parm2 = NULL);
+};
diff --git a/src/share/vm/opto/ifg.cpp b/src/share/vm/opto/ifg.cpp
new file mode 100644
index 000000000..2c6cd665f
--- /dev/null
+++ b/src/share/vm/opto/ifg.cpp
@@ -0,0 +1,813 @@
+/*
+ * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_ifg.cpp.incl"
+
+#define EXACT_PRESSURE 1
+
+//=============================================================================
+//------------------------------IFG--------------------------------------------
+PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
+}
+
+//------------------------------init-------------------------------------------
+void PhaseIFG::init( uint maxlrg ) {
+ _maxlrg = maxlrg;
+ _yanked = new (_arena) VectorSet(_arena);
+ _is_square = false;
+ // Make uninitialized adjacency lists
+ _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg);
+ // Also make empty live range structures
+ _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) );
+ memset(_lrgs,0,sizeof(LRG)*maxlrg);
+ // Init all to empty
+ for( uint i = 0; i < maxlrg; i++ ) {
+ _adjs[i].initialize(maxlrg);
+ _lrgs[i].Set_All();
+ }
+}
+
+//------------------------------add--------------------------------------------
+// Add edge between vertices a & b. These are sorted (triangular matrix),
+// then the smaller number is inserted in the larger numbered array.
+int PhaseIFG::add_edge( uint a, uint b ) {
+ lrgs(a).invalid_degree();
+ lrgs(b).invalid_degree();
+ // Sort a and b, so that a is bigger
+ assert( !_is_square, "only on triangular" );
+ if( a < b ) { uint tmp = a; a = b; b = tmp; }
+ return _adjs[a].insert( b );
+}
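+
+// Illustrative sketch: with this triangular representation, add_edge(3, 7)
+// and add_edge(7, 3) both record the edge as _adjs[7].insert(3); the smaller
+// index always lands in the adjacency set of the larger index.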
+
+//------------------------------add_vector-------------------------------------
+// Add an edge between 'a' and everything in the vector.
+void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
+ // IFG is triangular, so do the inserts where 'a' < 'b'.
+ assert( !_is_square, "only on triangular" );
+ IndexSet *adjs_a = &_adjs[a];
+ if( !vec->count() ) return;
+
+ IndexSetIterator elements(vec);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ add_edge( a, neighbor );
+ }
+}
+
+//------------------------------test-------------------------------------------
+// Is there an edge between a and b?
+int PhaseIFG::test_edge( uint a, uint b ) const {
+ // Sort a and b, so that a is larger
+ assert( !_is_square, "only on triangular" );
+ if( a < b ) { uint tmp = a; a = b; b = tmp; }
+ return _adjs[a].member(b);
+}
+
+//------------------------------SquareUp---------------------------------------
+// Convert triangular matrix to square matrix
+void PhaseIFG::SquareUp() {
+ assert( !_is_square, "only on triangular" );
+
+ // Simple transpose
+ for( uint i = 0; i < _maxlrg; i++ ) {
+ IndexSetIterator elements(&_adjs[i]);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ _adjs[datum].insert( i );
+ }
+ }
+ _is_square = true;
+}
+
+//------------------------------Compute_Effective_Degree-----------------------
+// Compute effective degree in bulk
+void PhaseIFG::Compute_Effective_Degree() {
+ assert( _is_square, "only on square" );
+
+ for( uint i = 0; i < _maxlrg; i++ )
+ lrgs(i).set_degree(effective_degree(i));
+}
+
+//------------------------------test_edge_sq-----------------------------------
+int PhaseIFG::test_edge_sq( uint a, uint b ) const {
+ assert( _is_square, "only on square" );
+ // Swap, so that 'a' has the lesser count. Then binary search is on
+ // the smaller of a's list and b's list.
+ if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; }
+ //return _adjs[a].unordered_member(b);
+ return _adjs[a].member(b);
+}
+
+//------------------------------Union------------------------------------------
+// Union edges of B into A
+void PhaseIFG::Union( uint a, uint b ) {
+ assert( _is_square, "only on square" );
+ IndexSet *A = &_adjs[a];
+ IndexSetIterator b_elements(&_adjs[b]);
+ uint datum;
+ while ((datum = b_elements.next()) != 0) {
+ if(A->insert(datum)) {
+ _adjs[datum].insert(a);
+ lrgs(a).invalid_degree();
+ lrgs(datum).invalid_degree();
+ }
+ }
+}
+
+//------------------------------remove_node------------------------------------
+// Yank a Node and all connected edges from the IFG. Return a
+// list of neighbors (edges) yanked.
+IndexSet *PhaseIFG::remove_node( uint a ) {
+ assert( _is_square, "only on square" );
+ assert( !_yanked->test(a), "" );
+ _yanked->set(a);
+
+ // I remove the LRG from all neighbors.
+ IndexSetIterator elements(&_adjs[a]);
+ LRG &lrg_a = lrgs(a);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ _adjs[datum].remove(a);
+ lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) );
+ }
+ return neighbors(a);
+}
+
+//------------------------------re_insert--------------------------------------
+// Re-insert a yanked Node.
+void PhaseIFG::re_insert( uint a ) {
+ assert( _is_square, "only on square" );
+ assert( _yanked->test(a), "" );
+ (*_yanked) >>= a;
+
+ IndexSetIterator elements(&_adjs[a]);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ _adjs[datum].insert(a);
+ lrgs(datum).invalid_degree();
+ }
+}
+
+//------------------------------compute_degree---------------------------------
+// Compute the degree between 2 live ranges. If both live ranges are
+// aligned-adjacent powers-of-2 then we use the MAX size. If either is
+// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
+// MULTIPLY the sizes. Inspect Briggs' thesis on register pairs to see why
+// this is so.
+int LRG::compute_degree( LRG &l ) const {
+ int tmp;
+ int num_regs = _num_regs;
+ int nregs = l.num_regs();
+ tmp = (_fat_proj || l._fat_proj) // either is a fat-proj?
+ ? (num_regs * nregs) // then use product
+ : MAX2(num_regs,nregs); // else use max
+ return tmp;
+}
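+
+// Illustrative worked example (numbers assumed): two aligned register pairs
+// (2 regs each, neither a fat-proj) contribute MAX2(2,2) = 2 to each other's
+// degree, while a 2-reg fat-proj against a 2-reg pair contributes 2*2 = 4,
+// reflecting the stronger constraint described above.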
+
+//------------------------------effective_degree-------------------------------
+// Compute effective degree for this live range. If both live ranges are
+// aligned-adjacent powers-of-2 then we use the MAX size. If either is
+// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
+// MULTIPLY the sizes. Inspect Briggs' thesis on register pairs to see why
+// this is so.
+int PhaseIFG::effective_degree( uint lidx ) const {
+ int eff = 0;
+ int num_regs = lrgs(lidx).num_regs();
+ int fat_proj = lrgs(lidx)._fat_proj;
+ IndexSet *s = neighbors(lidx);
+ IndexSetIterator elements(s);
+ uint nidx;
+ while((nidx = elements.next()) != 0) {
+ LRG &lrgn = lrgs(nidx);
+ int nregs = lrgn.num_regs();
+ eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
+ ? (num_regs * nregs) // then use product
+ : MAX2(num_regs,nregs); // else use max
+ }
+ return eff;
+}
+
+
+#ifndef PRODUCT
+//------------------------------dump-------------------------------------------
+void PhaseIFG::dump() const {
+ tty->print_cr("-- Interference Graph --%s--",
+ _is_square ? "square" : "triangular" );
+ if( _is_square ) {
+ for( uint i = 0; i < _maxlrg; i++ ) {
+ tty->print( (*_yanked)[i] ? "XX " : " ");
+ tty->print("L%d: { ",i);
+ IndexSetIterator elements(&_adjs[i]);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ tty->print("L%d ", datum);
+ }
+ tty->print_cr("}");
+
+ }
+ return;
+ }
+
+ // Triangular
+ for( uint i = 0; i < _maxlrg; i++ ) {
+ uint j;
+ tty->print( (*_yanked)[i] ? "XX " : " ");
+ tty->print("L%d: { ",i);
+ for( j = _maxlrg; j > i; j-- )
+ if( test_edge(j - 1,i) ) {
+ tty->print("L%d ",j - 1);
+ }
+ tty->print("| ");
+ IndexSetIterator elements(&_adjs[i]);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ tty->print("L%d ", datum);
+ }
+ tty->print("}\n");
+ }
+ tty->print("\n");
+}
+
+//------------------------------stats------------------------------------------
+void PhaseIFG::stats() const {
+ ResourceMark rm;
+ int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
+ memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
+ uint i;
+ for( i = 0; i < _maxlrg; i++ ) {
+ h_cnt[neighbor_cnt(i)]++;
+ }
+ tty->print_cr("--Histogram of counts--");
+ for( i = 0; i < _maxlrg*2; i++ )
+ if( h_cnt[i] )
+ tty->print("%d/%d ",i,h_cnt[i]);
+ tty->print_cr("");
+}
+
+//------------------------------verify-----------------------------------------
+void PhaseIFG::verify( const PhaseChaitin *pc ) const {
+ // IFG is square, sorted and no need for Find
+ for( uint i = 0; i < _maxlrg; i++ ) {
+ assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely" );
+ IndexSet *set = &_adjs[i];
+ IndexSetIterator elements(set);
+ uint idx;
+ uint last = 0;
+ while ((idx = elements.next()) != 0) {
+ assert( idx != i, "Must have empty diagonal");
+ assert( pc->Find_const(idx) == idx, "Must not need Find" );
+ assert( _adjs[idx].member(i), "IFG not square" );
+ assert( !(*_yanked)[idx], "No yanked neighbors" );
+ assert( last < idx, "not sorted increasing");
+ last = idx;
+ }
+ assert( !lrgs(i)._degree_valid ||
+ effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong" );
+ }
+}
+#endif
+
+//------------------------------interfere_with_live----------------------------
+// Interfere this register with everything currently live. Use the RegMasks
+// to trim the set of possible interferences. Return a count of register-only
+// interferences as an estimate of register pressure.
+void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) {
+ uint retval = 0;
+ // Interfere with everything live.
+ const RegMask &rm = lrgs(r).mask();
+ // Check for interference by checking overlap of regmasks.
+ // Only interfere if acceptable register masks overlap.
+ IndexSetIterator elements(liveout);
+ uint l;
+ while( (l = elements.next()) != 0 )
+ if( rm.overlap( lrgs(l).mask() ) )
+ _ifg->add_edge( r, l );
+}
+
+//------------------------------build_ifg_virtual------------------------------
+// Actually build the interference graph. Uses virtual registers only, no
+// physical register masks. This allows me to be very aggressive when
+// coalescing copies. Some of this aggressiveness will have to be undone
+// later, but I'd rather get all the copies I can now (since unremoved copies
+// at this point can end up in bad places). Copies I re-insert later give me
+// more opportunity to place them in low-frequency locations.
+void PhaseChaitin::build_ifg_virtual( ) {
+
+ // For all blocks (in any order) do...
+ for( uint i=0; i<_cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ IndexSet *liveout = _live->live(b);
+
+ // The IFG is built by a single reverse pass over each basic block.
+ // Starting with the known live-out set, we remove things that get
+ // defined and add things that become live (essentially executing one
+ // pass of a standard LIVE analysis). Just before a Node defines a value
+ // (and removes it from the live-ness set) that value is certainly live.
+ // The defined value interferes with everything currently live. The
+// value is then removed from the live-ness set and its inputs are
+ // added to the live-ness set.
+ for( uint j = b->end_idx() + 1; j > 1; j-- ) {
+ Node *n = b->_nodes[j-1];
+
+ // Get value being defined
+ uint r = n2lidx(n);
+
+ // Some special values do not allocate
+ if( r ) {
+
+ // Remove from live-out set
+ liveout->remove(r);
+
+ // Copies do not define a new value and so do not interfere.
+ // Remove the copy's source from the liveout set before interfering.
+ uint idx = n->is_Copy();
+ if( idx ) liveout->remove( n2lidx(n->in(idx)) );
+
+ // Interfere with everything live
+ interfere_with_live( r, liveout );
+ }
+
+ // Make all inputs live
+ if( !n->is_Phi() ) { // Phi function uses come from prior block
+ for( uint k = 1; k < n->req(); k++ )
+ liveout->insert( n2lidx(n->in(k)) );
+ }
+
+ // 2-address instructions always have the defined value live
+ // on entry to the instruction, even though it is being defined
+ // by the instruction. We pretend a virtual copy sits just prior
+ // to the instruction and kills the src-def'd register.
+ // In other words, for 2-address instructions the defined value
+ // interferes with all inputs.
+ uint idx;
+ if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
+ const MachNode *mach = n->as_Mach();
+ // Sometimes my 2-address ADDs are commuted in a bad way.
+ // We generally want the USE-DEF register to refer to the
+ // loop-varying quantity, to avoid a copy.
+ uint op = mach->ideal_Opcode();
+ // Check that mach->num_opnds() == 3 to ensure instruction is
+ // not subsuming constants, effectively excludes addI_cin_imm
+ // Can NOT swap for instructions like addI_cin_imm since it
+ // is adding zero to yhi + carry and the second ideal-input
+ // points to the result of adding low-halves.
+ // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
+ if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
+ n->in(1)->bottom_type()->base() == Type::Int &&
+ // See if the ADD is involved in a tight data loop the wrong way
+ n->in(2)->is_Phi() &&
+ n->in(2)->in(2) == n ) {
+ Node *tmp = n->in(1);
+ n->set_req( 1, n->in(2) );
+ n->set_req( 2, tmp );
+ }
+ // Defined value interferes with all inputs
+ uint lidx = n2lidx(n->in(idx));
+ for( uint k = 1; k < n->req(); k++ ) {
+ uint kidx = n2lidx(n->in(k));
+ if( kidx != lidx )
+ _ifg->add_edge( r, kidx );
+ }
+ }
+ } // End of forall instructions in block
+ } // End of forall blocks
+}
+
+//------------------------------count_int_pressure-----------------------------
+uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
+ IndexSetIterator elements(liveout);
+ uint lidx;
+ uint cnt = 0;
+ while ((lidx = elements.next()) != 0) {
+ if( lrgs(lidx).mask().is_UP() &&
+ lrgs(lidx).mask_size() &&
+ !lrgs(lidx)._is_float &&
+ lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) )
+ cnt += lrgs(lidx).reg_pressure();
+ }
+ return cnt;
+}
+
+//------------------------------count_float_pressure---------------------------
+uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
+ IndexSetIterator elements(liveout);
+ uint lidx;
+ uint cnt = 0;
+ while ((lidx = elements.next()) != 0) {
+ if( lrgs(lidx).mask().is_UP() &&
+ lrgs(lidx).mask_size() &&
+ lrgs(lidx)._is_float )
+ cnt += lrgs(lidx).reg_pressure();
+ }
+ return cnt;
+}
+
+//------------------------------lower_pressure---------------------------------
+// Adjust register pressure down by the live range's register pressure.
+// Capture the last hi-to-lo pressure transition.
+static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
+ if( lrg->mask().is_UP() && lrg->mask_size() ) {
+ if( lrg->_is_float ) {
+ pressure[1] -= lrg->reg_pressure();
+ if( pressure[1] == (uint)FLOATPRESSURE ) {
+ hrp_index[1] = where;
+#ifdef EXACT_PRESSURE
+ if( pressure[1] > b->_freg_pressure )
+ b->_freg_pressure = pressure[1]+1;
+#else
+ b->_freg_pressure = (uint)FLOATPRESSURE+1;
+#endif
+ }
+ } else if( lrg->mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+ pressure[0] -= lrg->reg_pressure();
+ if( pressure[0] == (uint)INTPRESSURE ) {
+ hrp_index[0] = where;
+#ifdef EXACT_PRESSURE
+ if( pressure[0] > b->_reg_pressure )
+ b->_reg_pressure = pressure[0]+1;
+#else
+ b->_reg_pressure = (uint)INTPRESSURE+1;
+#endif
+ }
+ }
+ }
+}
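+
+// Illustrative worked example (numbers assumed): with INTPRESSURE == 6, if an
+// integer live range with reg_pressure() == 1 dies at instruction index j and
+// pressure[0] drops from 7 to 6, hrp_index[0] is set to j, recording the last
+// point in the block where int pressure fell back to the threshold.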
+
+//------------------------------build_ifg_physical-----------------------------
+// Build the interference graph using physical registers when available.
+// That is, if 2 live ranges are simultaneously alive but in their acceptable
+// register sets do not overlap, then they do not interfere.
+uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
+ NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); )
+
+ uint spill_reg = LRG::SPILL_REG;
+ uint must_spill = 0;
+
+ // For all blocks (in any order) do...
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ // Clone (rather than smash in place) the liveout info, so it is alive
+ // for the "collect_gc_info" phase later.
+ IndexSet liveout(_live->live(b));
+ uint last_inst = b->end_idx();
+ // Compute last phi index
+ uint last_phi;
+ for( last_phi = 1; last_phi < last_inst; last_phi++ )
+ if( !b->_nodes[last_phi]->is_Phi() )
+ break;
+
+ // Reset block's register pressure values for each ifg construction
+ uint pressure[2], hrp_index[2];
+ pressure[0] = pressure[1] = 0;
+ hrp_index[0] = hrp_index[1] = last_inst+1;
+ b->_reg_pressure = b->_freg_pressure = 0;
+ // Liveout things are presumed live for the whole block. We accumulate
+ // 'area' accordingly. If they get killed in the block, we'll subtract
+ // the unused part of the block from the area.
+ double cost = b->_freq * double(last_inst-last_phi);
+ assert( cost >= 0, "negative spill cost" );
+ IndexSetIterator elements(&liveout);
+ uint lidx;
+ while ((lidx = elements.next()) != 0) {
+ LRG &lrg = lrgs(lidx);
+ lrg._area += cost;
+ // Compute initial register pressure
+ if( lrg.mask().is_UP() && lrg.mask_size() ) {
+ if( lrg._is_float ) { // Count float pressure
+ pressure[1] += lrg.reg_pressure();
+#ifdef EXACT_PRESSURE
+ if( pressure[1] > b->_freg_pressure )
+ b->_freg_pressure = pressure[1];
+#endif
+ // Count int pressure, but do not count the SP, flags
+ } else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+ pressure[0] += lrg.reg_pressure();
+#ifdef EXACT_PRESSURE
+ if( pressure[0] > b->_reg_pressure )
+ b->_reg_pressure = pressure[0];
+#endif
+ }
+ }
+ }
+ assert( pressure[0] == count_int_pressure (&liveout), "" );
+ assert( pressure[1] == count_float_pressure(&liveout), "" );
+
+ // The IFG is built by a single reverse pass over each basic block.
+ // Starting with the known live-out set, we remove things that get
+ // defined and add things that become live (essentially executing one
+ // pass of a standard LIVE analysis). Just before a Node defines a value
+ // (and removes it from the live-ness set) that value is certainly live.
+ // The defined value interferes with everything currently live. The
+// value is then removed from the live-ness set and its inputs are added
+ // to the live-ness set.
+ uint j;
+ for( j = last_inst + 1; j > 1; j-- ) {
+ Node *n = b->_nodes[j - 1];
+
+ // Get value being defined
+ uint r = n2lidx(n);
+
+ // Some special values do not allocate
+ if( r ) {
+ // A DEF normally costs block frequency; rematerialized values are
+ // removed from the DEF site, so LOWER costs here.
+ lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq;
+
+ // If it is not live, then this instruction is dead. Probably caused
+ // by spilling and rematerialization. Who cares why, yank this baby.
+ if( !liveout.member(r) && n->Opcode() != Op_SafePoint ) {
+ Node *def = n->in(0);
+ if( !n->is_Proj() ||
+ // Could also be a flags-projection of a dead ADD or such.
+ (n2lidx(def) && !liveout.member(n2lidx(def)) ) ) {
+ b->_nodes.remove(j - 1);
+ if( lrgs(r)._def == n ) lrgs(r)._def = 0;
+ n->disconnect_inputs(NULL);
+ _cfg._bbs.map(n->_idx,NULL);
+ n->replace_by(C->top());
+ // Since yanking a Node from block, high pressure moves up one
+ hrp_index[0]--;
+ hrp_index[1]--;
+ continue;
+ }
+
+ // Fat-projections kill many registers which cannot be used to
+ // hold live ranges.
+ if( lrgs(r)._fat_proj ) {
+ // Count the int-only registers
+ RegMask itmp = lrgs(r).mask();
+ itmp.AND(*Matcher::idealreg2regmask[Op_RegI]);
+ int iregs = itmp.Size();
+#ifdef EXACT_PRESSURE
+ if( pressure[0]+iregs > b->_reg_pressure )
+ b->_reg_pressure = pressure[0]+iregs;
+#endif
+ if( pressure[0] <= (uint)INTPRESSURE &&
+ pressure[0]+iregs > (uint)INTPRESSURE ) {
+#ifndef EXACT_PRESSURE
+ b->_reg_pressure = (uint)INTPRESSURE+1;
+#endif
+ hrp_index[0] = j-1;
+ }
+ // Count the float-only registers
+ RegMask ftmp = lrgs(r).mask();
+ ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]);
+ int fregs = ftmp.Size();
+#ifdef EXACT_PRESSURE
+ if( pressure[1]+fregs > b->_freg_pressure )
+ b->_freg_pressure = pressure[1]+fregs;
+#endif
+ if( pressure[1] <= (uint)FLOATPRESSURE &&
+ pressure[1]+fregs > (uint)FLOATPRESSURE ) {
+#ifndef EXACT_PRESSURE
+ b->_freg_pressure = (uint)FLOATPRESSURE+1;
+#endif
+ hrp_index[1] = j-1;
+ }
+ }
+
+ } else { // Else it is live
+ // A DEF also ends 'area' partway through the block.
+ lrgs(r)._area -= cost;
+ assert( lrgs(r)._area >= 0, "negative spill area" );
+
+ // Ensure high score for immediate-use spill copies so they get a color
+ if( n->is_SpillCopy()
+ && lrgs(r)._def != NodeSentinel // MultiDef live range can still split
+ && n->outcnt() == 1 // and use must be in this block
+ && _cfg._bbs[n->unique_out()->_idx] == b ) {
+ // All single-use MachSpillCopy(s) that immediately precede their
+ // use must color early. If a longer live range steals their
+ // color, the spill copy will split and may push another spill copy
+ // further away resulting in an infinite spill-split-retry cycle.
+ // Assigning a zero area results in a high score() and a good
+ // location in the simplify list.
+ //
+
+ Node *single_use = n->unique_out();
+ assert( b->find_node(single_use) >= j, "Use must be later in block");
+ // Use can be earlier in block if it is a Phi, but then I should be a MultiDef
+
+ // Find first non SpillCopy 'm' that follows the current instruction
+ // (j - 1) is index for current instruction 'n'
+ Node *m = n;
+ for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; }
+ if( m == single_use ) {
+ lrgs(r)._area = 0.0;
+ }
+ }
+
+ // Remove from live-out set
+ if( liveout.remove(r) ) {
+ // Adjust register pressure.
+ // Capture last hi-to-lo pressure transition
+ lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index );
+ assert( pressure[0] == count_int_pressure (&liveout), "" );
+ assert( pressure[1] == count_float_pressure(&liveout), "" );
+ }
+
+ // Copies do not define a new value and so do not interfere.
+ // Remove the copy's source from the liveout set before interfering.
+ uint idx = n->is_Copy();
+ if( idx ) {
+ uint x = n2lidx(n->in(idx));
+ if( liveout.remove( x ) ) {
+ lrgs(x)._area -= cost;
+ // Adjust register pressure.
+ lower_pressure( &lrgs(x), j-1, b, pressure, hrp_index );
+ assert( pressure[0] == count_int_pressure (&liveout), "" );
+ assert( pressure[1] == count_float_pressure(&liveout), "" );
+ }
+ }
+ } // End of if live or not
+
+ // Interfere with everything live. If the defined value must
+ // go in a particular register, just remove that register from
+ // all conflicting parties and avoid the interference.
+
+ // Make exclusions for rematerializable defs. Since rematerializable
+ // DEFs are not bound but the live range is, some uses must be bound.
+ // If we spill live range 'r', it can rematerialize at each use site
+ // according to its bindings.
+ const RegMask &rmask = lrgs(r).mask();
+ if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
+ // Smear odd bits; leave only aligned pairs of bits.
+ RegMask r2mask = rmask;
+ r2mask.SmearToPairs();
+ // Check for common case
+ int r_size = lrgs(r).num_regs();
+ OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
+
+ IndexSetIterator elements(&liveout);
+ uint l;
+ while ((l = elements.next()) != 0) {
+ LRG &lrg = lrgs(l);
+ // If 'l' must spill already, do not further hack his bits.
+ // He'll get some interferences and be forced to spill later.
+ if( lrg._must_spill ) continue;
+ // Remove bound register(s) from 'l's choices
+ RegMask old = lrg.mask();
+ uint old_size = lrg.mask_size();
+ // Remove the bits from LRG 'r' from LRG 'l' so 'l' no
+ // longer interferes with 'r'. If 'l' requires aligned
+ // adjacent pairs, subtract out bit pairs.
+ if( lrg.num_regs() == 2 && !lrg._fat_proj ) {
+ lrg.SUBTRACT( r2mask );
+ lrg.compute_set_mask_size();
+ } else if( r_size != 1 ) {
+ lrg.SUBTRACT( rmask );
+ lrg.compute_set_mask_size();
+ } else { // Common case: size 1 bound removal
+ if( lrg.mask().Member(r_reg) ) {
+ lrg.Remove(r_reg);
+ lrg.set_mask_size(lrg.mask().is_AllStack() ? 65535:old_size-1);
+ }
+ }
+ // If 'l' goes completely dry, it must spill.
+ if( lrg.not_free() ) {
+ // Give 'l' some kind of reasonable mask, so he picks up
+ // interferences (and will spill later).
+ lrg.set_mask( old );
+ lrg.set_mask_size(old_size);
+ must_spill++;
+ lrg._must_spill = 1;
+ lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
+ }
+ }
+ } // End of if bound
+
+ // Now interfere with everything that is live and has
+ // compatible register sets.
+ interfere_with_live(r,&liveout);
+
+ } // End of if normal register-allocated value
+
+ cost -= b->_freq; // Area remaining in the block
+ if( cost < 0.0 ) cost = 0.0; // Cost goes negative in the Phi area
+
+ // Make all inputs live
+ if( !n->is_Phi() ) { // Phi function uses come from prior block
+ JVMState* jvms = n->jvms();
+ uint debug_start = jvms ? jvms->debug_start() : 999999;
+ // Start loop at 1 (skip control edge) for most Nodes.
+ // SCMemProj's might be the sole use of a StoreLConditional.
+ // While StoreLConditionals set memory (the SCMemProj use)
+ // they also def flags; if that flag def is unused the
+ // allocator sees a flag-setting instruction with no use of
+ // the flags and assumes it's dead. This keeps the (useless)
+ // flag-setting behavior alive while also keeping the (useful)
+ // memory update effect.
+ for( uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++ ) {
+ Node *def = n->in(k);
+ uint x = n2lidx(def);
+ if( !x ) continue;
+ LRG &lrg = lrgs(x);
+ // No use-side cost for spilling debug info
+ if( k < debug_start )
+ // A USE costs twice block frequency (once for the Load, once
+ // for a Load-delay). Rematerialized uses only cost once.
+ lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq));
+ // It is live now
+ if( liveout.insert( x ) ) {
+ // Newly live things assumed live from here to top of block
+ lrg._area += cost;
+ // Adjust register pressure
+ if( lrg.mask().is_UP() && lrg.mask_size() ) {
+ if( lrg._is_float ) {
+ pressure[1] += lrg.reg_pressure();
+#ifdef EXACT_PRESSURE
+ if( pressure[1] > b->_freg_pressure )
+ b->_freg_pressure = pressure[1];
+#endif
+ } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+ pressure[0] += lrg.reg_pressure();
+#ifdef EXACT_PRESSURE
+ if( pressure[0] > b->_reg_pressure )
+ b->_reg_pressure = pressure[0];
+#endif
+ }
+ }
+ assert( pressure[0] == count_int_pressure (&liveout), "" );
+ assert( pressure[1] == count_float_pressure(&liveout), "" );
+ }
+ assert( lrg._area >= 0, "negative spill area" );
+ }
+ }
+ } // End of reverse pass over all instructions in block
+
+ // If we run off the top of the block with high pressure and
+ // never see a hi-to-low pressure transition, just record that
+ // the whole block is high pressure.
+ if( pressure[0] > (uint)INTPRESSURE ) {
+ hrp_index[0] = 0;
+#ifdef EXACT_PRESSURE
+ if( pressure[0] > b->_reg_pressure )
+ b->_reg_pressure = pressure[0];
+#else
+ b->_reg_pressure = (uint)INTPRESSURE+1;
+#endif
+ }
+ if( pressure[1] > (uint)FLOATPRESSURE ) {
+ hrp_index[1] = 0;
+#ifdef EXACT_PRESSURE
+ if( pressure[1] > b->_freg_pressure )
+ b->_freg_pressure = pressure[1];
+#else
+ b->_freg_pressure = (uint)FLOATPRESSURE+1;
+#endif
+ }
+
+ // Compute the high pressure index; avoid landing in the middle of projnodes
+ j = hrp_index[0];
+ if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
+ Node *cur = b->_nodes[j];
+ while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+ j--;
+ cur = b->_nodes[j];
+ }
+ }
+ b->_ihrp_index = j;
+ j = hrp_index[1];
+ if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
+ Node *cur = b->_nodes[j];
+ while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+ j--;
+ cur = b->_nodes[j];
+ }
+ }
+ b->_fhrp_index = j;
+
+#ifndef PRODUCT
+ // Gather Register Pressure Statistics
+ if( PrintOptoStatistics ) {
+ if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE )
+ _high_pressure++;
+ else
+ _low_pressure++;
+ }
+#endif
+ } // End of for all blocks
+
+ return must_spill;
+}
diff --git a/src/share/vm/opto/ifnode.cpp b/src/share/vm/opto/ifnode.cpp
new file mode 100644
index 000000000..48031ef3d
--- /dev/null
+++ b/src/share/vm/opto/ifnode.cpp
@@ -0,0 +1,922 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_ifnode.cpp.incl"
+
+
+extern int explicit_null_checks_elided;
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Return a tuple for whichever arm of the IF is reachable
+const Type *IfNode::Value( PhaseTransform *phase ) const {
+ if( !in(0) ) return Type::TOP;
+ if( phase->type(in(0)) == Type::TOP )
+ return Type::TOP;
+ const Type *t = phase->type(in(1));
+ if( t == Type::TOP ) // data is undefined
+ return TypeTuple::IFNEITHER; // unreachable altogether
+ if( t == TypeInt::ZERO ) // zero, or false
+ return TypeTuple::IFFALSE; // only false branch is reachable
+ if( t == TypeInt::ONE ) // 1, or true
+ return TypeTuple::IFTRUE; // only true branch is reachable
+ assert( t == TypeInt::BOOL, "expected boolean type" );
+
+ return TypeTuple::IFBOTH; // No progress
+}
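+
+// Illustrative sketch of the folding above (a sketch, not new behavior): if
+// the test input in(1) is known to be TypeInt::ONE, Value() returns
+// TypeTuple::IFTRUE, so only the true projection stays reachable and the
+// false arm can be optimized away.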
+
+const RegMask &IfNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//------------------------------split_if---------------------------------------
+// Look for places where we merge constants, then test on the merged value.
+// If the IF test will be constant folded on the path with the constant, we
+// win by splitting the IF to before the merge point.
+static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
+ // I could be a lot more general here, but I'm trying to squeeze this
+ // in before the Christmas '98 break so I'm gonna be kinda restrictive
+ // on the patterns I accept. CNC
+
+ // Look for a compare of a constant and a merged value
+ Node *i1 = iff->in(1);
+ if( !i1->is_Bool() ) return NULL;
+ BoolNode *b = i1->as_Bool();
+ Node *cmp = b->in(1);
+ if( !cmp->is_Cmp() ) return NULL;
+ i1 = cmp->in(1);
+ if( i1 == NULL || !i1->is_Phi() ) return NULL;
+ PhiNode *phi = i1->as_Phi();
+ if( phi->is_copy() ) return NULL;
+ Node *con2 = cmp->in(2);
+ if( !con2->is_Con() ) return NULL;
+ // See that the merge point contains some constants
+ Node *con1=NULL;
+ uint i4;
+ for( i4 = 1; i4 < phi->req(); i4++ ) {
+ con1 = phi->in(i4);
+ if( !con1 ) return NULL; // Do not optimize partially collapsed merges
+ if( con1->is_Con() ) break; // Found a constant
+ // Also allow null-vs-not-null checks
+ const TypePtr *tp = igvn->type(con1)->isa_ptr();
+ if( tp && tp->_ptr == TypePtr::NotNull )
+ break;
+ }
+ if( i4 >= phi->req() ) return NULL; // Found no constants
+
+ igvn->C->set_has_split_ifs(true); // Has chance for split-if
+
+ // Make sure that the compare can be constant folded away
+ Node *cmp2 = cmp->clone();
+ cmp2->set_req(1,con1);
+ cmp2->set_req(2,con2);
+ const Type *t = cmp2->Value(igvn);
+ // This compare is dead, so whack it!
+ igvn->remove_dead_node(cmp2);
+ if( !t->singleton() ) return NULL;
+
+ // No intervening control, like a simple Call
+ Node *r = iff->in(0);
+ if( !r->is_Region() ) return NULL;
+ if( phi->region() != r ) return NULL;
+ // No other users of the cmp/bool
+ if (b->outcnt() != 1 || cmp->outcnt() != 1) {
+ //tty->print_cr("many users of cmp/bool");
+ return NULL;
+ }
+
+ // Make sure we can determine where all the uses of merged values go
+ for (DUIterator_Fast jmax, j = r->fast_outs(jmax); j < jmax; j++) {
+ Node* u = r->fast_out(j);
+ if( u == r ) continue;
+ if( u == iff ) continue;
+ if( u->outcnt() == 0 ) continue; // use is dead & ignorable
+ if( !u->is_Phi() ) {
+ /*
+ if( u->is_Start() ) {
+ tty->print_cr("Region has inlined start use");
+ } else {
+ tty->print_cr("Region has odd use");
+ u->dump(2);
+ }*/
+ return NULL;
+ }
+ if( u != phi ) {
+ // CNC - do not allow any other merged value
+ //tty->print_cr("Merging another value");
+ //u->dump(2);
+ return NULL;
+ }
+ // Make sure we can account for all Phi uses
+ for (DUIterator_Fast kmax, k = u->fast_outs(kmax); k < kmax; k++) {
+ Node* v = u->fast_out(k); // User of the phi
+ // CNC - Allow only really simple patterns.
+ // In particular I disallow AddP of the Phi, a fairly common pattern
+ if( v == cmp ) continue; // The compare is OK
+ if( (v->is_ConstraintCast()) &&
+ v->in(0)->in(0) == iff )
+ continue; // CastPP/II of the IfNode is OK
+ // Disabled following code because I cannot tell if exactly one
+ // path dominates without a real dominator check. CNC 9/9/1999
+ //uint vop = v->Opcode();
+ //if( vop == Op_Phi ) { // Phi from another merge point might be OK
+ // Node *r = v->in(0); // Get controlling point
+ // if( !r ) return NULL; // Degraded to a copy
+ // // Find exactly one path in (either True or False doms, but not IFF)
+ // int cnt = 0;
+ // for( uint i = 1; i < r->req(); i++ )
+ // if( r->in(i) && r->in(i)->in(0) == iff )
+ // cnt++;
+ // if( cnt == 1 ) continue; // Exactly one of True or False guards Phi
+ //}
+ if( !v->is_Call() ) {
+ /*
+ if( v->Opcode() == Op_AddP ) {
+ tty->print_cr("Phi has AddP use");
+ } else if( v->Opcode() == Op_CastPP ) {
+ tty->print_cr("Phi has CastPP use");
+ } else if( v->Opcode() == Op_CastII ) {
+ tty->print_cr("Phi has CastII use");
+ } else {
+ tty->print_cr("Phi has use I cant be bothered with");
+ }
+ */
+ }
+ return NULL;
+
+ /* CNC - Cut out all the fancy acceptance tests
+ // Can we clone this use when doing the transformation?
+ // If all uses are from Phis at this merge or constants, then YES.
+ if( !v->in(0) && v != cmp ) {
+ tty->print_cr("Phi has free-floating use");
+ v->dump(2);
+ return NULL;
+ }
+ for( uint l = 1; l < v->req(); l++ ) {
+ if( (!v->in(l)->is_Phi() || v->in(l)->in(0) != r) &&
+ !v->in(l)->is_Con() ) {
+ tty->print_cr("Phi has use");
+ v->dump(2);
+ return NULL;
+ } // End of if Phi-use input is neither Phi nor Constant
+ } // End of for all inputs to Phi-use
+ */
+ } // End of for all uses of Phi
+ } // End of for all uses of Region
+
+ // Only do this if the IF node is in a sane state
+ if (iff->outcnt() != 2)
+ return NULL;
+
+ // Got a hit! Do the Mondo Hack!
+ //
+ //ABC a1c def ghi B 1 e h A C a c d f g i
+ // R - Phi - Phi - Phi Rc - Phi - Phi - Phi Rx - Phi - Phi - Phi
+ // cmp - 2 cmp - 2 cmp - 2
+ // bool bool_c bool_x
+ // if if_c if_x
+ // T F T F T F
+ // ..s.. ..t .. ..s.. ..t.. ..s.. ..t..
+ //
+ // Split the paths coming into the merge point into 2 separate groups of
+ // merges. On the left will be all the paths feeding constants into the
+ // Cmp's Phi. On the right will be the remaining paths. The Cmp's Phi
+ // will fold up into a constant; this will let the Cmp fold up as well as
+ // all the control flow. Below the original IF we have 2 control
+ // dependent regions, 's' and 't'. Now we will merge the two paths
+ // just prior to 's' and 't' from the two IFs. At least 1 path (and quite
+ // likely 2 or more) will promptly constant fold away.
+ PhaseGVN *phase = igvn;
+
+ // Make a region merging constants and a region merging the rest
+ uint req_c = 0;
+ for (uint ii = 1; ii < r->req(); ii++) {
+ if( phi->in(ii) == con1 ) {
+ req_c++;
+ }
+ }
+ Node *region_c = new (igvn->C, req_c + 1) RegionNode(req_c + 1);
+ Node *phi_c = con1;
+ uint len = r->req();
+ Node *region_x = new (igvn->C, len - req_c + 1) RegionNode(len - req_c + 1);
+ Node *phi_x = PhiNode::make_blank(region_x, phi);
+ for (uint i = 1, i_c = 1, i_x = 1; i < len; i++) {
+ if( phi->in(i) == con1 ) {
+ region_c->init_req( i_c++, r ->in(i) );
+ } else {
+ region_x->init_req( i_x, r ->in(i) );
+ phi_x ->init_req( i_x++, phi->in(i) );
+ }
+ }
+
+ // Register the new RegionNodes but do not transform them. Cannot
+ // transform until the entire Region/Phi conglomerate has been hacked
+ // as a single huge transform.
+ igvn->register_new_node_with_optimizer( region_c );
+ igvn->register_new_node_with_optimizer( region_x );
+ phi_x = phase->transform( phi_x );
+ // Prevent the untimely death of phi_x. Currently he has no uses. He is
+ // about to get one. If that one use goes away, then phi_x will look dead.
+ // However, he will be picking up some more uses down below.
+ Node *hook = new (igvn->C, 4) Node(4);
+ hook->init_req(0, phi_x);
+ hook->init_req(1, phi_c);
+
+ // Make the compare
+ Node *cmp_c = phase->makecon(t);
+ Node *cmp_x = cmp->clone();
+ cmp_x->set_req(1,phi_x);
+ cmp_x->set_req(2,con2);
+ cmp_x = phase->transform(cmp_x);
+ // Make the bool
+ Node *b_c = phase->transform(new (igvn->C, 2) BoolNode(cmp_c,b->_test._test));
+ Node *b_x = phase->transform(new (igvn->C, 2) BoolNode(cmp_x,b->_test._test));
+ // Make the IfNode
+ IfNode *iff_c = new (igvn->C, 2) IfNode(region_c,b_c,iff->_prob,iff->_fcnt);
+ igvn->set_type_bottom(iff_c);
+ igvn->_worklist.push(iff_c);
+ hook->init_req(2, iff_c);
+
+ IfNode *iff_x = new (igvn->C, 2) IfNode(region_x,b_x,iff->_prob, iff->_fcnt);
+ igvn->set_type_bottom(iff_x);
+ igvn->_worklist.push(iff_x);
+ hook->init_req(3, iff_x);
+
+ // Make the true/false arms
+ Node *iff_c_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_c));
+ Node *iff_c_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_c));
+ Node *iff_x_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_x));
+ Node *iff_x_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_x));
+
+ // Merge the TRUE paths
+ Node *region_s = new (igvn->C, 3) RegionNode(3);
+ igvn->_worklist.push(region_s);
+ region_s->init_req(1, iff_c_t);
+ region_s->init_req(2, iff_x_t);
+ igvn->register_new_node_with_optimizer( region_s );
+
+ // Merge the FALSE paths
+ Node *region_f = new (igvn->C, 3) RegionNode(3);
+ igvn->_worklist.push(region_f);
+ region_f->init_req(1, iff_c_f);
+ region_f->init_req(2, iff_x_f);
+ igvn->register_new_node_with_optimizer( region_f );
+
+ igvn->hash_delete(cmp);// Remove soon-to-be-dead node from hash table.
+ cmp->set_req(1,NULL); // Whack the inputs to cmp because it will be dead
+ cmp->set_req(2,NULL);
+ // Check for all uses of the Phi and give them a new home.
+ // The 'cmp' got cloned, but CastPP/IIs need to be moved.
+ Node *phi_s = NULL; // do not construct unless needed
+ Node *phi_f = NULL; // do not construct unless needed
+ for (DUIterator_Last i2min, i2 = phi->last_outs(i2min); i2 >= i2min; --i2) {
+ Node* v = phi->last_out(i2);// User of the phi
+ igvn->hash_delete(v); // Have to fixup other Phi users
+ igvn->_worklist.push(v);
+ uint vop = v->Opcode();
+ Node *proj = NULL;
+ if( vop == Op_Phi ) { // Remote merge point
+ Node *r = v->in(0);
+ for (uint i3 = 1; i3 < r->req(); i3++)
+ if (r->in(i3) && r->in(i3)->in(0) == iff) {
+ proj = r->in(i3);
+ break;
+ }
+ } else if( v->is_ConstraintCast() ) {
+ proj = v->in(0); // Controlling projection
+ } else {
+ assert( 0, "do not know how to handle this guy" );
+ }
+
+ Node *proj_path_data, *proj_path_ctrl;
+ if( proj->Opcode() == Op_IfTrue ) {
+ if( phi_s == NULL ) {
+ // Only construct phi_s if needed; otherwise it provides an
+ // interfering use.
+ phi_s = PhiNode::make_blank(region_s,phi);
+ phi_s->init_req( 1, phi_c );
+ phi_s->init_req( 2, phi_x );
+ phi_s = phase->transform(phi_s);
+ }
+ proj_path_data = phi_s;
+ proj_path_ctrl = region_s;
+ } else {
+ if( phi_f == NULL ) {
+ // Only construct phi_f if needed; otherwise it provides an
+ // interfering use.
+ phi_f = PhiNode::make_blank(region_f,phi);
+ phi_f->init_req( 1, phi_c );
+ phi_f->init_req( 2, phi_x );
+ phi_f = phase->transform(phi_f);
+ }
+ proj_path_data = phi_f;
+ proj_path_ctrl = region_f;
+ }
+
+ // Fixup 'v' for the split
+ if( vop == Op_Phi ) { // Remote merge point
+ uint i;
+ for( i = 1; i < v->req(); i++ )
+ if( v->in(i) == phi )
+ break;
+ v->set_req(i, proj_path_data );
+ } else if( v->is_ConstraintCast() ) {
+ v->set_req(0, proj_path_ctrl );
+ v->set_req(1, proj_path_data );
+ } else
+ ShouldNotReachHere();
+ }
+
+ // Now replace the original iff's True/False with region_s/region_f.
+ // This makes the original iff go dead.
+ for (DUIterator_Last i3min, i3 = iff->last_outs(i3min); i3 >= i3min; --i3) {
+ Node* p = iff->last_out(i3);
+ assert( p->Opcode() == Op_IfTrue || p->Opcode() == Op_IfFalse, "" );
+ Node *u = (p->Opcode() == Op_IfTrue) ? region_s : region_f;
+ // Replace p with u
+ igvn->add_users_to_worklist(p);
+ for (DUIterator_Last lmin, l = p->last_outs(lmin); l >= lmin;) {
+ Node* x = p->last_out(l);
+ igvn->hash_delete(x);
+ uint uses_found = 0;
+ for( uint j = 0; j < x->req(); j++ ) {
+ if( x->in(j) == p ) {
+ x->set_req(j, u);
+ uses_found++;
+ }
+ }
+ l -= uses_found; // we deleted 1 or more copies of this edge
+ }
+ igvn->remove_dead_node(p);
+ }
+
+ // Force the original merge dead
+ igvn->hash_delete(r);
+ r->set_req_X(0,NULL,igvn);
+
+ // Now remove the bogus extra edges used to keep things alive
+ igvn->remove_dead_node( hook );
+
+ // Must return either the original node (now dead) or a new node
+ // (Do not return a top here, since that would break the uniqueness of top.)
+ return new (igvn->C, 1) ConINode(TypeInt::ZERO);
+}
+
+//------------------------------is_range_check---------------------------------
+// Return 0 if not a range check. Return 1 if a range check and set index and
+// offset. Return 2 if we had to negate the test. Index is NULL if the check
+// is versus a constant.
+int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) {
+ Node* b = in(1);
+ if (b == NULL || !b->is_Bool()) return 0;
+ BoolNode* bn = b->as_Bool();
+ Node* cmp = bn->in(1);
+ if (cmp == NULL) return 0;
+ if (cmp->Opcode() != Op_CmpU) return 0;
+
+ Node* l = cmp->in(1);
+ Node* r = cmp->in(2);
+ int flip_test = 1;
+ if (bn->_test._test == BoolTest::le) {
+ l = cmp->in(2);
+ r = cmp->in(1);
+ flip_test = 2;
+ } else if (bn->_test._test != BoolTest::lt) {
+ return 0;
+ }
+ if (l->is_top()) return 0; // Top input means dead test
+ if (r->Opcode() != Op_LoadRange) return 0;
+
+ // We have recognized one of these forms:
+ // Flip 1: If (Bool[<] CmpU(l, LoadRange)) ...
+ // Flip 2: If (Bool[<=] CmpU(LoadRange, l)) ...
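+ //
+ // Illustrative example (not from this code): a Java array access a[i]
+ // typically reaches here as
+ // If (Bool[<] CmpU(i, LoadRange(a)))
+ // with the out-of-bounds arm ending in a Reason_range_check uncommon
+ // trap, which is exactly what the code below verifies.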
+
+ // Make sure it's a real range check by requiring an uncommon trap
+ // along the OOB path. Otherwise, it's possible that the user wrote
+ // something which optimized to look like a range check but behaves
+ // in some other way.
+ Node* iftrap = proj_out(flip_test == 2 ? true : false);
+ bool found_trap = false;
+ if (iftrap != NULL) {
+ Node* u = iftrap->unique_ctrl_out();
+ if (u != NULL) {
+ // It could be a merge point (Region) for uncommon trap.
+ if (u->is_Region()) {
+ Node* c = u->unique_ctrl_out();
+ if (c != NULL) {
+ iftrap = u;
+ u = c;
+ }
+ }
+ if (u->in(0) == iftrap && u->is_CallStaticJava()) {
+ int req = u->as_CallStaticJava()->uncommon_trap_request();
+ if (Deoptimization::trap_request_reason(req) ==
+ Deoptimization::Reason_range_check) {
+ found_trap = true;
+ }
+ }
+ }
+ }
+ if (!found_trap) return 0; // sorry, no cigar
+
+ // Look for index+offset form
+ Node* ind = l;
+ jint off = 0;
+ if (l->is_top()) {
+ return 0;
+ } else if (l->is_Add()) {
+ if ((off = l->in(1)->find_int_con(0)) != 0) {
+ ind = l->in(2);
+ } else if ((off = l->in(2)->find_int_con(0)) != 0) {
+ ind = l->in(1);
+ }
+ } else if ((off = l->find_int_con(-1)) >= 0) {
+ // constant offset with no variable index
+ ind = NULL;
+ } else {
+ // variable index with no constant offset (or dead negative index)
+ off = 0;
+ }
+
+ // Return all the values:
+ index = ind;
+ offset = off;
+ range = r;
+ return flip_test;
+}
+
+//------------------------------adjust_check-----------------------------------
+// Adjust (widen) a prior range check
+static void adjust_check(Node* proj, Node* range, Node* index,
+ int flip, jint off_lo, PhaseIterGVN* igvn) {
+ PhaseGVN *gvn = igvn;
+ // Break apart the old check
+ Node *iff = proj->in(0);
+ Node *bol = iff->in(1);
+ if( bol->is_top() ) return; // In case a partially dead range check appears
+ // bail (or bomb[ASSERT/DEBUG]) if NOT projection-->IfNode-->BoolNode
+ DEBUG_ONLY( if( !bol->is_Bool() ) { proj->dump(3); fatal("Expect projection-->IfNode-->BoolNode"); } )
+ if( !bol->is_Bool() ) return;
+
+ Node *cmp = bol->in(1);
+ // Compute a new check
+ Node *new_add = gvn->intcon(off_lo);
+ if( index ) {
+ new_add = off_lo ? gvn->transform(new (gvn->C, 3) AddINode( index, new_add )) : index;
+ }
+ Node *new_cmp = (flip == 1)
+ ? new (gvn->C, 3) CmpUNode( new_add, range )
+ : new (gvn->C, 3) CmpUNode( range, new_add );
+ new_cmp = gvn->transform(new_cmp);
+ // See if no need to adjust the existing check
+ if( new_cmp == cmp ) return;
+ // Else, adjust existing check
+ Node *new_bol = gvn->transform( new (gvn->C, 2) BoolNode( new_cmp, bol->as_Bool()->_test._test ) );
+ igvn->hash_delete( iff );
+ iff->set_req_X( 1, new_bol, igvn );
+}
+
+//------------------------------up_one_dom-------------------------------------
+// Walk up the dominator tree one step. Return NULL at root or true
+// complex merges. Skips through small diamonds.
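+// A "small diamond" (illustrative): an If whose IfTrue and IfFalse
+// projections both feed the same 2-input Region; from such a Region the
+// walk steps directly to the If, possibly skipping a slow-path Call on
+// either arm.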
+Node* IfNode::up_one_dom(Node *curr, bool linear_only) {
+ Node *dom = curr->in(0);
+ if( !dom ) // Found a Region degraded to a copy?
+ return curr->nonnull_req(); // Skip thru it
+
+ if( curr != dom ) // Normal walk up one step?
+ return dom;
+
+ // Use linear_only if we are still parsing, since we cannot
+ // trust the regions to be fully filled in.
+ if (linear_only)
+ return NULL;
+
+ // Else hit a Region. Check for a loop header
+ if( dom->is_Loop() )
+ return dom->in(1); // Skip up thru loops
+
+ // Check for small diamonds
+ Node *din1, *din2, *din3, *din4;
+ if( dom->req() == 3 && // 2-path merge point
+ (din1 = dom ->in(1)) && // Left path exists
+ (din2 = dom ->in(2)) && // Right path exists
+ (din3 = din1->in(0)) && // Left path up one
+ (din4 = din2->in(0)) ) { // Right path up one
+ if( din3->is_Call() && // Handle a slow-path call on either arm
+ (din3 = din3->in(0)) )
+ din3 = din3->in(0);
+ if( din4->is_Call() && // Handle a slow-path call on either arm
+ (din4 = din4->in(0)) )
+ din4 = din4->in(0);
+ if( din3 == din4 && din3->is_If() )
+ return din3; // Skip around diamonds
+ }
+
+ // Give up the search at true merges
+ return NULL; // Dead loop? Or hit root?
+}
+
+//------------------------------remove_useless_bool----------------------------
+// Check for people making a useless boolean: things like
+// if( (x < y ? true : false) ) { ... }
+// Replace with if( x < y ) { ... }
+static Node *remove_useless_bool(IfNode *iff, PhaseGVN *phase) {
+ Node *i1 = iff->in(1);
+ if( !i1->is_Bool() ) return NULL;
+ BoolNode *bol = i1->as_Bool();
+
+ Node *cmp = bol->in(1);
+ if( cmp->Opcode() != Op_CmpI ) return NULL;
+
+ // Must be comparing against a bool
+ const Type *cmp2_t = phase->type( cmp->in(2) );
+ if( cmp2_t != TypeInt::ZERO &&
+ cmp2_t != TypeInt::ONE )
+ return NULL;
+
+ // Find a prior merge point merging the boolean
+ i1 = cmp->in(1);
+ if( !i1->is_Phi() ) return NULL;
+ PhiNode *phi = i1->as_Phi();
+ if( phase->type( phi ) != TypeInt::BOOL )
+ return NULL;
+
+ // Check for diamond pattern
+ int true_path = phi->is_diamond_phi();
+ if( true_path == 0 ) return NULL;
+
+ // phi->region->if_proj->ifnode->bool->cmp
+ BoolNode *bol2 = phi->in(0)->in(1)->in(0)->in(1)->as_Bool();
+
+ // Now get the 'sense' of the test correct so we can plug in
+ // either iff2->in(1) or its complement.
+ int flip = 0;
+ if( bol->_test._test == BoolTest::ne ) flip = 1-flip;
+ else if( bol->_test._test != BoolTest::eq ) return NULL;
+ if( cmp2_t == TypeInt::ZERO ) flip = 1-flip;
+
+ const Type *phi1_t = phase->type( phi->in(1) );
+ const Type *phi2_t = phase->type( phi->in(2) );
+ // Check for Phi(0,1) and flip
+ if( phi1_t == TypeInt::ZERO ) {
+ if( phi2_t != TypeInt::ONE ) return NULL;
+ flip = 1-flip;
+ } else {
+ // Check for Phi(1,0)
+ if( phi1_t != TypeInt::ONE ) return NULL;
+ if( phi2_t != TypeInt::ZERO ) return NULL;
+ }
+ if( true_path == 2 ) {
+ flip = 1-flip;
+ }
+
+ Node* new_bol = (flip ? phase->transform( bol2->negate(phase) ) : bol2);
+ iff->set_req(1, new_bol);
+ // Intervening diamond probably goes dead
+ phase->C->set_major_progress();
+ return iff;
+}
+
+static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff);
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (remove_dead_region(phase, can_reshape)) return this;
+ // No Def-Use info?
+ if (!can_reshape) return NULL;
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+
+ // Don't bother trying to transform a dead if
+ if (in(0)->is_top()) return NULL;
+ // Don't bother trying to transform an if with a dead test
+ if (in(1)->is_top()) return NULL;
+ // Another variation of a dead test
+ if (in(1)->is_Con()) return NULL;
+ // Another variation of a dead if
+ if (outcnt() < 2) return NULL;
+
+ // Canonicalize the test.
+ Node* idt_if = idealize_test(phase, this);
+ if (idt_if != NULL) return idt_if;
+
+ // Try to split the IF
+ Node *s = split_if(this, igvn);
+ if (s != NULL) return s;
+
+ // Check for people making a useless boolean: things like
+ // if( (x < y ? true : false) ) { ... }
+ // Replace with if( x < y ) { ... }
+ Node *bol2 = remove_useless_bool(this, phase);
+ if( bol2 ) return bol2;
+
+ // Setup to scan up the CFG looking for a dominating test
+ Node *dom = in(0);
+ Node *prev_dom = this;
+
+ // Check for range-check vs other kinds of tests
+ Node *index1, *range1;
+ jint offset1;
+ int flip1 = is_range_check(range1, index1, offset1);
+ if( flip1 ) {
+ Node *first_prev_dom = NULL;
+
+ // Try to remove extra range checks. Since 'up_one_dom' gives up at merges,
+ // all checks we inspect post-dominate the top-most check we find.
+ // If we are going to fail the current check and we reach the top check
+ // then we are guaranteed to fail, so just start interpreting there.
+ // We 'expand' the top 2 range checks to include all post-dominating
+ // checks.
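+ //
+ // Illustrative example (not from this code): if dominating checks exist
+ // for a[i+1] and a[i+3] and the current check is for a[i+2], then
+ // off_lo == 1 and off_hi == 3; widening the two dominating checks to
+ // cover offsets 1 and 3 (roughly) makes the i+2 check redundant, so it
+ // can be dominated away.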
+
+ // The top 2 range checks seen
+ Node *prev_chk1 = NULL;
+ Node *prev_chk2 = NULL;
+ // Low and high offsets seen so far
+ jint off_lo = offset1;
+ jint off_hi = offset1;
+
+ // Scan for the top 2 checks and collect range of offsets
+ for( int dist = 0; dist < 999; dist++ ) { // Range-Check scan limit
+ if( dom->Opcode() == Op_If && // Is it an If?
+ prev_dom->in(0) == dom ) { // One path of test does dominate?
+ if( dom == this ) return NULL; // dead loop
+ // See if this is a range check
+ Node *index2, *range2;
+ jint offset2;
+ int flip2 = dom->as_If()->is_range_check(range2, index2, offset2);
+ // See if this is a _matching_ range check, checking against
+ // the same array bounds.
+ if( flip2 == flip1 && range2 == range1 && index2 == index1 &&
+ dom->outcnt() == 2 ) {
+ // Gather expanded bounds
+ off_lo = MIN2(off_lo,offset2);
+ off_hi = MAX2(off_hi,offset2);
+ // Record top 2 range checks
+ prev_chk2 = prev_chk1;
+ prev_chk1 = prev_dom;
+ // If we match the test exactly, then the top test covers
+ // both our lower and upper bounds.
+ if( dom->in(1) == in(1) )
+ prev_chk2 = prev_chk1;
+ }
+ }
+ prev_dom = dom;
+ dom = up_one_dom( dom );
+ if( !dom ) break;
+ }
+
+
+ // Attempt to widen the dominating range check to cover some later
+ // ones. Since range checks "fail" by uncommon-trapping to the
+ // interpreter, widening a check can make us speculatively enter the
+ // interpreter. If we see range-check deopts, do not widen!
+ if (!phase->C->allow_range_check_smearing()) return NULL;
+
+ // Constant indices only need to check the upper bound.
+ // Non-constant indices must check both low and high.
+ if( index1 ) {
+ // Didn't find 2 prior covering checks, so cannot remove anything.
+ if( !prev_chk2 ) return NULL;
+ // 'Widen' the offsets of the 1st and 2nd covering check
+ adjust_check( prev_chk1, range1, index1, flip1, off_lo, igvn );
+ // Do not call adjust_check twice on the same projection
+ // as the first call may have transformed the BoolNode to a ConI
+ if( prev_chk1 != prev_chk2 ) {
+ adjust_check( prev_chk2, range1, index1, flip1, off_hi, igvn );
+ }
+ // Test is now covered by prior checks, dominate it out
+ prev_dom = prev_chk2;
+ } else {
+ // Didn't find prior covering check, so cannot remove anything.
+ if( !prev_chk1 ) return NULL;
+ // 'Widen' the offset of the 1st and only covering check
+ adjust_check( prev_chk1, range1, index1, flip1, off_hi, igvn );
+ // Test is now covered by prior checks, dominate it out
+ prev_dom = prev_chk1;
+ }
+
+
+ } else { // Scan for an equivalent test
+
+ Node *cmp;
+ int dist = 0; // Cutoff limit for search
+ int op = Opcode();
+ if( op == Op_If &&
+ (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) {
+ if( cmp->in(2) != NULL && // make sure cmp is not already dead
+ cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
+ dist = 64; // Limit for null-pointer scans
+ } else {
+ dist = 4; // Do not bother for random pointer tests
+ }
+ } else {
+ dist = 4; // Limit for random junky scans
+ }
+
+ // Normal equivalent-test check.
+ if( !dom ) return NULL; // Dead loop?
+
+ // Search up the dominator tree for an If with an identical test
+ while( dom->Opcode() != op || // Not same opcode?
+ dom->in(1) != in(1) || // Not same input 1?
+ (req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
+ prev_dom->in(0) != dom ) { // One path of test does not dominate?
+ if( dist < 0 ) return NULL;
+
+ dist--;
+ prev_dom = dom;
+ dom = up_one_dom( dom );
+ if( !dom ) return NULL;
+ }
+
+ // Check that we did not follow a loop back to ourselves
+ if( this == dom )
+ return NULL;
+
+ if( dist > 2 ) // Add to count of NULL checks elided
+ explicit_null_checks_elided++;
+
+ } // End of Else scan for an equivalent test
+
+ // Hit! Remove this IF
+#ifndef PRODUCT
+ if( TraceIterativeGVN ) {
+ tty->print(" Removing IfNode: "); this->dump();
+ }
+ if( VerifyOpto && !phase->allow_progress() ) {
+ // Found an equivalent dominating test,
+ // we cannot guarantee reaching a fix-point for these during iterative GVN
+ // since intervening nodes may not change.
+ return NULL;
+ }
+#endif
+
+ // Replace dominated IfNode
+ dominated_by( prev_dom, igvn );
+
+ // Must return either the original node (now dead) or a new node
+ // (Do not return a top here, since that would break the uniqueness of top.)
+ return new (phase->C, 1) ConINode(TypeInt::ZERO);
+}
+
+//------------------------------dominated_by-----------------------------------
+void IfNode::dominated_by( Node *prev_dom, PhaseIterGVN *igvn ) {
+ igvn->hash_delete(this); // Remove self to prevent spurious V-N
+ Node *idom = in(0);
+ // Need opcode to decide which way 'this' test goes
+ int prev_op = prev_dom->Opcode();
+ Node *top = igvn->C->top(); // Shortcut to top
+
+ // Now walk the current IfNode's projections.
+ // Loop ends when 'this' has no more uses.
+ for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
+ Node *ifp = last_out(i); // Get IfTrue/IfFalse
+ igvn->add_users_to_worklist(ifp);
+ // Check which projection it is and set target.
+ // Data-target is either the dominating projection of the same type
+ // or TOP if the dominating projection is of opposite type.
+ // Data-target will be used as the new control edge for the non-CFG
+ // nodes like Casts and Loads.
+ Node *data_target = (ifp->Opcode() == prev_op ) ? prev_dom : top;
+ // Control-target is just the If's immediate dominator or TOP.
+ Node *ctrl_target = (ifp->Opcode() == prev_op ) ? idom : top;
+
+ // For each child of an IfTrue/IfFalse projection, reroute.
+ // Loop ends when projection has no more uses.
+ for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
+ Node* s = ifp->last_out(j); // Get child of IfTrue/IfFalse
+ igvn->hash_delete(s); // Yank from hash table before edge hacking
+ if( !s->depends_only_on_test() ) {
+ // Find the control input matching this def-use edge.
+ // For Regions it may not be in slot 0.
+ uint l;
+ for( l = 0; s->in(l) != ifp; l++ ) { }
+ s->set_req(l, ctrl_target);
+ } else { // Else, for control producers,
+ s->set_req(0, data_target); // Move child to data-target
+ }
+ igvn->_worklist.push(s); // Revisit collapsed Phis
+ } // End for each child of a projection
+
+ igvn->remove_dead_node(ifp);
+ } // End for each IfTrue/IfFalse child of If
+
+ // Kill the IfNode
+ igvn->remove_dead_node(this);
+}
+
+//------------------------------Identity---------------------------------------
+// If the test is constant & we match, then we are the input Control
+Node *IfTrueNode::Identity( PhaseTransform *phase ) {
+ // Can only optimize if cannot go the other way
+ const TypeTuple *t = phase->type(in(0))->is_tuple();
+ return ( t == TypeTuple::IFNEITHER || t == TypeTuple::IFTRUE )
+ ? in(0)->in(0) // IfNode control
+ : this; // no progress
+}
+
+//------------------------------dump_spec--------------------------------------
+#ifndef PRODUCT
+void IfNode::dump_spec(outputStream *st) const {
+ st->print("P=%f, C=%f",_prob,_fcnt);
+}
+#endif
+
+//------------------------------idealize_test----------------------------------
+// Try to canonicalize tests better. Peek at the Cmp/Bool/If sequence and
+// come up with a canonical sequence. Bools with 'eq', 'gt' and 'ge' forms
+// get converted to 'ne', 'le' and 'lt' forms. IfTrue/IfFalse get swapped as
+// needed.
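+//
+// For example (illustrative): an If testing Bool[eq] is rebuilt as an If
+// testing Bool[ne] with probability 1 - _prob, and users of the old IfTrue
+// projection are rerouted to the new IfFalse projection (and vice versa),
+// so overall behavior is unchanged.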
+static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) {
+ assert(iff->in(0) != NULL, "If must be live");
+
+ if (iff->outcnt() != 2) return NULL; // Malformed projections.
+ Node* old_if_f = iff->proj_out(false);
+ Node* old_if_t = iff->proj_out(true);
+
+ // CountedLoopEnds want the back-control test to be TRUE, regardless of
+ // whether they are testing a 'gt' or 'lt' condition. The 'gt' condition
+ // happens in count-down loops
+ if (iff->is_CountedLoopEnd()) return NULL;
+ if (!iff->in(1)->is_Bool()) return NULL; // Happens for partially optimized IF tests
+ BoolNode *b = iff->in(1)->as_Bool();
+ BoolTest bt = b->_test;
+ // Test already in good order?
+ if( bt.is_canonical() )
+ return NULL;
+
+ // Flip test to be canonical. Requires flipping the IfFalse/IfTrue and
+ // cloning the IfNode.
+ Node* new_b = phase->transform( new (phase->C, 2) BoolNode(b->in(1), bt.negate()) );
+ if( !new_b->is_Bool() ) return NULL;
+ b = new_b->as_Bool();
+
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ assert( igvn, "Test is not canonical in parser?" );
+
+ // The IF node never really changes, but it needs to be cloned
+ iff = new (phase->C, 2) IfNode( iff->in(0), b, 1.0-iff->_prob, iff->_fcnt);
+
+ Node *prior = igvn->hash_find_insert(iff);
+ if( prior ) {
+ igvn->remove_dead_node(iff);
+ iff = (IfNode*)prior;
+ } else {
+ // Cannot call transform on it just yet
+ igvn->set_type_bottom(iff);
+ }
+ igvn->_worklist.push(iff);
+
+ // Now handle projections. Cloning not required.
+ Node* new_if_f = (Node*)(new (phase->C, 1) IfFalseNode( iff ));
+ Node* new_if_t = (Node*)(new (phase->C, 1) IfTrueNode ( iff ));
+
+ igvn->register_new_node_with_optimizer(new_if_f);
+ igvn->register_new_node_with_optimizer(new_if_t);
+ igvn->hash_delete(old_if_f);
+ igvn->hash_delete(old_if_t);
+ // Flip test, so flip trailing control
+ igvn->subsume_node(old_if_f, new_if_t);
+ igvn->subsume_node(old_if_t, new_if_f);
+
+ // Progress
+ return iff;
+}
+
+//------------------------------Identity---------------------------------------
+// If the test is constant & we match, then we are the input Control
+Node *IfFalseNode::Identity( PhaseTransform *phase ) {
+ // Can only optimize if cannot go the other way
+ const TypeTuple *t = phase->type(in(0))->is_tuple();
+ return ( t == TypeTuple::IFNEITHER || t == TypeTuple::IFFALSE )
+ ? in(0)->in(0) // IfNode control
+ : this; // no progress
+}
diff --git a/src/share/vm/opto/indexSet.cpp b/src/share/vm/opto/indexSet.cpp
new file mode 100644
index 000000000..078315bee
--- /dev/null
+++ b/src/share/vm/opto/indexSet.cpp
@@ -0,0 +1,573 @@
+/*
+ * Copyright 1998-2004 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This file defines the IndexSet class, a set of sparse integer indices.
+// This data structure is used by the compiler in its liveness analysis and
+// during register allocation. It also defines an iterator for this class.
+
+#include "incls/_precompiled.incl"
+#include "incls/_indexSet.cpp.incl"
+
+//-------------------------------- Initializations ------------------------------
+
+IndexSet::BitBlock IndexSet::_empty_block = IndexSet::BitBlock();
+
+#ifdef ASSERT
+// Initialize statistics counters
+uint IndexSet::_alloc_new = 0;
+uint IndexSet::_alloc_total = 0;
+
+long IndexSet::_total_bits = 0;
+long IndexSet::_total_used_blocks = 0;
+long IndexSet::_total_unused_blocks = 0;
+
+// Per set, or all sets operation tracing
+int IndexSet::_serial_count = 1;
+#endif
+
+// What is the first set bit in a 5 bit integer?
+const byte IndexSetIterator::_first_bit[32] = {
+ 0, 0, 1, 0,
+ 2, 0, 1, 0,
+ 3, 0, 1, 0,
+ 2, 0, 1, 0,
+ 4, 0, 1, 0,
+ 2, 0, 1, 0,
+ 3, 0, 1, 0,
+ 2, 0, 1, 0
+};
+
+// What is the second set bit in a 5 bit integer?
+const byte IndexSetIterator::_second_bit[32] = {
+ 5, 5, 5, 1,
+ 5, 2, 2, 1,
+ 5, 3, 3, 1,
+ 3, 2, 2, 1,
+ 5, 4, 4, 1,
+ 4, 2, 2, 1,
+ 4, 3, 3, 1,
+ 3, 2, 2, 1
+};
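+
+// Worked example (illustrative): for the 5 bit window value 0x0A (binary
+// 01010), _first_bit[0x0A] == 1 and _second_bit[0x0A] == 3: the lowest set
+// bit is bit 1 and the next set bit is bit 3. IndexSetIterator::next()
+// returns the element for bit 1 and shifts the window so the following
+// call resumes at bit 3.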
+
+// I tried implementing the IndexSetIterator with a window_size of 8 and
+// didn't seem to get a noticeable speedup. I am leaving in the tables
+// in case we want to switch back.
+
+/*const byte IndexSetIterator::_first_bit[256] = {
+ 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+const byte IndexSetIterator::_second_bit[256] = {
+ 8, 8, 8, 1, 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1,
+ 8, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
+ 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 8, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1,
+ 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
+ 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1
+};*/
+
+//---------------------------- IndexSet::populate_free_list() -----------------------------
+// Populate the free BitBlock list with a batch of BitBlocks. The BitBlocks
+// are 32 byte aligned.
+
+void IndexSet::populate_free_list() {
+ Compile *compile = Compile::current();
+ BitBlock *free = (BitBlock*)compile->indexSet_free_block_list();
+
+ char *mem = (char*)arena()->Amalloc_4(sizeof(BitBlock) *
+ bitblock_alloc_chunk_size + 32);
+
+ // Align the pointer to a 32 byte boundary.
+ BitBlock *new_blocks = (BitBlock*)(((uintptr_t)mem + 32) & ~0x001F);
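+ // For example (illustrative): if mem ends in 0x21, then
+ // (0x21 + 32) & ~0x1F == 0x40, i.e. the next 32 byte boundary.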
+
+ // Add the new blocks to the free list.
+ for (int i = 0; i < bitblock_alloc_chunk_size; i++) {
+ new_blocks->set_next(free);
+ free = new_blocks;
+ new_blocks++;
+ }
+
+ compile->set_indexSet_free_block_list(free);
+
+#ifdef ASSERT
+ if (CollectIndexSetStatistics) {
+ _alloc_new += bitblock_alloc_chunk_size;
+ }
+#endif
+}
+
+
+//---------------------------- IndexSet::alloc_block() ------------------------
+// Allocate a BitBlock from the free list. If the free list is empty,
+// prime it.
+
+IndexSet::BitBlock *IndexSet::alloc_block() {
+#ifdef ASSERT
+ if (CollectIndexSetStatistics) {
+ _alloc_total++;
+ }
+#endif
+ Compile *compile = Compile::current();
+ BitBlock* free_list = (BitBlock*)compile->indexSet_free_block_list();
+ if (free_list == NULL) {
+ populate_free_list();
+ free_list = (BitBlock*)compile->indexSet_free_block_list();
+ }
+ BitBlock *block = free_list;
+ compile->set_indexSet_free_block_list(block->next());
+
+ block->clear();
+ return block;
+}
+
+//---------------------------- IndexSet::alloc_block_containing() -------------
+// Allocate a new BitBlock and put it into the position in the _blocks array
+// corresponding to element.
+
+IndexSet::BitBlock *IndexSet::alloc_block_containing(uint element) {
+ BitBlock *block = alloc_block();
+ uint bi = get_block_index(element);
+ _blocks[bi] = block;
+ return block;
+}
+
+//---------------------------- IndexSet::free_block() -------------------------
+// Add a BitBlock to the free list.
+
+void IndexSet::free_block(uint i) {
+ debug_only(check_watch("free block", i));
+ assert(i < _max_blocks, "block index too large");
+ BitBlock *block = _blocks[i];
+ assert(block != &_empty_block, "cannot free the empty block");
+ block->set_next((IndexSet::BitBlock*)Compile::current()->indexSet_free_block_list());
+ Compile::current()->set_indexSet_free_block_list(block);
+ set_block(i,&_empty_block);
+}
+
+//------------------------------lrg_union--------------------------------------
+// Compute the union of all elements of one and two which interfere with
+// the RegMask mask. If the degree of the union exceeds
+// fail_degree, the union bails out. The underlying set is cleared before
+// the union is performed.
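+//
+// Hypothetical usage sketch (names here are illustrative, not from this
+// file): a coalescing pass could ask whether merging lr1 and lr2 stays
+// colorable:
+// if (tmp_set->lrg_union(lr1, lr2, max_colorable_degree, ifg, shared_mask)
+// >= max_colorable_degree) {
+// // combined degree too high -- reject the coalesce
+// }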
+
+uint IndexSet::lrg_union(uint lr1, uint lr2,
+ const uint fail_degree,
+ const PhaseIFG *ifg,
+ const RegMask &mask ) {
+ IndexSet *one = ifg->neighbors(lr1);
+ IndexSet *two = ifg->neighbors(lr2);
+ LRG &lrg1 = ifg->lrgs(lr1);
+ LRG &lrg2 = ifg->lrgs(lr2);
+#ifdef ASSERT
+ assert(_max_elements == one->_max_elements, "max element mismatch");
+ check_watch("union destination");
+ one->check_watch("union source");
+ two->check_watch("union source");
+#endif
+
+ // Compute the degree of the combined live-range. The combined
+ // live-range has the union of the original live-ranges' neighbors set as
+ // well as the neighbors of all intermediate copies, minus those neighbors
+ // that can not use the intersected allowed-register-set.
+
+ // Copy the larger set. Insert the smaller set into the larger.
+ if (two->count() > one->count()) {
+ IndexSet *temp = one;
+ one = two;
+ two = temp;
+ }
+
+ clear();
+
+ // Used to compute degree of register-only interferences. Infinite-stack
+ // neighbors do not alter colorability, as they can always color to some
+ // other color. (A variant of the Briggs assertion)
+ uint reg_degree = 0;
+
+ uint element;
+ // Load up the combined interference set with the neighbors of one
+ IndexSetIterator elements(one);
+ while ((element = elements.next()) != 0) {
+ LRG &lrg = ifg->lrgs(element);
+ if (mask.overlap(lrg.mask())) {
+ insert(element);
+ if( !lrg.mask().is_AllStack() ) {
+ reg_degree += lrg1.compute_degree(lrg);
+ if( reg_degree >= fail_degree ) return reg_degree;
+ } else {
+ // !!!!! Danger! No update to reg_degree despite having a neighbor.
+ // A variant of the Briggs assertion.
+ // Not needed if I simplify during coalesce, ala George/Appel.
+ assert( lrg.lo_degree(), "" );
+ }
+ }
+ }
+ // Add neighbors of two as well
+ IndexSetIterator elements2(two);
+ while ((element = elements2.next()) != 0) {
+ LRG &lrg = ifg->lrgs(element);
+ if (mask.overlap(lrg.mask())) {
+ if (insert(element)) {
+ if( !lrg.mask().is_AllStack() ) {
+ reg_degree += lrg2.compute_degree(lrg);
+ if( reg_degree >= fail_degree ) return reg_degree;
+ } else {
+ // !!!!! Danger! No update to reg_degree despite having a neighbor.
+ // A variant of the Briggs assertion.
+ // Not needed if I simplify during coalesce, ala George/Appel.
+ assert( lrg.lo_degree(), "" );
+ }
+ }
+ }
+ }
+
+ return reg_degree;
+}
+
+//---------------------------- IndexSet() -----------------------------
+// A deep copy constructor. This is used when you need a scratch copy of this set.
+
+IndexSet::IndexSet (IndexSet *set) {
+#ifdef ASSERT
+ _serial_number = _serial_count++;
+ set->check_watch("copied", _serial_number);
+ check_watch("initialized by copy", set->_serial_number);
+ _max_elements = set->_max_elements;
+#endif
+ _count = set->_count;
+ _max_blocks = set->_max_blocks;
+ if (_max_blocks <= preallocated_block_list_size) {
+ _blocks = _preallocated_block_list;
+ } else {
+ _blocks =
+ (IndexSet::BitBlock**) arena()->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
+ }
+ for (uint i = 0; i < _max_blocks; i++) {
+ BitBlock *block = set->_blocks[i];
+ if (block == &_empty_block) {
+ set_block(i, &_empty_block);
+ } else {
+ BitBlock *new_block = alloc_block();
+ memcpy(new_block->words(), block->words(), sizeof(uint32) * words_per_block);
+ set_block(i, new_block);
+ }
+ }
+}
+
+//---------------------------- IndexSet::initialize() -----------------------------
+// Prepare an IndexSet for use.
+
+void IndexSet::initialize(uint max_elements) {
+#ifdef ASSERT
+ _serial_number = _serial_count++;
+ check_watch("initialized", max_elements);
+ _max_elements = max_elements;
+#endif
+ _count = 0;
+ _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block;
+
+ if (_max_blocks <= preallocated_block_list_size) {
+ _blocks = _preallocated_block_list;
+ } else {
+ _blocks = (IndexSet::BitBlock**) arena()->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
+ }
+ for (uint i = 0; i < _max_blocks; i++) {
+ set_block(i, &_empty_block);
+ }
+}
+
+//---------------------------- IndexSet::initialize()------------------------------
+// Prepare an IndexSet for use. If it needs to allocate its _blocks array, it does
+// so from the Arena passed as a parameter. BitBlock allocation is still done from
+// the static Arena which was set with reset_memory().
+
+void IndexSet::initialize(uint max_elements, Arena *arena) {
+#ifdef ASSERT
+ _serial_number = _serial_count++;
+ check_watch("initialized2", max_elements);
+ _max_elements = max_elements;
+#endif // ASSERT
+ _count = 0;
+ _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block;
+
+ if (_max_blocks <= preallocated_block_list_size) {
+ _blocks = _preallocated_block_list;
+ } else {
+ _blocks = (IndexSet::BitBlock**) arena->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
+ }
+ for (uint i = 0; i < _max_blocks; i++) {
+ set_block(i, &_empty_block);
+ }
+}
+
+//---------------------------- IndexSet::swap() -----------------------------
+// Exchange two IndexSets.
+
+void IndexSet::swap(IndexSet *set) {
+#ifdef ASSERT
+ assert(_max_elements == set->_max_elements, "must have same universe size to swap");
+ check_watch("swap", set->_serial_number);
+ set->check_watch("swap", _serial_number);
+#endif
+
+ for (uint i = 0; i < _max_blocks; i++) {
+ BitBlock *temp = _blocks[i];
+ set_block(i, set->_blocks[i]);
+ set->set_block(i, temp);
+ }
+ uint temp = _count;
+ _count = set->_count;
+ set->_count = temp;
+}
+
+//---------------------------- IndexSet::dump() -----------------------------
+// Print this set. Used for debugging.
+
+#ifndef PRODUCT
+void IndexSet::dump() const {
+ IndexSetIterator elements(this);
+
+ tty->print("{");
+ uint i;
+ while ((i = elements.next()) != 0) {
+ tty->print("L%d ", i);
+ }
+ tty->print_cr("}");
+}
+#endif
+
+#ifdef ASSERT
+//---------------------------- IndexSet::tally_iteration_statistics() -----------------------------
+// Update block/bit counts to reflect that this set has been iterated over.
+
+void IndexSet::tally_iteration_statistics() const {
+ _total_bits += count();
+
+ for (uint i = 0; i < _max_blocks; i++) {
+ if (_blocks[i] != &_empty_block) {
+ _total_used_blocks++;
+ } else {
+ _total_unused_blocks++;
+ }
+ }
+}
+
+//---------------------------- IndexSet::print_statistics() -----------------------------
+// Print statistics about IndexSet usage.
+
+void IndexSet::print_statistics() {
+ long total_blocks = _total_used_blocks + _total_unused_blocks;
+ tty->print_cr ("Accumulated IndexSet usage statistics:");
+ tty->print_cr ("--------------------------------------");
+ tty->print_cr (" Iteration:");
+ tty->print_cr (" blocks visited: %d", total_blocks);
+ tty->print_cr (" blocks empty: %4.2f%%", 100.0*_total_unused_blocks/total_blocks);
+ tty->print_cr (" bit density (bits/used blocks): %4.2f%%", (double)_total_bits/_total_used_blocks);
+ tty->print_cr (" bit density (bits/all blocks): %4.2f%%", (double)_total_bits/total_blocks);
+ tty->print_cr (" Allocation:");
+ tty->print_cr (" blocks allocated: %d", _alloc_new);
+ tty->print_cr (" blocks used/reused: %d", _alloc_total);
+}
+
+//---------------------------- IndexSet::verify() -----------------------------
+// Expensive test of IndexSet sanity. Ensure that the count agrees with the
+// number of bits in the blocks. Make sure the iterator is seeing all elements
+// of the set. Meant for use during development.
+
+void IndexSet::verify() const {
+ assert(!member(0), "zero cannot be a member");
+ uint count = 0;
+ uint i;
+ for (i = 1; i < _max_elements; i++) {
+ if (member(i)) {
+ count++;
+ assert(count <= _count, "_count is messed up");
+ }
+ }
+
+ IndexSetIterator elements(this);
+ count = 0;
+ while ((i = elements.next()) != 0) {
+ count++;
+ assert(member(i), "returned a non member");
+ assert(count <= _count, "iterator returned wrong number of elements");
+ }
+}
+#endif
+
+//---------------------------- IndexSetIterator() -----------------------------
+// Create an iterator for a set. If empty blocks are detected when iterating
+// over the set, these blocks are replaced.
+
+IndexSetIterator::IndexSetIterator(IndexSet *set) {
+#ifdef ASSERT
+ if (CollectIndexSetStatistics) {
+ set->tally_iteration_statistics();
+ }
+ set->check_watch("traversed", set->count());
+#endif
+ if (set->is_empty()) {
+ _current = 0;
+ _next_word = IndexSet::words_per_block;
+ _next_block = 1;
+ _max_blocks = 1;
+
+ // We don't need the following values when we iterate over an empty set.
+ // The commented out code is left here to document that the omission
+ // is intentional.
+ //
+ //_value = 0;
+ //_words = NULL;
+ //_blocks = NULL;
+ //_set = NULL;
+ } else {
+ _current = 0;
+ _value = 0;
+ _next_block = 0;
+ _next_word = IndexSet::words_per_block;
+
+ _max_blocks = set->_max_blocks;
+ _words = NULL;
+ _blocks = set->_blocks;
+ _set = set;
+ }
+}
+
+//---------------------------- IndexSetIterator(const) -----------------------------
+// Iterate over a constant IndexSet.
+
+IndexSetIterator::IndexSetIterator(const IndexSet *set) {
+#ifdef ASSERT
+ if (CollectIndexSetStatistics) {
+ set->tally_iteration_statistics();
+ }
+ // We don't call check_watch from here to avoid bad recursion.
+ // set->check_watch("traversed const", set->count());
+#endif
+ if (set->is_empty()) {
+ _current = 0;
+ _next_word = IndexSet::words_per_block;
+ _next_block = 1;
+ _max_blocks = 1;
+
+ // We don't need the following values when we iterate over an empty set.
+ // The commented out code is left here to document that the omission
+ // is intentional.
+ //
+ //_value = 0;
+ //_words = NULL;
+ //_blocks = NULL;
+ //_set = NULL;
+ } else {
+ _current = 0;
+ _value = 0;
+ _next_block = 0;
+ _next_word = IndexSet::words_per_block;
+
+ _max_blocks = set->_max_blocks;
+ _words = NULL;
+ _blocks = set->_blocks;
+ _set = NULL;
+ }
+}
+
+//---------------------------- IndexSetIterator::advance_and_next() -----------------------------
+// Advance to the next non-empty word in the set being iterated over. Return the next element
+// if there is one. If we are done, return 0. This method is called from the next() method
+// when it gets done with a word.
+
+uint IndexSetIterator::advance_and_next() {
+ // See if there is another non-empty word in the current block.
+ for (uint wi = _next_word; wi < (unsigned)IndexSet::words_per_block; wi++) {
+ if (_words[wi] != 0) {
+ // Found a non-empty word.
+ _value = ((_next_block - 1) * IndexSet::bits_per_block) + (wi * IndexSet::bits_per_word);
+ _current = _words[wi];
+
+ _next_word = wi+1;
+
+ return next();
+ }
+ }
+
+ // We ran out of words in the current block. Advance to next non-empty block.
+ for (uint bi = _next_block; bi < _max_blocks; bi++) {
+ if (_blocks[bi] != &IndexSet::_empty_block) {
+ // Found a non-empty block.
+
+ _words = _blocks[bi]->words();
+ for (uint wi = 0; wi < (unsigned)IndexSet::words_per_block; wi++) {
+ if (_words[wi] != 0) {
+ // Found a non-empty word.
+ _value = (bi * IndexSet::bits_per_block) + (wi * IndexSet::bits_per_word);
+ _current = _words[wi];
+
+ _next_block = bi+1;
+ _next_word = wi+1;
+
+ return next();
+ }
+ }
+
+ // All of the words in the block were empty. Replace
+ // the block with the empty block.
+ if (_set) {
+ _set->free_block(bi);
+ }
+ }
+ }
+
+ // These assignments make redundant calls to next on a finished iterator
+ // faster. Probably not necessary.
+ _next_block = _max_blocks;
+ _next_word = IndexSet::words_per_block;
+
+ // No more words.
+ return 0;
+}
diff --git a/src/share/vm/opto/indexSet.hpp b/src/share/vm/opto/indexSet.hpp
new file mode 100644
index 000000000..de7de22aa
--- /dev/null
+++ b/src/share/vm/opto/indexSet.hpp
@@ -0,0 +1,461 @@
+/*
+ * Copyright 1998-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This file defines the IndexSet class, a set of sparse integer indices.
+// This data structure is used by the compiler in its liveness analysis and
+// during register allocation.
+
+//-------------------------------- class IndexSet ----------------------------
+// An IndexSet is a piece-wise bitvector. At the top level, we have an array
+// of pointers to bitvector chunks called BitBlocks. Each BitBlock has a fixed
+// size and is allocated from a shared free list. The bits which are set in
+// each BitBlock correspond to the elements of the set.
+
+class IndexSet : public ResourceObj {
+ friend class IndexSetIterator;
+
+ public:
+ // When we allocate an IndexSet, it starts off with an array of top level block
+ // pointers of a set length. This size is intended to be large enough for the
+ // majority of IndexSets. In the cases when this size is not large enough,
+ // a separately allocated array is used.
+
+ // The length of the preallocated top level block array
+ enum { preallocated_block_list_size = 16 };
+
+ // Elements of an IndexSet get decomposed into three fields. The highest order
+ // bits are the block index, which tells which high level block holds the element.
+ // Within that block, the word index indicates which word holds the element.
+ // Finally, the bit index determines which single bit within that word indicates
+ // membership of the element in the set.
+
+ // The lengths of the index bitfields
+ enum { bit_index_length = 5,
+ word_index_length = 3,
+ block_index_length = 8 // not used
+ };
+
+ // Derived constants used for manipulating the index bitfields
+ enum {
+ bit_index_offset = 0, // not used
+ word_index_offset = bit_index_length,
+ block_index_offset = bit_index_length + word_index_length,
+
+ bits_per_word = 1 << bit_index_length,
+ words_per_block = 1 << word_index_length,
+ bits_per_block = bits_per_word * words_per_block,
+
+ bit_index_mask = right_n_bits(bit_index_length),
+ word_index_mask = right_n_bits(word_index_length)
+ };
+
+ // These routines are used for extracting the block, word, and bit index
+ // from an element.
+ static uint get_block_index(uint element) {
+ return element >> block_index_offset;
+ }
+ static uint get_word_index(uint element) {
+ return mask_bits(element >> word_index_offset,word_index_mask);
+ }
+ static uint get_bit_index(uint element) {
+ return mask_bits(element,bit_index_mask);
+ }
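+
+ // Worked example (illustrative): with the field widths above, a word holds
+ // 32 bits and a block holds 8 words == 256 bits. Element 300 therefore
+ // lives in block 300 >> 8 == 1, word (300 >> 5) & 7 == 1, bit 300 & 31 == 12.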
+
+ //------------------------------ class BitBlock ----------------------------
+ // The BitBlock class is a segment of a bitvector set.
+
+ class BitBlock : public ResourceObj {
+ friend class IndexSetIterator;
+ friend class IndexSet;
+
+ private:
+ // All of BitBlock's fields and methods are declared private. We limit
+ // access to IndexSet and IndexSetIterator.
+
+ // A BitBlock is composed of some number of 32 bit words. When a BitBlock
+ // is not in use by any IndexSet, it is stored on a free list. The next field
+ // is used by IndexSet to maintain this free list.
+
+ union {
+ uint32 _words[words_per_block];
+ BitBlock *_next;
+ } _data;
+
+ // accessors
+ uint32 *words() { return _data._words; }
+ void set_next(BitBlock *next) { _data._next = next; }
+ BitBlock *next() { return _data._next; }
+
+ // Operations. A BitBlock supports four simple operations,
+ // clear(), member(), insert(), and remove(). These methods do
+ // not assume that the block index has been masked out.
+
+ void clear() {
+ memset(words(), 0, sizeof(uint32) * words_per_block);
+ }
+
+ bool member(uint element) {
+ uint word_index = IndexSet::get_word_index(element);
+ uint bit_index = IndexSet::get_bit_index(element);
+
+ return ((words()[word_index] & (uint32)(0x1 << bit_index)) != 0);
+ }
+
+ bool insert(uint element) {
+ uint word_index = IndexSet::get_word_index(element);
+ uint bit_index = IndexSet::get_bit_index(element);
+
+ uint32 bit = (0x1 << bit_index);
+ uint32 before = words()[word_index];
+ words()[word_index] = before | bit;
+ return ((before & bit) != 0);
+ }
+
+ bool remove(uint element) {
+ uint word_index = IndexSet::get_word_index(element);
+ uint bit_index = IndexSet::get_bit_index(element);
+
+ uint32 bit = (0x1 << bit_index);
+ uint32 before = words()[word_index];
+ words()[word_index] = before & ~bit;
+ return ((before & bit) != 0);
+ }
+ };
+
+ //-------------------------- BitBlock allocation ---------------------------
+ private:
+
+ // All IndexSets share an arena from which they allocate BitBlocks. Unused
+ // BitBlocks are placed on a free list.
+
+ // The number of BitBlocks to allocate at a time
+ enum { bitblock_alloc_chunk_size = 50 };
+
+ static Arena *arena() { return Compile::current()->indexSet_arena(); }
+
+ static void populate_free_list();
+
+ public:
+
+ // Invalidate the current free BitBlock list and begin allocation
+ // from a new arena. It is essential that this method is called whenever
+ // the Arena being used for BitBlock allocation is reset.
+ static void reset_memory(Compile* compile, Arena *arena) {
+ compile->set_indexSet_free_block_list(NULL);
+ compile->set_indexSet_arena(arena);
+
+ // This should probably be done in a static initializer
+ _empty_block.clear();
+ }
+
+ private:
+ friend class BitBlock;
+ // A distinguished BitBlock which always remains empty. When a new IndexSet is
+ // created, all of its top level BitBlock pointers are initialized to point to
+ // this.
+ static BitBlock _empty_block;
+
+ //-------------------------- Members ------------------------------------------
+
+ // The number of elements in the set
+ uint _count;
+
+ // Our top level array of bitvector segments
+ BitBlock **_blocks;
+
+ BitBlock *_preallocated_block_list[preallocated_block_list_size];
+
+ // The number of top level array entries in use
+ uint _max_blocks;
+
+ // Our assertions need to know the maximum number allowed in the set
+#ifdef ASSERT
+ uint _max_elements;
+#endif
+
+ // The next IndexSet on the free list (not used at same time as count)
+ IndexSet *_next;
+
+ public:
+ //-------------------------- Free list operations ------------------------------
+ // Individual IndexSets can be placed on a free list. This is done in PhaseLive.
+
+ IndexSet *next() {
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ check_watch("removed from free list?", ((_next == NULL) ? 0 : _next->_serial_number));
+ }
+#endif
+ return _next;
+ }
+
+ void set_next(IndexSet *next) {
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ check_watch("put on free list?", ((next == NULL) ? 0 : next->_serial_number));
+ }
+#endif
+ _next = next;
+ }
+
+ private:
+ //-------------------------- Utility methods -----------------------------------
+
+ // Get the block which holds element
+ BitBlock *get_block_containing(uint element) const {
+ assert(element < _max_elements, "element out of bounds");
+ return _blocks[get_block_index(element)];
+ }
+
+ // Set a block in the top level array
+ void set_block(uint index, BitBlock *block) {
+#ifdef ASSERT
+ if( VerifyOpto )
+ check_watch("set block", index);
+#endif
+ _blocks[index] = block;
+ }
+
+ // Get a BitBlock from the free list
+ BitBlock *alloc_block();
+
+ // Get a BitBlock from the free list and place it in the top level array
+ BitBlock *alloc_block_containing(uint element);
+
+ // Free a block from the top level array, placing it on the free BitBlock list
+ void free_block(uint i);
+
+ public:
+ //-------------------------- Primitive set operations --------------------------
+
+ void clear() {
+#ifdef ASSERT
+ if( VerifyOpto )
+ check_watch("clear");
+#endif
+ _count = 0;
+ for (uint i = 0; i < _max_blocks; i++) {
+ BitBlock *block = _blocks[i];
+ if (block != &_empty_block) {
+ free_block(i);
+ }
+ }
+ }
+
+ uint count() const { return _count; }
+
+ bool is_empty() const { return _count == 0; }
+
+ bool member(uint element) const {
+ return get_block_containing(element)->member(element);
+ }
+
+ bool insert(uint element) {
+#ifdef ASSERT
+ if( VerifyOpto )
+ check_watch("insert", element);
+#endif
+ if (element == 0) {
+ return 0;
+ }
+ BitBlock *block = get_block_containing(element);
+ if (block == &_empty_block) {
+ block = alloc_block_containing(element);
+ }
+ bool present = block->insert(element);
+ if (!present) {
+ _count++;
+ }
+ return !present;
+ }
+
+ bool remove(uint element) {
+#ifdef ASSERT
+ if( VerifyOpto )
+ check_watch("remove", element);
+#endif
+
+ BitBlock *block = get_block_containing(element);
+ bool present = block->remove(element);
+ if (present) {
+ _count--;
+ }
+ return present;
+ }
+
+ //-------------------------- Compound set operations ------------------------
+ // Compute the union of all elements of one and two which interfere
+ // with the RegMask mask. If the degree of the union exceeds
+ // fail_degree, the union bails out. The underlying set is
+ // cleared before the union is performed.
+ uint lrg_union(uint lr1, uint lr2,
+ const uint fail_degree,
+ const class PhaseIFG *ifg,
+ const RegMask &mask);
+
+
+ //------------------------- Construction, initialization -----------------------
+
+ IndexSet() {}
+
+ // This constructor is used for making a deep copy of an IndexSet.
+ IndexSet(IndexSet *set);
+
+ // Perform initialization on an IndexSet
+ void initialize(uint max_element);
+
+ // Initialize an IndexSet. If the top level BitBlock array needs to be
+ // allocated, do it from the proffered arena. BitBlocks are still allocated
+ // from the static Arena member.
+ void initialize(uint max_element, Arena *arena);
+
+ // Exchange two sets
+ void swap(IndexSet *set);
+
+ //-------------------------- Debugging and statistics --------------------------
+
+#ifndef PRODUCT
+ // Output an IndexSet for debugging
+ void dump() const;
+#endif
+
+#ifdef ASSERT
+ void tally_iteration_statistics() const;
+
+ // BitBlock allocation statistics
+ static uint _alloc_new;
+ static uint _alloc_total;
+
+ // Block density statistics
+ static long _total_bits;
+ static long _total_used_blocks;
+ static long _total_unused_blocks;
+
+ // Sanity tests
+ void verify() const;
+
+ static int _serial_count;
+ int _serial_number;
+
+ // Check to see if the serial number of the current set is the one we're tracing.
+ // If it is, print a message.
+ void check_watch(const char *operation, uint operand) const {
+ if (IndexSetWatch != 0) {
+ if (IndexSetWatch == -1 || _serial_number == IndexSetWatch) {
+ tty->print_cr("IndexSet %d : %s ( %d )", _serial_number, operation, operand);
+ }
+ }
+ }
+ void check_watch(const char *operation) const {
+ if (IndexSetWatch != 0) {
+ if (IndexSetWatch == -1 || _serial_number == IndexSetWatch) {
+ tty->print_cr("IndexSet %d : %s", _serial_number, operation);
+ }
+ }
+ }
+
+ public:
+ static void print_statistics();
+
+#endif
+};
+
+
+//-------------------------------- class IndexSetIterator --------------------
+// An iterator for IndexSets.
+
+class IndexSetIterator VALUE_OBJ_CLASS_SPEC {
+ friend class IndexSet;
+
+ public:
+
+ // We walk over the bits in a word in chunks of size window_size.
+ enum { window_size = 5,
+ window_mask = right_n_bits(window_size),
+ table_size = (1 << window_size) };
+
+ // For an integer of length window_size, what is the first set bit?
+ static const byte _first_bit[table_size];
+
+ // For an integer of length window_size, what is the second set bit?
+ static const byte _second_bit[table_size];
+
+ private:
+ // The current word we are inspecting
+ uint32 _current;
+
+ // What element number are we currently on?
+ uint _value;
+
+ // The index of the next word we will inspect
+ uint _next_word;
+
+ // A pointer to the contents of the current block
+ uint32 *_words;
+
+ // The index of the next block we will inspect
+ uint _next_block;
+
+ // A pointer to the blocks in our set
+ IndexSet::BitBlock **_blocks;
+
+ // The number of blocks in the set
+ uint _max_blocks;
+
+ // If the iterator was created from a non-const set, we replace
+ // non-canonical empty blocks with the _empty_block pointer. If
+ // _set is NULL, we do no replacement.
+ IndexSet *_set;
+
+ // Advance to the next non-empty word and return the next
+ // element in the set.
+ uint advance_and_next();
+
+
+ public:
+
+ // If an iterator is built from a constant set then empty blocks
+ // are not canonicalized.
+ IndexSetIterator(IndexSet *set);
+ IndexSetIterator(const IndexSet *set);
+
+ // Return the next element of the set. Return 0 when done.
+ uint next() {
+ uint current = _current;
+ if (current != 0) {
+ uint value = _value;
+ while (mask_bits(current,window_mask) == 0) {
+ current >>= window_size;
+ value += window_size;
+ }
+
+ uint advance = _second_bit[mask_bits(current,window_mask)];
+ _current = current >> advance;
+ _value = value + advance;
+ return value + _first_bit[mask_bits(current,window_mask)];
+ } else {
+ return advance_and_next();
+ }
+ }
+};
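
For reference, here is a minimal standalone sketch of the windowed bit scan that IndexSetIterator::next() performs above, written in plain C++ with hypothetical names and a single first-bit table instead of the first/second-bit pair used by the real iterator. It is illustrative only, not HotSpot code.

// Illustrative sketch only; names and table layout are hypothetical.
#include <cstdio>

enum { win_size = 5,
       win_mask = (1 << win_size) - 1,
       tbl_size = (1 << win_size) };

static unsigned char first_bit[tbl_size];   // lowest set bit of a 5-bit value

static void build_table() {
  first_bit[0] = 0;                         // never consulted: zero windows are skipped
  for (int v = 1; v < tbl_size; v++) {
    int b = 0;
    while (((v >> b) & 1) == 0) b++;
    first_bit[v] = (unsigned char) b;
  }
}

// Visit every set bit of 'word', lowest first, scanning five bits at a time.
static void for_each_bit(unsigned word) {
  unsigned value = 0;                       // element number of the window start
  while (word != 0) {
    while ((word & win_mask) == 0) {        // skip empty 5-bit windows
      word  >>= win_size;
      value  += win_size;
    }
    unsigned bit = first_bit[word & win_mask];
    printf("bit %u is set\n", value + bit);
    word  >>= (bit + 1);                    // consume up to and including that bit
    value  += (bit + 1);
  }
}

int main() {
  build_table();
  for_each_bit(0x80000401u);                // prints 0, 10, 31
  return 0;
}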
diff --git a/src/share/vm/opto/lcm.cpp b/src/share/vm/opto/lcm.cpp
new file mode 100644
index 000000000..5b0ddae99
--- /dev/null
+++ b/src/share/vm/opto/lcm.cpp
@@ -0,0 +1,934 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_lcm.cpp.incl"
+
+//------------------------------implicit_null_check----------------------------
+// Detect implicit-null-check opportunities. Basically, find NULL checks
+// with suitable memory ops nearby. Use the memory op to do the NULL check.
+// I can generate a memory op if there is not one nearby.
+// The proj is the control projection for the not-null case.
+// The val is the pointer being checked for nullness.
+void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
+ // Assume that if a null check is needed for offset 0, it is always needed.
+ // Intel Solaris doesn't support any null checks yet and no
+ // mechanism exists (yet) to set the switches at an os_cpu level.
+ if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;
+
+ // Make sure the ptr-is-null path appears to be uncommon!
+ float f = end()->as_MachIf()->_prob;
+ if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
+ if( f > PROB_UNLIKELY_MAG(4) ) return;
+
+ uint bidx = 0; // Capture index of value into memop
+ bool was_store; // Memory op is a store op
+
+ // Get the successor block for if the test ptr is non-null
+ Block* not_null_block; // this one goes with the proj
+ Block* null_block;
+ if (_nodes[_nodes.size()-1] == proj) {
+ null_block = _succs[0];
+ not_null_block = _succs[1];
+ } else {
+ assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other");
+ not_null_block = _succs[0];
+ null_block = _succs[1];
+ }
+
+ // Search the exception block for an uncommon trap.
+ // (See Parse::do_if and Parse::do_ifnull for the reason
+ // we need an uncommon trap. Briefly, we need a way to
+ // detect failure of this optimization, as in 6366351.)
+ {
+ bool found_trap = false;
+ for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) {
+ Node* nn = null_block->_nodes[i1];
+ if (nn->is_MachCall() &&
+ nn->as_MachCall()->entry_point() ==
+ SharedRuntime::uncommon_trap_blob()->instructions_begin()) {
+ const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
+ if (trtype->isa_int() && trtype->is_int()->is_con()) {
+ jint tr_con = trtype->is_int()->get_con();
+ Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
+ Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
+ assert((int)reason < (int)BitsPerInt, "recode bit map");
+ if (is_set_nth_bit(allowed_reasons, (int) reason)
+ && action != Deoptimization::Action_none) {
+ // This uncommon trap is sure to recompile, eventually.
+ // When that happens, C->too_many_traps will prevent
+ // this transformation from happening again.
+ found_trap = true;
+ }
+ }
+ break;
+ }
+ }
+ if (!found_trap) {
+ // We did not find an uncommon trap.
+ return;
+ }
+ }
+
+ // Search the successor block for a load or store whose base value is also
+ // the tested value. There may be several.
+ Node_List *out = new Node_List(Thread::current()->resource_area());
+ MachNode *best = NULL; // Best found so far
+ for (DUIterator i = val->outs(); val->has_out(i); i++) {
+ Node *m = val->out(i);
+ if( !m->is_Mach() ) continue;
+ MachNode *mach = m->as_Mach();
+ was_store = false;
+ switch( mach->ideal_Opcode() ) {
+ case Op_LoadB:
+ case Op_LoadC:
+ case Op_LoadD:
+ case Op_LoadF:
+ case Op_LoadI:
+ case Op_LoadL:
+ case Op_LoadP:
+ case Op_LoadS:
+ case Op_LoadKlass:
+ case Op_LoadRange:
+ case Op_LoadD_unaligned:
+ case Op_LoadL_unaligned:
+ break;
+ case Op_StoreB:
+ case Op_StoreC:
+ case Op_StoreCM:
+ case Op_StoreD:
+ case Op_StoreF:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreP:
+ was_store = true; // Memory op is a store op
+ // Stores will have their address in slot 2 (memory in slot 1).
+ // If the value being null-checked is in another slot, it means we
+ // are storing the checked value, which does NOT check the value!
+ if( mach->in(2) != val ) continue;
+ break; // Found a memory op?
+ case Op_StrComp:
+ // Not a legit memory op for implicit null check regardless of
+ // embedded loads
+ continue;
+ default: // Also check for embedded loads
+ if( !mach->needs_anti_dependence_check() )
+ continue; // Not a memory op; skip it
+ break;
+ }
+ // Check that the offset is not too large for an implicit exception
+ {
+ intptr_t offset = 0;
+ const TypePtr *adr_type = NULL; // Do not need this return value here
+ const Node* base = mach->get_base_and_disp(offset, adr_type);
+ if (base == NULL || base == NodeSentinel) {
+ // Cannot reason about it; it is probably not an implicit null exception
+ } else {
+ const TypePtr* tptr = base->bottom_type()->is_ptr();
+ // Give up if offset is not a compile-time constant
+ if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
+ continue;
+ offset += tptr->_offset; // adjust if the base itself carries an offset
+ if( MacroAssembler::needs_explicit_null_check(offset) )
+ continue; // Give up if the reference is beyond the 4K page size
+ }
+ }
+
+ // Check ctrl input to see if the null-check dominates the memory op
+ Block *cb = cfg->_bbs[mach->_idx];
+ cb = cb->_idom; // Always hoist at least 1 block
+ if( !was_store ) { // Stores can be hoisted only one block
+ while( cb->_dom_depth > (_dom_depth + 1))
+ cb = cb->_idom; // Hoist loads as far as we want
+ // The non-null-block should dominate the memory op, too. Live
+ // range spilling will insert a spill in the non-null-block if it
+ // needs to spill the memory op for an implicit null check.
+ if (cb->_dom_depth == (_dom_depth + 1)) {
+ if (cb != not_null_block) continue;
+ cb = cb->_idom;
+ }
+ }
+ if( cb != this ) continue;
+
+ // Found a memory user; see if it can be hoisted to check-block
+ uint vidx = 0; // Capture index of value into memop
+ uint j;
+ for( j = mach->req()-1; j > 0; j-- ) {
+ if( mach->in(j) == val ) vidx = j;
+ // Block of memory-op input
+ Block *inb = cfg->_bbs[mach->in(j)->_idx];
+ Block *b = this; // Start from nul check
+ while( b != inb && b->_dom_depth > inb->_dom_depth )
+ b = b->_idom; // search upwards for input
+ // See if input dominates null check
+ if( b != inb )
+ break;
+ }
+ if( j > 0 )
+ continue;
+ Block *mb = cfg->_bbs[mach->_idx];
+ // Hoisting stores requires more checks for the anti-dependence case.
+ // Give up hoisting if we have to move the store past any load.
+ if( was_store ) {
+ Block *b = mb; // Start searching here for a local load
+ // mach use (faulting) trying to hoist
+ // n might be blocker to hoisting
+ while( b != this ) {
+ uint k;
+ for( k = 1; k < b->_nodes.size(); k++ ) {
+ Node *n = b->_nodes[k];
+ if( n->needs_anti_dependence_check() &&
+ n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
+ break; // Found anti-dependent load
+ }
+ if( k < b->_nodes.size() )
+ break; // Found anti-dependent load
+ // Make sure control does not do a merge (we would have to check all paths)
+ if( b->num_preds() != 2 ) break;
+ b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block
+ }
+ if( b != this ) continue;
+ }
+
+ // Make sure this memory op is not already being used for a NullCheck
+ Node *e = mb->end();
+ if( e->is_MachNullCheck() && e->in(1) == mach )
+ continue; // Already being used as a NULL check
+
+ // Found a candidate! Pick one with least dom depth - the highest
+ // in the dom tree should be closest to the null check.
+ if( !best ||
+ cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) {
+ best = mach;
+ bidx = vidx;
+
+ }
+ }
+ // No candidate!
+ if( !best ) return;
+
+ // ---- Found an implicit null check
+ extern int implicit_null_checks;
+ implicit_null_checks++;
+
+ // Hoist the memory candidate up to the end of the test block.
+ Block *old_block = cfg->_bbs[best->_idx];
+ old_block->find_remove(best);
+ add_inst(best);
+ cfg->_bbs.map(best->_idx,this);
+
+ // Move the control dependence
+ if (best->in(0) && best->in(0) == old_block->_nodes[0])
+ best->set_req(0, _nodes[0]);
+
+ // Check for flag-killing projections that also need to be hoisted
+ // Should be DU safe because no edge updates.
+ for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
+ Node* n = best->fast_out(j);
+ if( n->Opcode() == Op_MachProj ) {
+ cfg->_bbs[n->_idx]->find_remove(n);
+ add_inst(n);
+ cfg->_bbs.map(n->_idx,this);
+ }
+ }
+
+ Compile *C = cfg->C;
+ // proj==Op_IfTrue --> ne test; proj==Op_IfFalse --> eq test.
+ // One of two graph shapes got matched:
+ // (IfTrue (If (Bool NE (CmpP ptr NULL))))
+ // (IfFalse (If (Bool EQ (CmpP ptr NULL))))
+ // NULL checks are always branch-if-eq. If we see an IfTrue projection
+ // then we are replacing a 'ne' test with an 'eq' NULL check test.
+ // We need to flip the projections to keep the same semantics.
+ if( proj->Opcode() == Op_IfTrue ) {
+ // Swap order of projections in basic block to swap branch targets
+ Node *tmp1 = _nodes[end_idx()+1];
+ Node *tmp2 = _nodes[end_idx()+2];
+ _nodes.map(end_idx()+1, tmp2);
+ _nodes.map(end_idx()+2, tmp1);
+ Node *tmp = new (C, 1) Node(C->top()); // Use a non-NULL input
+ tmp1->replace_by(tmp);
+ tmp2->replace_by(tmp1);
+ tmp->replace_by(tmp2);
+ tmp->destruct();
+ }
+
+ // Remove the existing null check; use a new implicit null check instead.
+ // Since schedule-local needs precise def-use info, we need to correct
+ // it as well.
+ Node *old_tst = proj->in(0);
+ MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
+ _nodes.map(end_idx(),nul_chk);
+ cfg->_bbs.map(nul_chk->_idx,this);
+ // Redirect users of old_tst to nul_chk
+ for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
+ old_tst->last_out(i2)->set_req(0, nul_chk);
+ // Clean-up any dead code
+ for (uint i3 = 0; i3 < old_tst->req(); i3++)
+ old_tst->set_req(i3, NULL);
+
+ cfg->latency_from_uses(nul_chk);
+ cfg->latency_from_uses(best);
+}
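
The hoisting legality test above walks up the immediate-dominator chain from the memory op's block: loads may climb several dominator levels (stepping through the not-null successor), while stores may move only one block. The following toy sketch models that walk under stated assumptions; ToyBlock and its fields are hypothetical stand-ins for Block, and the null guards are only defensive.

// Illustrative sketch only; ToyBlock and its fields are hypothetical.
#include <cstddef>

struct ToyBlock {
  ToyBlock* idom;        // immediate dominator
  unsigned  dom_depth;   // depth in the dominator tree
};

// May a memory op living in 'op_block' be hoisted into 'test_block' (the block
// ending with the null test)?  Loads may climb several dominator levels and
// must pass through the not-null successor; stores may hoist only one block.
static bool can_hoist_to(ToyBlock* test_block, ToyBlock* not_null_block,
                         ToyBlock* op_block, bool is_store) {
  ToyBlock* cb = op_block->idom;                 // always hoist at least one block
  if (!is_store) {
    while (cb != NULL && cb->dom_depth > test_block->dom_depth + 1)
      cb = cb->idom;                             // climb as far as needed
    if (cb != NULL && cb->dom_depth == test_block->dom_depth + 1) {
      if (cb != not_null_block) return false;    // must come through the non-null path
      cb = cb->idom;
    }
  }
  return cb == test_block;
}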
+
+
+//------------------------------select-----------------------------------------
+// Select a nice fellow from the worklist to schedule next. If there is only
+// one choice, then use it. Projections take top priority for correctness
+// reasons - if I see a projection, then it is next. There are a number of
+// other special cases, for instructions that consume condition codes, et al.
+// These are chosen immediately. Some instructions are required to immediately
+// precede the last instruction in the block, and these are taken last. Of the
+// remaining cases (most), choose the instruction with the greatest latency
+ // (that is, the greatest number of pseudo-cycles required to the end of the
+// routine). If there is a tie, choose the instruction with the most inputs.
+Node *Block::select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot) {
+
+ // If there is only a single entry on the worklist, use it
+ uint cnt = worklist.size();
+ if (cnt == 1) {
+ Node *n = worklist[0];
+ worklist.map(0,worklist.pop());
+ return n;
+ }
+
+ uint choice = 0; // Bigger is most important
+ uint latency = 0; // Bigger is scheduled first
+ uint score = 0; // Bigger is better
+ uint idx; // Index in worklist
+
+ for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
+ // Order in worklist is used to break ties.
+ // See caller for how this is used to delay scheduling
+ // of induction variable increments to after the other
+ // uses of the phi are scheduled.
+ Node *n = worklist[i]; // Get Node on worklist
+
+ int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0;
+ if( n->is_Proj() || // Projections always win
+ n->Opcode()== Op_Con || // So does constant 'Top'
+ iop == Op_CreateEx || // Create-exception must start block
+ iop == Op_CheckCastPP
+ ) {
+ worklist.map(i,worklist.pop());
+ return n;
+ }
+
+ // Final call in a block must be adjacent to 'catch'
+ Node *e = end();
+ if( e->is_Catch() && e->in(0)->in(0) == n )
+ continue;
+
+ // Memory op for an implicit null check has to be at the end of the block
+ if( e->is_MachNullCheck() && e->in(1) == n )
+ continue;
+
+ uint n_choice = 2;
+
+ // See if this instruction is consumed by a branch. If so, then (as the
+ // branch is the last instruction in the basic block) force it to the
+ // end of the basic block
+ if ( must_clone[iop] ) {
+ // See if any use is a branch
+ bool found_machif = false;
+
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+
+ // The use is a conditional branch, make them adjacent
+ if (use->is_MachIf() && cfg->_bbs[use->_idx]==this ) {
+ found_machif = true;
+ break;
+ }
+
+ // If more than this instruction is still pending for the successor
+ // to become ready, don't choose this one while other opportunities
+ // are ready.
+ if (ready_cnt[use->_idx] > 1)
+ n_choice = 1;
+ }
+
+ // A branch uses this instruction, so prefer not to schedule it yet
+ if (found_machif)
+ continue;
+ }
+
+ // See if this has a predecessor that is "must_clone", i.e. sets the
+ // condition code. If so, choose this first
+ for (uint j = 0; j < n->req() ; j++) {
+ Node *inn = n->in(j);
+ if (inn) {
+ if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) {
+ n_choice = 3;
+ break;
+ }
+ }
+ }
+
+ // MachTemps should be scheduled last so they are near their uses
+ if (n->is_MachTemp()) {
+ n_choice = 1;
+ }
+
+ uint n_latency = cfg->_node_latency.at_grow(n->_idx);
+ uint n_score = n->req(); // Many inputs get high score to break ties
+
+ // Keep best latency found
+ if( choice < n_choice ||
+ ( choice == n_choice &&
+ ( latency < n_latency ||
+ ( latency == n_latency &&
+ ( score < n_score ))))) {
+ choice = n_choice;
+ latency = n_latency;
+ score = n_score;
+ idx = i; // Also keep index in worklist
+ }
+ } // End of for all ready nodes in worklist
+
+ Node *n = worklist[idx]; // Get the winner
+
+ worklist.map(idx,worklist.pop()); // Compress worklist
+ return n;
+}
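
The winner above is chosen by a lexicographic (choice, latency, score) comparison, with worklist order breaking any remaining tie. Below is a self-contained sketch of that comparison on plain arrays; the names and the sample data are illustrative only, not part of the source.

// Illustrative sketch only; Candidate and pick_best are hypothetical names.
#include <cstdio>

struct Candidate { unsigned choice, latency, score; };

// Return the index of the best candidate under the same ordering as select():
// bigger choice first, then bigger latency, then bigger score; the earlier
// index wins remaining ties (worklist order breaks ties).
static unsigned pick_best(const Candidate* cand, unsigned cnt) {
  unsigned idx = 0, choice = 0, latency = 0, score = 0;
  for (unsigned i = 0; i < cnt; i++) {
    const Candidate& c = cand[i];
    if (choice  <  c.choice ||
        (choice == c.choice &&
         (latency  <  c.latency ||
          (latency == c.latency && score < c.score)))) {
      choice = c.choice; latency = c.latency; score = c.score; idx = i;
    }
  }
  return idx;
}

int main() {
  Candidate w[] = { {2, 7, 3}, {3, 1, 1}, {3, 1, 4}, {2, 9, 9} };
  printf("best = %u\n", pick_best(w, 4));     // prints 2
  return 0;
}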
+
+
+//------------------------------set_next_call----------------------------------
+void Block::set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ) {
+ if( next_call.test_set(n->_idx) ) return;
+ for( uint i=0; i<n->len(); i++ ) {
+ Node *m = n->in(i);
+ if( !m ) continue; // must see all nodes in block that precede call
+ if( bbs[m->_idx] == this )
+ set_next_call( m, next_call, bbs );
+ }
+}
+
+//------------------------------needed_for_next_call---------------------------
+// Set the flag 'next_call' for each Node that is needed for the next call to
+// be scheduled. This flag lets me bias scheduling so Nodes needed for the
+// next subroutine call get priority - basically it moves things NOT needed
+// for the next call till after the call. This prevents me from trying to
+// carry lots of stuff live across a call.
+void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs) {
+ // Find the next control-defining Node in this block
+ Node* call = NULL;
+ for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) {
+ Node* m = this_call->fast_out(i);
+ if( bbs[m->_idx] == this && // Local-block user
+ m != this_call && // Not self-start node
+ m->is_Call() ) {
+ call = m;
+ break;
+ }
+ }
+ if (call == NULL) return; // No next call (e.g., block end is near)
+ // Set next-call for all inputs to this call
+ set_next_call(call, next_call, bbs);
+}
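
set_next_call() above is a depth-first marking of every same-block node that feeds the upcoming call. A toy model of that recursion follows, using a std::set in place of the VectorSet; ToyNode and mark_needed are hypothetical names for illustration only.

// Illustrative sketch only; ToyNode and its fields are hypothetical.
#include <cstddef>
#include <set>
#include <vector>

struct ToyNode {
  int                   block_id;   // which basic block the node landed in
  std::vector<ToyNode*> inputs;     // inputs (slots may be NULL)
};

// Mark n and, transitively, every same-block input of n.  The 'marked' set
// plays the role of the VectorSet next_call in Block::set_next_call().
static void mark_needed(ToyNode* n, int block_id, std::set<ToyNode*>& marked) {
  if (!marked.insert(n).second) return;          // already marked: stop recursing
  for (size_t i = 0; i < n->inputs.size(); i++) {
    ToyNode* m = n->inputs[i];
    if (m != NULL && m->block_id == block_id)    // stay within the current block
      mark_needed(m, block_id, marked);
  }
}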
+
+//------------------------------sched_call-------------------------------------
+uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
+ RegMask regs;
+
+ // Schedule all the users of the call right now. All the users are
+ // projection Nodes, so they must be scheduled next to the call.
+ // Collect all the defined registers.
+ for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
+ Node* n = mcall->fast_out(i);
+ assert( n->Opcode()==Op_MachProj, "" );
+ --ready_cnt[n->_idx];
+ assert( !ready_cnt[n->_idx], "" );
+ // Schedule next to call
+ _nodes.map(node_cnt++, n);
+ // Collect defined registers
+ regs.OR(n->out_RegMask());
+ // Check for scheduling the next control-definer
+ if( n->bottom_type() == Type::CONTROL )
+ // Warm up next pile of heuristic bits
+ needed_for_next_call(n, next_call, bbs);
+
+ // Children of projections are now all ready
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* m = n->fast_out(j); // Get user
+ if( bbs[m->_idx] != this ) continue;
+ if( m->is_Phi() ) continue;
+ if( !--ready_cnt[m->_idx] )
+ worklist.push(m);
+ }
+
+ }
+
+ // Act as if the call defines the Frame Pointer.
+ // Certainly the FP is alive and well after the call.
+ regs.Insert(matcher.c_frame_pointer());
+
+ // Set all registers killed and not already defined by the call.
+ uint r_cnt = mcall->tf()->range()->cnt();
+ int op = mcall->ideal_Opcode();
+ MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
+ bbs.map(proj->_idx,this);
+ _nodes.insert(node_cnt++, proj);
+
+ // Select the right register save policy.
+ const char * save_policy;
+ switch (op) {
+ case Op_CallRuntime:
+ case Op_CallLeaf:
+ case Op_CallLeafNoFP:
+ // Calling C code so use C calling convention
+ save_policy = matcher._c_reg_save_policy;
+ break;
+
+ case Op_CallStaticJava:
+ case Op_CallDynamicJava:
+ // Calling Java code so use Java calling convention
+ save_policy = matcher._register_save_policy;
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ // When using CallRuntime mark SOE registers as killed by the call
+ // so values that could show up in the RegisterMap aren't live in a
+ // callee saved register since the register wouldn't know where to
+ // find them. CallLeaf and CallLeafNoFP are ok because they can't
+ // have debug info on them. Strictly speaking this only needs to be
+ // done for oops since idealreg2debugmask takes care of debug info
+ // references, but there is no way to handle oops differently than other
+ // pointers as far as the kill mask goes.
+ bool exclude_soe = op == Op_CallRuntime;
+
+ // Fill in the kill mask for the call
+ for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
+ if( !regs.Member(r) ) { // Not already defined by the call
+ // Save-on-call register?
+ if ((save_policy[r] == 'C') ||
+ (save_policy[r] == 'A') ||
+ ((save_policy[r] == 'E') && exclude_soe)) {
+ proj->_rout.Insert(r);
+ }
+ }
+ }
+
+ return node_cnt;
+}
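
The kill-mask loop above adds every register that the call does not already define and whose save-policy letter is 'C', 'A', or 'E' (the last only when SOE registers are excluded). The sketch below reproduces that fill on a std::bitset; the register count and names are made up for illustration and only the policy letters mirror the code above.

// Illustrative sketch only; the bitset and REG_COUNT stand in for RegMask/OptoReg.
#include <bitset>

static const int REG_COUNT = 32;               // hypothetical register count

static std::bitset<REG_COUNT> call_kill_mask(const char* save_policy,
                                             const std::bitset<REG_COUNT>& defined,
                                             bool exclude_soe) {
  std::bitset<REG_COUNT> kills;
  for (int r = 0; r < REG_COUNT && save_policy[r] != '\0'; r++) {
    if (defined.test(r)) continue;             // already defined by the call
    char p = save_policy[r];
    if (p == 'C' || p == 'A' || (p == 'E' && exclude_soe))
      kills.set(r);                            // killed across the call
  }
  return kills;
}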
+
+
+//------------------------------schedule_local---------------------------------
+ // Topological sort within a block. Someday this will become a real scheduler.
+bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) {
+ // Already "sorted" are the block start Node (as the first entry), and
+ // the block-ending Node and any trailing control projections. We leave
+ // these alone. PhiNodes and ParmNodes are made to follow the block start
+ // Node. Everything else gets topo-sorted.
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order);
+ for (uint i = 0;i < _nodes.size();i++) {
+ tty->print("# ");
+ _nodes[i]->fast_dump();
+ }
+ tty->print_cr("#");
+ }
+#endif
+
+ // RootNode is already sorted
+ if( _nodes.size() == 1 ) return true;
+
+ // Move PhiNodes and ParmNodes from 1 to cnt up to the start
+ uint node_cnt = end_idx();
+ uint phi_cnt = 1;
+ uint i;
+ for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
+ Node *n = _nodes[i];
+ if( n->is_Phi() || // Found a PhiNode or ParmNode
+ (n->is_Proj() && n->in(0) == head()) ) {
+ // Swap the node at 'phi_cnt' down to slot i; makes a hole at phi_cnt
+ _nodes.map(i,_nodes[phi_cnt]);
+ _nodes.map(phi_cnt++,n); // swap Phi/Parm up front
+ } else { // All others
+ // Count block-local inputs to 'n'
+ uint cnt = n->len(); // Input count
+ uint local = 0;
+ for( uint j=0; j<cnt; j++ ) {
+ Node *m = n->in(j);
+ if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
+ local++; // One more block-local input
+ }
+ ready_cnt[n->_idx] = local; // Count em up
+
+ // A few node types require changing a required edge to a precedence edge
+ // before allocation.
+ if( UseConcMarkSweepGC ) {
+ if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
+ // Note: Required edges with an index greater than oper_input_base
+ // are not supported by the allocator.
+ // Note2: Can only depend on unmatched edge being last,
+ // can not depend on its absolute position.
+ Node *oop_store = n->in(n->req() - 1);
+ n->del_req(n->req() - 1);
+ n->add_prec(oop_store);
+ assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
+ }
+ }
+ if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ) {
+ Node *x = n->in(TypeFunc::Parms);
+ n->del_req(TypeFunc::Parms);
+ n->add_prec(x);
+ }
+ }
+ }
+ for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
+ ready_cnt[_nodes[i2]->_idx] = 0;
+
+ // All the prescheduled guys do not hold back internal nodes
+ uint i3;
+ for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
+ Node *n = _nodes[i3]; // Get pre-scheduled
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* m = n->fast_out(j);
+ if( cfg->_bbs[m->_idx] ==this ) // Local-block user
+ ready_cnt[m->_idx]--; // Fix ready count
+ }
+ }
+
+ Node_List delay;
+ // Make a worklist
+ Node_List worklist;
+ for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist
+ Node *m = _nodes[i4];
+ if( !ready_cnt[m->_idx] ) { // Zero ready count?
+ if (m->is_iteratively_computed()) {
+ // Push induction variable increments last to allow other uses
+ // of the phi to be scheduled first. The select() method breaks
+ // ties in scheduling by worklist order.
+ delay.push(m);
+ } else {
+ worklist.push(m); // Then on to worklist!
+ }
+ }
+ }
+ while (delay.size()) {
+ Node* d = delay.pop();
+ worklist.push(d);
+ }
+
+ // Warm up the 'next_call' heuristic bits
+ needed_for_next_call(_nodes[0], next_call, cfg->_bbs);
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ for (uint j=0; j<_nodes.size(); j++) {
+ Node *n = _nodes[j];
+ int idx = n->_idx;
+ tty->print("# ready cnt:%3d ", ready_cnt[idx]);
+ tty->print("latency:%3d ", cfg->_node_latency.at_grow(idx));
+ tty->print("%4d: %s\n", idx, n->Name());
+ }
+ }
+#endif
+
+ // Pull from worklist and schedule
+ while( worklist.size() ) { // Worklist is not empty
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ tty->print("# ready list:");
+ for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
+ Node *n = worklist[i]; // Get Node on worklist
+ tty->print(" %d", n->_idx);
+ }
+ tty->cr();
+ }
+#endif
+
+ // Select and pop a ready guy from worklist
+ Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt);
+ _nodes.map(phi_cnt++,n); // Schedule him next
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ tty->print("# select %d: %s", n->_idx, n->Name());
+ tty->print(", latency:%d", cfg->_node_latency.at_grow(n->_idx));
+ n->dump();
+ if (Verbose) {
+ tty->print("# ready list:");
+ for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
+ Node *n = worklist[i]; // Get Node on worklist
+ tty->print(" %d", n->_idx);
+ }
+ tty->cr();
+ }
+ }
+
+#endif
+ if( n->is_MachCall() ) {
+ MachCallNode *mcall = n->as_MachCall();
+ phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
+ continue;
+ }
+ // Children are now all ready
+ for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
+ Node* m = n->fast_out(i5); // Get user
+ if( cfg->_bbs[m->_idx] != this ) continue;
+ if( m->is_Phi() ) continue;
+ if( !--ready_cnt[m->_idx] )
+ worklist.push(m);
+ }
+ }
+
+ if( phi_cnt != end_idx() ) {
+ // Did not schedule all nodes. Retry, bail out, or die.
+ Compile* C = matcher.C;
+ if (C->subsume_loads() == true && !C->failing()) {
+ // Retry with subsume_loads == false
+ // If this is the first failure, the sentinel string will "stick"
+ // to the Compile object, and the C2Compiler will see it and retry.
+ C->record_failure(C2Compiler::retry_no_subsuming_loads());
+ }
+ // assert( phi_cnt == end_idx(), "did not schedule all" );
+ return false;
+ }
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ tty->print_cr("#");
+ tty->print_cr("# after schedule_local");
+ for (uint i = 0;i < _nodes.size();i++) {
+ tty->print("# ");
+ _nodes[i]->fast_dump();
+ }
+ tty->cr();
+ }
+#endif
+
+
+ return true;
+}
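
schedule_local() above is a list scheduler: count the block-local inputs of each node, seed a worklist with the zero-count nodes, then repeatedly pick one and decrement the counts of its users. The miniature standalone version below runs the same loop on a toy dependence DAG, with select() replaced by a trivial pop; all names are illustrative only.

// Illustrative sketch only; a toy ready-count list scheduler.
#include <vector>
#include <cstdio>

// deps[i] lists the nodes that node i depends on (its inputs).
static std::vector<unsigned> list_schedule(const std::vector<std::vector<unsigned> >& deps) {
  unsigned n = (unsigned) deps.size();
  std::vector<unsigned> ready_cnt(n, 0);
  std::vector<std::vector<unsigned> > users(n);
  for (unsigned i = 0; i < n; i++) {
    ready_cnt[i] = (unsigned) deps[i].size();
    for (unsigned j = 0; j < deps[i].size(); j++)
      users[deps[i][j]].push_back(i);
  }
  std::vector<unsigned> worklist, order;
  for (unsigned i = 0; i < n; i++)
    if (ready_cnt[i] == 0) worklist.push_back(i);
  while (!worklist.empty()) {
    unsigned cur = worklist.back();            // a real scheduler would call select()
    worklist.pop_back();
    order.push_back(cur);
    for (unsigned j = 0; j < users[cur].size(); j++)
      if (--ready_cnt[users[cur][j]] == 0)     // user became ready
        worklist.push_back(users[cur][j]);
  }
  return order;   // fewer than n entries means something never became ready
}

int main() {
  std::vector<std::vector<unsigned> > deps(4);
  deps[1].push_back(0);                        // 1 depends on 0
  deps[2].push_back(0);                        // 2 depends on 0
  deps[3].push_back(1); deps[3].push_back(2);  // 3 depends on 1 and 2
  std::vector<unsigned> order = list_schedule(deps);
  for (size_t i = 0; i < order.size(); i++) printf("%u ", order[i]);
  printf("\n");                                // e.g. "0 2 1 3"
  return 0;
}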
+
+//--------------------------catch_cleanup_fix_all_inputs-----------------------
+static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
+ for (uint l = 0; l < use->len(); l++) {
+ if (use->in(l) == old_def) {
+ if (l < use->req()) {
+ use->set_req(l, new_def);
+ } else {
+ use->rm_prec(l);
+ use->add_prec(new_def);
+ l--;
+ }
+ }
+ }
+}
+
+//------------------------------catch_cleanup_find_cloned_def------------------
+static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) {
+ assert( use_blk != def_blk, "Inter-block cleanup only");
+
+ // The use is some block below the Catch. Find and return the clone of the def
+ // that dominates the use. If there is no clone in a dominating block, then
+ // create a phi for the def in a dominating block.
+
+ // Find which successor block dominates this use. The successor
+ // blocks must all be single-entry (from the Catch only; I will have
+ // split blocks to make this so), hence they all dominate.
+ while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
+ use_blk = use_blk->_idom;
+
+ // Find the successor
+ Node *fixup = NULL;
+
+ uint j;
+ for( j = 0; j < def_blk->_num_succs; j++ )
+ if( use_blk == def_blk->_succs[j] )
+ break;
+
+ if( j == def_blk->_num_succs ) {
+ // Block at the same level in the dom-tree is not a successor. It needs a
+ // PhiNode: the PhiNode merges the def's clones from the predecessors, and
+ // the use is then fixed up to reference the PhiNode.
+ Node_Array inputs = new Node_List(Thread::current()->resource_area());
+ for(uint k = 1; k < use_blk->num_preds(); k++) {
+ inputs.map(k, catch_cleanup_find_cloned_def(bbs[use_blk->pred(k)->_idx], def, def_blk, bbs, n_clone_idx));
+ }
+
+ // Check to see if the use_blk already has an identical phi inserted.
+ // If it exists, it will be at the first position since all uses of a
+ // def are processed together.
+ Node *phi = use_blk->_nodes[1];
+ if( phi->is_Phi() ) {
+ fixup = phi;
+ for (uint k = 1; k < use_blk->num_preds(); k++) {
+ if (phi->in(k) != inputs[k]) {
+ // Not a match
+ fixup = NULL;
+ break;
+ }
+ }
+ }
+
+ // If an existing PhiNode was not found, make a new one.
+ if (fixup == NULL) {
+ Node *new_phi = PhiNode::make(use_blk->head(), def);
+ use_blk->_nodes.insert(1, new_phi);
+ bbs.map(new_phi->_idx, use_blk);
+ for (uint k = 1; k < use_blk->num_preds(); k++) {
+ new_phi->set_req(k, inputs[k]);
+ }
+ fixup = new_phi;
+ }
+
+ } else {
+ // Found the use just below the Catch. Make it use the clone.
+ fixup = use_blk->_nodes[n_clone_idx];
+ }
+
+ return fixup;
+}
+
+//--------------------------catch_cleanup_intra_block--------------------------
+// Fix all input edges in use that reference "def". The use is in the same
+// block as the def and both have been cloned in each successor block.
+static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) {
+
+ // Both the use and def have been cloned. For each successor block,
+ // get the clone of the use, and make its input the clone of the def
+ // found in that block.
+
+ uint use_idx = blk->find_node(use);
+ uint offset_idx = use_idx - beg;
+ for( uint k = 0; k < blk->_num_succs; k++ ) {
+ // Get clone in each successor block
+ Block *sb = blk->_succs[k];
+ Node *clone = sb->_nodes[offset_idx+1];
+ assert( clone->Opcode() == use->Opcode(), "" );
+
+ // Make use-clone reference the def-clone
+ catch_cleanup_fix_all_inputs(clone, def, sb->_nodes[n_clone_idx]);
+ }
+}
+
+//------------------------------catch_cleanup_inter_block---------------------
+// Fix all input edges in use that reference "def". The use is in a different
+// block than the def.
+static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) {
+ if( !use_blk ) return; // Can happen if the use is a precedence edge
+
+ Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, bbs, n_clone_idx);
+ catch_cleanup_fix_all_inputs(use, def, new_def);
+}
+
+//------------------------------call_catch_cleanup-----------------------------
+ // If we inserted any instructions between a Call and its CatchNode,
+// clone the instructions on all paths below the Catch.
+void Block::call_catch_cleanup(Block_Array &bbs) {
+
+ // End of region to clone
+ uint end = end_idx();
+ if( !_nodes[end]->is_Catch() ) return;
+ // Start of region to clone
+ uint beg = end;
+ while( _nodes[beg-1]->Opcode() != Op_MachProj ||
+ !_nodes[beg-1]->in(0)->is_Call() ) {
+ beg--;
+ assert(beg > 0,"Catch cleanup walking beyond block boundary");
+ }
+ // Range of inserted instructions is [beg, end)
+ if( beg == end ) return;
+
+ // Clone along all Catch output paths. Clone area between the 'beg' and
+ // 'end' indices.
+ for( uint i = 0; i < _num_succs; i++ ) {
+ Block *sb = _succs[i];
+ // Clone the entire area; ignoring the edge fixup for now.
+ for( uint j = end; j > beg; j-- ) {
+ Node *clone = _nodes[j-1]->clone();
+ sb->_nodes.insert( 1, clone );
+ bbs.map(clone->_idx,sb);
+ }
+ }
+
+
+ // Fixup edges. Check the def-use info per cloned Node
+ for(uint i2 = beg; i2 < end; i2++ ) {
+ uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
+ Node *n = _nodes[i2]; // Node that got cloned
+ // Need DU safe iterator because of edge manipulation in calls.
+ Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
+ for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
+ out->push(n->fast_out(j1));
+ }
+ uint max = out->size();
+ for (uint j = 0; j < max; j++) {// For all users
+ Node *use = out->pop();
+ Block *buse = bbs[use->_idx];
+ if( use->is_Phi() ) {
+ for( uint k = 1; k < use->req(); k++ )
+ if( use->in(k) == n ) {
+ Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx);
+ use->set_req(k, fixup);
+ }
+ } else {
+ if (this == buse) {
+ catch_cleanup_intra_block(use, n, this, beg, n_clone_idx);
+ } else {
+ catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx);
+ }
+ }
+ } // End for all users
+
+ } // End of for all Nodes in cloned area
+
+ // Remove the now-dead cloned ops
+ for(uint i3 = beg; i3 < end; i3++ ) {
+ _nodes[beg]->disconnect_inputs(NULL);
+ _nodes.remove(beg);
+ }
+
+ // If the successor blocks have a CreateEx node, move it back to the top
+ for(uint i4 = 0; i4 < _num_succs; i4++ ) {
+ Block *sb = _succs[i4];
+ uint new_cnt = end - beg;
+ // Remove any newly created, but dead, nodes.
+ for( uint j = new_cnt; j > 0; j-- ) {
+ Node *n = sb->_nodes[j];
+ if (n->outcnt() == 0 &&
+ (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
+ n->disconnect_inputs(NULL);
+ sb->_nodes.remove(j);
+ new_cnt--;
+ }
+ }
+ // If any newly created nodes remain, move the CreateEx node to the top
+ if (new_cnt > 0) {
+ Node *cex = sb->_nodes[1+new_cnt];
+ if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
+ sb->_nodes.remove(1+new_cnt);
+ sb->_nodes.insert(1,cex);
+ }
+ }
+ }
+}
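
The cloning step of call_catch_cleanup() above copies the nodes in [beg, end) into each successor, inserting just after the successor's head so the clones keep their original order. A toy sketch of that insertion pattern follows, with vectors of ints standing in for blocks of Nodes; everything here is a hypothetical illustration.

// Illustrative sketch only; ToyBlockNodes stands in for a block's node list.
#include <vector>

typedef std::vector<int> ToyBlockNodes;          // element 0 plays the block head

static void clone_range_into_succs(const ToyBlockNodes& blk, unsigned beg, unsigned end,
                                   std::vector<ToyBlockNodes>& succs) {
  for (size_t s = 0; s < succs.size(); s++) {
    // Walk the range backwards, inserting each clone at index 1, so the clones
    // end up in original order at indices [1, 1 + (end - beg)).
    for (unsigned j = end; j > beg; j--)
      succs[s].insert(succs[s].begin() + 1, blk[j - 1]);
  }
}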
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
new file mode 100644
index 000000000..d78f62d40
--- /dev/null
+++ b/src/share/vm/opto/library_call.cpp
@@ -0,0 +1,4921 @@
+/*
+ * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_library_call.cpp.incl"
+
+class LibraryIntrinsic : public InlineCallGenerator {
+ // Extend the set of intrinsics known to the runtime:
+ public:
+ private:
+ bool _is_virtual;
+ vmIntrinsics::ID _intrinsic_id;
+
+ public:
+ LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id)
+ : InlineCallGenerator(m),
+ _is_virtual(is_virtual),
+ _intrinsic_id(id)
+ {
+ }
+ virtual bool is_intrinsic() const { return true; }
+ virtual bool is_virtual() const { return _is_virtual; }
+ virtual JVMState* generate(JVMState* jvms);
+ vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
+};
+
+
+// Local helper class for LibraryIntrinsic:
+class LibraryCallKit : public GraphKit {
+ private:
+ LibraryIntrinsic* _intrinsic; // the library intrinsic being called
+
+ public:
+ LibraryCallKit(JVMState* caller, LibraryIntrinsic* intrinsic)
+ : GraphKit(caller),
+ _intrinsic(intrinsic)
+ {
+ }
+
+ ciMethod* caller() const { return jvms()->method(); }
+ int bci() const { return jvms()->bci(); }
+ LibraryIntrinsic* intrinsic() const { return _intrinsic; }
+ vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); }
+ ciMethod* callee() const { return _intrinsic->method(); }
+ ciSignature* signature() const { return callee()->signature(); }
+ int arg_size() const { return callee()->arg_size(); }
+
+ bool try_to_inline();
+
+ // Helper functions to inline natives
+ void push_result(RegionNode* region, PhiNode* value);
+ Node* generate_guard(Node* test, RegionNode* region, float true_prob);
+ Node* generate_slow_guard(Node* test, RegionNode* region);
+ Node* generate_fair_guard(Node* test, RegionNode* region);
+ Node* generate_negative_guard(Node* index, RegionNode* region,
+ // resulting CastII of index:
+ Node* *pos_index = NULL);
+ Node* generate_nonpositive_guard(Node* index, bool never_negative,
+ // resulting CastII of index:
+ Node* *pos_index = NULL);
+ Node* generate_limit_guard(Node* offset, Node* subseq_length,
+ Node* array_length,
+ RegionNode* region);
+ Node* generate_current_thread(Node* &tls_output);
+ address basictype2arraycopy(BasicType t, Node *src_offset, Node *dest_offset,
+ bool disjoint_bases, const char* &name);
+ Node* load_mirror_from_klass(Node* klass);
+ Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
+ int nargs,
+ RegionNode* region, int null_path,
+ int offset);
+ Node* load_klass_from_mirror(Node* mirror, bool never_see_null, int nargs,
+ RegionNode* region, int null_path) {
+ int offset = java_lang_Class::klass_offset_in_bytes();
+ return load_klass_from_mirror_common(mirror, never_see_null, nargs,
+ region, null_path,
+ offset);
+ }
+ Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
+ int nargs,
+ RegionNode* region, int null_path) {
+ int offset = java_lang_Class::array_klass_offset_in_bytes();
+ return load_klass_from_mirror_common(mirror, never_see_null, nargs,
+ region, null_path,
+ offset);
+ }
+ Node* generate_access_flags_guard(Node* kls,
+ int modifier_mask, int modifier_bits,
+ RegionNode* region);
+ Node* generate_interface_guard(Node* kls, RegionNode* region);
+ Node* generate_array_guard(Node* kls, RegionNode* region) {
+ return generate_array_guard_common(kls, region, false, false);
+ }
+ Node* generate_non_array_guard(Node* kls, RegionNode* region) {
+ return generate_array_guard_common(kls, region, false, true);
+ }
+ Node* generate_objArray_guard(Node* kls, RegionNode* region) {
+ return generate_array_guard_common(kls, region, true, false);
+ }
+ Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
+ return generate_array_guard_common(kls, region, true, true);
+ }
+ Node* generate_array_guard_common(Node* kls, RegionNode* region,
+ bool obj_array, bool not_array);
+ Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
+ CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
+ bool is_virtual = false, bool is_static = false);
+ CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
+ return generate_method_call(method_id, false, true);
+ }
+ CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
+ return generate_method_call(method_id, true, false);
+ }
+
+ bool inline_string_compareTo();
+ bool inline_string_indexOf();
+ Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
+ Node* pop_math_arg();
+ bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
+ bool inline_math_native(vmIntrinsics::ID id);
+ bool inline_trig(vmIntrinsics::ID id);
+ bool inline_trans(vmIntrinsics::ID id);
+ bool inline_abs(vmIntrinsics::ID id);
+ bool inline_sqrt(vmIntrinsics::ID id);
+ bool inline_pow(vmIntrinsics::ID id);
+ bool inline_exp(vmIntrinsics::ID id);
+ bool inline_min_max(vmIntrinsics::ID id);
+ Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
+ // This returns Type::AnyPtr, RawPtr, or OopPtr.
+ int classify_unsafe_addr(Node* &base, Node* &offset);
+ Node* make_unsafe_address(Node* base, Node* offset);
+ bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
+ bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
+ bool inline_unsafe_allocate();
+ bool inline_unsafe_copyMemory();
+ bool inline_native_currentThread();
+ bool inline_native_time_funcs(bool isNano);
+ bool inline_native_isInterrupted();
+ bool inline_native_Class_query(vmIntrinsics::ID id);
+ bool inline_native_subtype_check();
+
+ bool inline_native_newArray();
+ bool inline_native_getLength();
+ bool inline_array_copyOf(bool is_copyOfRange);
+ bool inline_native_clone(bool is_virtual);
+ bool inline_native_Reflection_getCallerClass();
+ bool inline_native_AtomicLong_get();
+ bool inline_native_AtomicLong_attemptUpdate();
+ bool is_method_invoke_or_aux_frame(JVMState* jvms);
+ // Helper function for inlining native object hash method
+ bool inline_native_hashcode(bool is_virtual, bool is_static);
+ bool inline_native_getClass();
+
+ // Helper functions for inlining arraycopy
+ bool inline_arraycopy();
+ void generate_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs, // arguments on stack for debug info
+ bool disjoint_bases = false,
+ bool length_never_negative = false,
+ RegionNode* slow_region = NULL);
+ AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
+ RegionNode* slow_region);
+ void generate_clear_array(const TypePtr* adr_type,
+ Node* dest,
+ BasicType basic_elem_type,
+ Node* slice_off,
+ Node* slice_len,
+ Node* slice_end);
+ bool generate_block_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ AllocateNode* alloc,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* dest_size);
+ void generate_slow_arraycopy(const TypePtr* adr_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs);
+ Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
+ Node* dest_elem_klass,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length, int nargs);
+ Node* generate_generic_arraycopy(const TypePtr* adr_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length, int nargs);
+ void generate_unchecked_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ bool disjoint_bases,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length);
+ bool inline_unsafe_CAS(BasicType type);
+ bool inline_unsafe_ordered_store(BasicType type);
+ bool inline_fp_conversions(vmIntrinsics::ID id);
+ bool inline_reverseBytes(vmIntrinsics::ID id);
+};
+
+
+//---------------------------make_vm_intrinsic----------------------------
+CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
+ vmIntrinsics::ID id = m->intrinsic_id();
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+ if (DisableIntrinsic[0] != '\0'
+ && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) {
+ // disabled by a user request on the command line:
+ // example: -XX:DisableIntrinsic=_hashCode,_getClass
+ return NULL;
+ }
+
+ if (!m->is_loaded()) {
+ // do not attempt to inline unloaded methods
+ return NULL;
+ }
+
+ // Only a few intrinsics implement a virtual dispatch.
+ // They are expensive calls which are also frequently overridden.
+ if (is_virtual) {
+ switch (id) {
+ case vmIntrinsics::_hashCode:
+ case vmIntrinsics::_clone:
+ // OK, Object.hashCode and Object.clone intrinsics come in both flavors
+ break;
+ default:
+ return NULL;
+ }
+ }
+
+ // -XX:-InlineNatives disables nearly all intrinsics:
+ if (!InlineNatives) {
+ switch (id) {
+ case vmIntrinsics::_indexOf:
+ case vmIntrinsics::_compareTo:
+ break; // InlineNatives does not control String.compareTo
+ default:
+ return NULL;
+ }
+ }
+
+ switch (id) {
+ case vmIntrinsics::_compareTo:
+ if (!SpecialStringCompareTo) return NULL;
+ break;
+ case vmIntrinsics::_indexOf:
+ if (!SpecialStringIndexOf) return NULL;
+ break;
+ case vmIntrinsics::_arraycopy:
+ if (!InlineArrayCopy) return NULL;
+ break;
+ case vmIntrinsics::_copyMemory:
+ if (StubRoutines::unsafe_arraycopy() == NULL) return NULL;
+ if (!InlineArrayCopy) return NULL;
+ break;
+ case vmIntrinsics::_hashCode:
+ if (!InlineObjectHash) return NULL;
+ break;
+ case vmIntrinsics::_clone:
+ case vmIntrinsics::_copyOf:
+ case vmIntrinsics::_copyOfRange:
+ if (!InlineObjectCopy) return NULL;
+ // These also use the arraycopy intrinsic mechanism:
+ if (!InlineArrayCopy) return NULL;
+ break;
+ case vmIntrinsics::_checkIndex:
+ // We do not intrinsify this. The optimizer does fine with it.
+ return NULL;
+
+ case vmIntrinsics::_get_AtomicLong:
+ case vmIntrinsics::_attemptUpdate:
+ if (!InlineAtomicLong) return NULL;
+ break;
+
+ case vmIntrinsics::_Object_init:
+ case vmIntrinsics::_invoke:
+ // We do not intrinsify these; they are marked for other purposes.
+ return NULL;
+
+ case vmIntrinsics::_getCallerClass:
+ if (!UseNewReflection) return NULL;
+ if (!InlineReflectionGetCallerClass) return NULL;
+ if (!JDK_Version::is_gte_jdk14x_version()) return NULL;
+ break;
+
+ default:
+ break;
+ }
+
+ // -XX:-InlineClassNatives disables natives from the Class class.
+ // The flag applies to all reflective calls, notably Array.newArray
+ // (visible to Java programmers as Array.newInstance).
+ if (m->holder()->name() == ciSymbol::java_lang_Class() ||
+ m->holder()->name() == ciSymbol::java_lang_reflect_Array()) {
+ if (!InlineClassNatives) return NULL;
+ }
+
+ // -XX:-InlineThreadNatives disables natives from the Thread class.
+ if (m->holder()->name() == ciSymbol::java_lang_Thread()) {
+ if (!InlineThreadNatives) return NULL;
+ }
+
+ // -XX:-InlineMathNatives disables natives from the Math, Float, and Double classes.
+ if (m->holder()->name() == ciSymbol::java_lang_Math() ||
+ m->holder()->name() == ciSymbol::java_lang_Float() ||
+ m->holder()->name() == ciSymbol::java_lang_Double()) {
+ if (!InlineMathNatives) return NULL;
+ }
+
+ // -XX:-InlineUnsafeOps disables natives from the Unsafe class.
+ if (m->holder()->name() == ciSymbol::sun_misc_Unsafe()) {
+ if (!InlineUnsafeOps) return NULL;
+ }
+
+ return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id);
+}
+
+//----------------------register_library_intrinsics-----------------------
+// Initialize this file's data structures, for each Compile instance.
+void Compile::register_library_intrinsics() {
+ // Nothing to do here.
+}
+
+JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
+ LibraryCallKit kit(jvms, this);
+ Compile* C = kit.C;
+ int nodes = C->unique();
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) {
+ char buf[1000];
+ const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
+ tty->print_cr("Intrinsic %s", str);
+ }
+#endif
+ if (kit.try_to_inline()) {
+ if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
+ tty->print("Inlining intrinsic %s%s at bci:%d in",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " (virtual)" : ""), kit.bci());
+ kit.caller()->print_short_name(tty);
+ tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+ }
+ C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
+ if (C->log()) {
+ C->log()->elem("intrinsic id='%s'%s nodes='%d'",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " virtual='1'" : ""),
+ C->unique() - nodes);
+ }
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ if (PrintIntrinsics) {
+ switch (intrinsic_id()) {
+ case vmIntrinsics::_invoke:
+ case vmIntrinsics::_Object_init:
+ // We do not expect to inline these, so do not produce any noise about them.
+ break;
+ default:
+ tty->print("Did not inline intrinsic %s%s at bci:%d in",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " (virtual)" : ""), kit.bci());
+ kit.caller()->print_short_name(tty);
+ tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+ }
+ }
+ C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
+ return NULL;
+}
+
+bool LibraryCallKit::try_to_inline() {
+ // Handle symbolic names for otherwise undistinguished boolean switches:
+ const bool is_store = true;
+ const bool is_native_ptr = true;
+ const bool is_static = true;
+
+ switch (intrinsic_id()) {
+ case vmIntrinsics::_hashCode:
+ return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
+ case vmIntrinsics::_identityHashCode:
+ return inline_native_hashcode(/*!virtual*/ false, is_static);
+ case vmIntrinsics::_getClass:
+ return inline_native_getClass();
+
+ case vmIntrinsics::_dsin:
+ case vmIntrinsics::_dcos:
+ case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dabs:
+ case vmIntrinsics::_datan2:
+ case vmIntrinsics::_dsqrt:
+ case vmIntrinsics::_dexp:
+ case vmIntrinsics::_dlog:
+ case vmIntrinsics::_dlog10:
+ case vmIntrinsics::_dpow:
+ return inline_math_native(intrinsic_id());
+
+ case vmIntrinsics::_min:
+ case vmIntrinsics::_max:
+ return inline_min_max(intrinsic_id());
+
+ case vmIntrinsics::_arraycopy:
+ return inline_arraycopy();
+
+ case vmIntrinsics::_compareTo:
+ return inline_string_compareTo();
+ case vmIntrinsics::_indexOf:
+ return inline_string_indexOf();
+
+ case vmIntrinsics::_getObject:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false);
+ case vmIntrinsics::_getBoolean:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, false);
+ case vmIntrinsics::_getByte:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, false);
+ case vmIntrinsics::_getShort:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, false);
+ case vmIntrinsics::_getChar:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, false);
+ case vmIntrinsics::_getInt:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, false);
+ case vmIntrinsics::_getLong:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, false);
+ case vmIntrinsics::_getFloat:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, false);
+ case vmIntrinsics::_getDouble:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, false);
+
+ case vmIntrinsics::_putObject:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, false);
+ case vmIntrinsics::_putBoolean:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, false);
+ case vmIntrinsics::_putByte:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, false);
+ case vmIntrinsics::_putShort:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, false);
+ case vmIntrinsics::_putChar:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, false);
+ case vmIntrinsics::_putInt:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_INT, false);
+ case vmIntrinsics::_putLong:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, false);
+ case vmIntrinsics::_putFloat:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, false);
+ case vmIntrinsics::_putDouble:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, false);
+
+ case vmIntrinsics::_getByte_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_BYTE, false);
+ case vmIntrinsics::_getShort_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_SHORT, false);
+ case vmIntrinsics::_getChar_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_CHAR, false);
+ case vmIntrinsics::_getInt_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_INT, false);
+ case vmIntrinsics::_getLong_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_LONG, false);
+ case vmIntrinsics::_getFloat_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_FLOAT, false);
+ case vmIntrinsics::_getDouble_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_DOUBLE, false);
+ case vmIntrinsics::_getAddress_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_ADDRESS, false);
+
+ case vmIntrinsics::_putByte_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_BYTE, false);
+ case vmIntrinsics::_putShort_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_SHORT, false);
+ case vmIntrinsics::_putChar_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_CHAR, false);
+ case vmIntrinsics::_putInt_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_INT, false);
+ case vmIntrinsics::_putLong_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_LONG, false);
+ case vmIntrinsics::_putFloat_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_FLOAT, false);
+ case vmIntrinsics::_putDouble_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_DOUBLE, false);
+ case vmIntrinsics::_putAddress_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_ADDRESS, false);
+
+ case vmIntrinsics::_getObjectVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, true);
+ case vmIntrinsics::_getBooleanVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, true);
+ case vmIntrinsics::_getByteVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, true);
+ case vmIntrinsics::_getShortVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, true);
+ case vmIntrinsics::_getCharVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, true);
+ case vmIntrinsics::_getIntVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, true);
+ case vmIntrinsics::_getLongVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, true);
+ case vmIntrinsics::_getFloatVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, true);
+ case vmIntrinsics::_getDoubleVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, true);
+
+ case vmIntrinsics::_putObjectVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, true);
+ case vmIntrinsics::_putBooleanVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, true);
+ case vmIntrinsics::_putByteVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, true);
+ case vmIntrinsics::_putShortVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, true);
+ case vmIntrinsics::_putCharVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, true);
+ case vmIntrinsics::_putIntVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_INT, true);
+ case vmIntrinsics::_putLongVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, true);
+ case vmIntrinsics::_putFloatVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, true);
+ case vmIntrinsics::_putDoubleVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, true);
+
+ case vmIntrinsics::_prefetchRead:
+ return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static);
+ case vmIntrinsics::_prefetchWrite:
+ return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static);
+ case vmIntrinsics::_prefetchReadStatic:
+ return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static);
+ case vmIntrinsics::_prefetchWriteStatic:
+ return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static);
+
+ case vmIntrinsics::_compareAndSwapObject:
+ return inline_unsafe_CAS(T_OBJECT);
+ case vmIntrinsics::_compareAndSwapInt:
+ return inline_unsafe_CAS(T_INT);
+ case vmIntrinsics::_compareAndSwapLong:
+ return inline_unsafe_CAS(T_LONG);
+
+ case vmIntrinsics::_putOrderedObject:
+ return inline_unsafe_ordered_store(T_OBJECT);
+ case vmIntrinsics::_putOrderedInt:
+ return inline_unsafe_ordered_store(T_INT);
+ case vmIntrinsics::_putOrderedLong:
+ return inline_unsafe_ordered_store(T_LONG);
+
+ case vmIntrinsics::_currentThread:
+ return inline_native_currentThread();
+ case vmIntrinsics::_isInterrupted:
+ return inline_native_isInterrupted();
+
+ case vmIntrinsics::_currentTimeMillis:
+ return inline_native_time_funcs(false);
+ case vmIntrinsics::_nanoTime:
+ return inline_native_time_funcs(true);
+ case vmIntrinsics::_allocateInstance:
+ return inline_unsafe_allocate();
+ case vmIntrinsics::_copyMemory:
+ return inline_unsafe_copyMemory();
+ case vmIntrinsics::_newArray:
+ return inline_native_newArray();
+ case vmIntrinsics::_getLength:
+ return inline_native_getLength();
+ case vmIntrinsics::_copyOf:
+ return inline_array_copyOf(false);
+ case vmIntrinsics::_copyOfRange:
+ return inline_array_copyOf(true);
+ case vmIntrinsics::_clone:
+ return inline_native_clone(intrinsic()->is_virtual());
+
+ case vmIntrinsics::_isAssignableFrom:
+ return inline_native_subtype_check();
+
+ case vmIntrinsics::_isInstance:
+ case vmIntrinsics::_getModifiers:
+ case vmIntrinsics::_isInterface:
+ case vmIntrinsics::_isArray:
+ case vmIntrinsics::_isPrimitive:
+ case vmIntrinsics::_getSuperclass:
+ case vmIntrinsics::_getComponentType:
+ case vmIntrinsics::_getClassAccessFlags:
+ return inline_native_Class_query(intrinsic_id());
+
+ case vmIntrinsics::_floatToRawIntBits:
+ case vmIntrinsics::_floatToIntBits:
+ case vmIntrinsics::_intBitsToFloat:
+ case vmIntrinsics::_doubleToRawLongBits:
+ case vmIntrinsics::_doubleToLongBits:
+ case vmIntrinsics::_longBitsToDouble:
+ return inline_fp_conversions(intrinsic_id());
+
+ case vmIntrinsics::_reverseBytes_i:
+ case vmIntrinsics::_reverseBytes_l:
+ return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
+
+ case vmIntrinsics::_get_AtomicLong:
+ return inline_native_AtomicLong_get();
+ case vmIntrinsics::_attemptUpdate:
+ return inline_native_AtomicLong_attemptUpdate();
+
+ case vmIntrinsics::_getCallerClass:
+ return inline_native_Reflection_getCallerClass();
+
+ default:
+ // If you get here, it may be that someone has added a new intrinsic
+ // to the list in vmSymbols.hpp without implementing it here.
+#ifndef PRODUCT
+ if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
+ tty->print_cr("*** Warning: Unimplemented intrinsic %s(%d)",
+ vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
+ }
+#endif
+ return false;
+ }
+}
+
+//------------------------------push_result------------------------------
+// Helper function for finishing intrinsics.
+void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) {
+ record_for_igvn(region);
+ set_control(_gvn.transform(region));
+ BasicType value_type = value->type()->basic_type();
+ push_node(value_type, _gvn.transform(value));
+}
+
+//------------------------------generate_guard---------------------------
+// Helper function for generating guarded fast-slow graph structures.
+// The given 'test', if true, guards a slow path. If the test fails
+// then a fast path can be taken. (We generally hope it fails.)
+// In all cases, GraphKit::control() is updated to the fast path.
+// The returned value represents the control for the slow path.
+// The return value is never 'top'; it is either a valid control
+// or NULL if it is obvious that the slow path can never be taken.
+// Also, if region and the slow control are not NULL, the slow edge
+// is appended to the region.
+Node* LibraryCallKit::generate_guard(Node* test, RegionNode* region, float true_prob) {
+ if (stopped()) {
+ // Already short circuited.
+ return NULL;
+ }
+
+ // Build an if node and its projections.
+ // If test is true we take the slow path, which we assume is uncommon.
+ if (_gvn.type(test) == TypeInt::ZERO) {
+ // The slow branch is never taken. No need to build this guard.
+ return NULL;
+ }
+
+ IfNode* iff = create_and_map_if(control(), test, true_prob, COUNT_UNKNOWN);
+
+ Node* if_slow = _gvn.transform( new (C, 1) IfTrueNode(iff) );
+ if (if_slow == top()) {
+ // The slow branch is never taken. No need to build this guard.
+ return NULL;
+ }
+
+ if (region != NULL)
+ region->add_req(if_slow);
+
+ Node* if_fast = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ set_control(if_fast);
+
+ return if_slow;
+}
+
+inline Node* LibraryCallKit::generate_slow_guard(Node* test, RegionNode* region) {
+ return generate_guard(test, region, PROB_UNLIKELY_MAG(3));
+}
+inline Node* LibraryCallKit::generate_fair_guard(Node* test, RegionNode* region) {
+ return generate_guard(test, region, PROB_FAIR);
+}
+
+inline Node* LibraryCallKit::generate_negative_guard(Node* index, RegionNode* region,
+ Node* *pos_index) {
+ if (stopped())
+ return NULL; // already stopped
+ if (_gvn.type(index)->higher_equal(TypeInt::POS)) // [0,maxint]
+ return NULL; // index is already adequately typed
+ Node* cmp_lt = _gvn.transform( new (C, 3) CmpINode(index, intcon(0)) );
+ Node* bol_lt = _gvn.transform( new (C, 2) BoolNode(cmp_lt, BoolTest::lt) );
+ Node* is_neg = generate_guard(bol_lt, region, PROB_MIN);
+ if (is_neg != NULL && pos_index != NULL) {
+ // Emulate effect of Parse::adjust_map_after_if.
+ Node* ccast = new (C, 2) CastIINode(index, TypeInt::POS);
+ ccast->set_req(0, control());
+ (*pos_index) = _gvn.transform(ccast);
+ }
+ return is_neg;
+}
+
+inline Node* LibraryCallKit::generate_nonpositive_guard(Node* index, bool never_negative,
+ Node* *pos_index) {
+ if (stopped())
+ return NULL; // already stopped
+ if (_gvn.type(index)->higher_equal(TypeInt::POS1)) // [1,maxint]
+ return NULL; // index is already adequately typed
+ Node* cmp_le = _gvn.transform( new (C, 3) CmpINode(index, intcon(0)) );
+ BoolTest::mask le_or_eq = (never_negative ? BoolTest::eq : BoolTest::le);
+ Node* bol_le = _gvn.transform( new (C, 2) BoolNode(cmp_le, le_or_eq) );
+ Node* is_notp = generate_guard(bol_le, NULL, PROB_MIN);
+ if (is_notp != NULL && pos_index != NULL) {
+ // Emulate effect of Parse::adjust_map_after_if.
+ Node* ccast = new (C, 2) CastIINode(index, TypeInt::POS1);
+ ccast->set_req(0, control());
+ (*pos_index) = _gvn.transform(ccast);
+ }
+ return is_notp;
+}
+
+// Make sure that 'position' is a valid limit index, in [0..length].
+// There are two equivalent plans for checking this:
+// A. (offset + copyLength) unsigned<= arrayLength
+// B. offset <= (arrayLength - copyLength)
+// We require that all of the values above, except for the sum and
+// difference, are already known to be non-negative.
+// Plan A is robust in the face of overflow, if offset and copyLength
+// are both hugely positive.
+//
+// Plan B is less direct and intuitive, but it does not overflow at
+// all, since the difference of two non-negatives is always
+// representable. Whenever Java methods must perform the equivalent
+// check they generally use Plan B instead of Plan A.
+// For the moment we use Plan A.
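+// As a concrete illustration of Plan A's robustness: with 32-bit ints,
+// offset = 0x70000000 and copyLength = 0x20000000 produce a negative
+// signed sum, but the unsigned comparison still sees a value larger than
+// any possible arrayLength, so the limit check correctly rejects it.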
+inline Node* LibraryCallKit::generate_limit_guard(Node* offset,
+ Node* subseq_length,
+ Node* array_length,
+ RegionNode* region) {
+ if (stopped())
+ return NULL; // already stopped
+ bool zero_offset = _gvn.type(offset) == TypeInt::ZERO;
+ if (zero_offset && _gvn.eqv_uncast(subseq_length, array_length))
+ return NULL; // common case of whole-array copy
+ Node* last = subseq_length;
+ if (!zero_offset) // last += offset
+ last = _gvn.transform( new (C, 3) AddINode(last, offset));
+ Node* cmp_lt = _gvn.transform( new (C, 3) CmpUNode(array_length, last) );
+ Node* bol_lt = _gvn.transform( new (C, 2) BoolNode(cmp_lt, BoolTest::lt) );
+ Node* is_over = generate_guard(bol_lt, region, PROB_MIN);
+ return is_over;
+}
+
+
+//--------------------------generate_current_thread--------------------
+Node* LibraryCallKit::generate_current_thread(Node* &tls_output) {
+ ciKlass* thread_klass = env()->Thread_klass();
+ const Type* thread_type = TypeOopPtr::make_from_klass(thread_klass)->cast_to_ptr_type(TypePtr::NotNull);
+ Node* thread = _gvn.transform(new (C, 1) ThreadLocalNode());
+ Node* p = basic_plus_adr(top()/*!oop*/, thread, in_bytes(JavaThread::threadObj_offset()));
+ Node* threadObj = make_load(NULL, p, thread_type, T_OBJECT);
+ tls_output = thread;
+ return threadObj;
+}
+
+
+//------------------------------inline_string_compareTo------------------------
+bool LibraryCallKit::inline_string_compareTo() {
+
+ const int value_offset = java_lang_String::value_offset_in_bytes();
+ const int count_offset = java_lang_String::count_offset_in_bytes();
+ const int offset_offset = java_lang_String::offset_offset_in_bytes();
+
+ _sp += 2;
+ Node *argument = pop(); // pop non-receiver first: it was pushed second
+ Node *receiver = pop();
+
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when string compare is inlined into a method
+ // which handles NullPointerExceptions.
+ _sp += 2;
+ receiver = do_null_check(receiver, T_OBJECT);
+ argument = do_null_check(argument, T_OBJECT);
+ _sp -= 2;
+ if (stopped()) {
+ return true;
+ }
+
+ ciInstanceKlass* klass = env()->String_klass();
+ const TypeInstPtr* string_type =
+ TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
+
+ Node* compare =
+ _gvn.transform(new (C, 7) StrCompNode(
+ control(),
+ memory(TypeAryPtr::CHARS),
+ memory(string_type->add_offset(value_offset)),
+ memory(string_type->add_offset(count_offset)),
+ memory(string_type->add_offset(offset_offset)),
+ receiver,
+ argument));
+ push(compare);
+ return true;
+}
+
+// Java version of String.indexOf(constant string)
+// class StringDecl {
+// StringDecl(char[] ca) {
+// offset = 0;
+// count = ca.length;
+// value = ca;
+// }
+// int offset;
+// int count;
+// char[] value;
+// }
+//
+// static int string_indexOf_J(StringDecl string_object, char[] target_object,
+// int targetOffset, int cache_i, int md2) {
+// int cache = cache_i;
+// int sourceOffset = string_object.offset;
+// int sourceCount = string_object.count;
+// int targetCount = target_object.length;
+//
+// int targetCountLess1 = targetCount - 1;
+// int sourceEnd = sourceOffset + sourceCount - targetCountLess1;
+//
+// char[] source = string_object.value;
+// char[] target = target_object;
+// int lastChar = target[targetCountLess1];
+//
+// outer_loop:
+// for (int i = sourceOffset; i < sourceEnd; ) {
+// int src = source[i + targetCountLess1];
+// if (src == lastChar) {
+// // With random strings and a 4-character alphabet,
+// // reverse matching at this point sets up 0.8% fewer
+// // frames, but (paradoxically) makes 0.3% more probes.
+// // Since those probes are nearer the lastChar probe,
+// there may be a net D$ win with reverse matching.
+// But reversing the loop inhibits unrolling of the inner loop
+// for an unknown reason. So does running the outer loop from
+// // (sourceOffset - targetCountLess1) to (sourceOffset + sourceCount)
+// for (int j = 0; j < targetCountLess1; j++) {
+// if (target[targetOffset + j] != source[i+j]) {
+// if ((cache & (1 << source[i+j])) == 0) {
+// if (md2 < j+1) {
+// i += j+1;
+// continue outer_loop;
+// }
+// }
+// i += md2;
+// continue outer_loop;
+// }
+// }
+// return i - sourceOffset;
+// }
+// if ((cache & (1 << src)) == 0) {
+// i += targetCountLess1;
+// } // using "i += targetCount;" and an "else i++;" causes a jump to jump.
+// i++;
+// }
+// return -1;
+// }
+
+//------------------------------string_indexOf------------------------
+Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_array, jint targetOffset_i,
+ jint cache_i, jint md2_i) {
+
+ Node* no_ctrl = NULL;
+ float likely = PROB_LIKELY(0.9);
+ float unlikely = PROB_UNLIKELY(0.9);
+
+ const int value_offset = java_lang_String::value_offset_in_bytes();
+ const int count_offset = java_lang_String::count_offset_in_bytes();
+ const int offset_offset = java_lang_String::offset_offset_in_bytes();
+
+ ciInstanceKlass* klass = env()->String_klass();
+ const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
+ const TypeAryPtr* source_type = TypeAryPtr::make(TypePtr::NotNull, TypeAry::make(TypeInt::CHAR,TypeInt::POS), ciTypeArrayKlass::make(T_CHAR), true, 0);
+
+ Node* sourceOffseta = basic_plus_adr(string_object, string_object, offset_offset);
+ Node* sourceOffset = make_load(no_ctrl, sourceOffseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
+ Node* sourceCounta = basic_plus_adr(string_object, string_object, count_offset);
+ Node* sourceCount = make_load(no_ctrl, sourceCounta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+ Node* sourcea = basic_plus_adr(string_object, string_object, value_offset);
+ Node* source = make_load(no_ctrl, sourcea, source_type, T_OBJECT, string_type->add_offset(value_offset));
+
+ Node* target = _gvn.transform(ConPNode::make(C, target_array));
+ jint target_length = target_array->length();
+ const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
+ const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
+
+ IdealKit kit(gvn(), control(), merged_memory());
+#define __ kit.
+ Node* zero = __ ConI(0);
+ Node* one = __ ConI(1);
+ Node* cache = __ ConI(cache_i);
+ Node* md2 = __ ConI(md2_i);
+ Node* lastChar = __ ConI(target_array->char_at(target_length - 1));
+ Node* targetCount = __ ConI(target_length);
+ Node* targetCountLess1 = __ ConI(target_length - 1);
+ Node* targetOffset = __ ConI(targetOffset_i);
+ Node* sourceEnd = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1);
+
+ IdealVariable rtn(kit), i(kit), j(kit); __ declares_done();
+ Node* outer_loop = __ make_label(2 /* goto */);
+ Node* return_ = __ make_label(1);
+
+ __ set(rtn,__ ConI(-1));
+ __ loop(i, sourceOffset, BoolTest::lt, sourceEnd); {
+ Node* i2 = __ AddI(__ value(i), targetCountLess1);
+ // pin to prohibit loading of "next iteration" value which may SEGV (rare)
+ Node* src = load_array_element(__ ctrl(), source, i2, TypeAryPtr::CHARS);
+ __ if_then(src, BoolTest::eq, lastChar, unlikely); {
+ __ loop(j, zero, BoolTest::lt, targetCountLess1); {
+ Node* tpj = __ AddI(targetOffset, __ value(j));
+ Node* targ = load_array_element(no_ctrl, target, tpj, target_type);
+ Node* ipj = __ AddI(__ value(i), __ value(j));
+ Node* src2 = load_array_element(no_ctrl, source, ipj, TypeAryPtr::CHARS);
+ __ if_then(targ, BoolTest::ne, src2); {
+ __ if_then(__ AndI(cache, __ LShiftI(one, src2)), BoolTest::eq, zero); {
+ __ if_then(md2, BoolTest::lt, __ AddI(__ value(j), one)); {
+ __ increment(i, __ AddI(__ value(j), one));
+ __ goto_(outer_loop);
+ } __ end_if(); __ dead(j);
+ }__ end_if(); __ dead(j);
+ __ increment(i, md2);
+ __ goto_(outer_loop);
+ }__ end_if();
+ __ increment(j, one);
+ }__ end_loop(); __ dead(j);
+ __ set(rtn, __ SubI(__ value(i), sourceOffset)); __ dead(i);
+ __ goto_(return_);
+ }__ end_if();
+ __ if_then(__ AndI(cache, __ LShiftI(one, src)), BoolTest::eq, zero, likely); {
+ __ increment(i, targetCountLess1);
+ }__ end_if();
+ __ increment(i, one);
+ __ bind(outer_loop);
+ }__ end_loop(); __ dead(i);
+ __ bind(return_);
+ __ drain_delay_transform();
+
+ set_control(__ ctrl());
+ Node* result = __ value(rtn);
+#undef __
+ C->set_has_loops(true);
+ return result;
+}
+
+
+//------------------------------inline_string_indexOf------------------------
+bool LibraryCallKit::inline_string_indexOf() {
+
+ _sp += 2;
+ Node *argument = pop(); // pop non-receiver first: it was pushed second
+ Node *receiver = pop();
+
+ // Don't intrinsify if the argument isn't a constant string.
+ if (!argument->is_Con()) {
+ return false;
+ }
+ const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr();
+ if (str_type == NULL) {
+ return false;
+ }
+ ciInstanceKlass* klass = env()->String_klass();
+ ciObject* str_const = str_type->const_oop();
+ if (str_const == NULL || str_const->klass() != klass) {
+ return false;
+ }
+ ciInstance* str = str_const->as_instance();
+ assert(str != NULL, "must be instance");
+
+ const int value_offset = java_lang_String::value_offset_in_bytes();
+ const int count_offset = java_lang_String::count_offset_in_bytes();
+ const int offset_offset = java_lang_String::offset_offset_in_bytes();
+
+ ciObject* v = str->field_value_by_offset(value_offset).as_object();
+ int o = str->field_value_by_offset(offset_offset).as_int();
+ int c = str->field_value_by_offset(count_offset).as_int();
+ ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array
+
+ // Constant strings have no offset and count == length, which
+ // simplifies the resulting code somewhat, so let's optimize for that.
+ if (o != 0 || c != pat->length()) {
+ return false;
+ }
+
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when string compare is inlined into a method
+ // which handles NullPointerExceptions.
+ _sp += 2;
+ receiver = do_null_check(receiver, T_OBJECT);
+ // No null check on the argument is needed since it's a constant String oop.
+ _sp -= 2;
+ if (stopped()) {
+ return true;
+ }
+
+ // An empty pattern string always returns 0 (match at beginning of string)
+ if (c == 0) {
+ push(intcon(0));
+ return true;
+ }
+
+ jchar lastChar = pat->char_at(o + (c - 1));
+ int cache = 0;
+ int i;
+ for (i = 0; i < c - 1; i++) {
+ assert(i < pat->length(), "out of range");
+ cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
+ }
+
+ int md2 = c;
+ for (i = 0; i < c - 1; i++) {
+ assert(i < pat->length(), "out of range");
+ if (pat->char_at(o + i) == lastChar) {
+ md2 = (c - 1) - i;
+ }
+ }
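+ // For example, with the constant pattern "abcab" (c == 5, lastChar == 'b'),
+ // the cache gets bits set for 'a', 'b' and 'c' (each char taken mod 32),
+ // and the last earlier occurrence of 'b' is at index 1, so md2 == 4 - 1 == 3,
+ // the distance the search may skip ahead after a partial match fails.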
+
+ Node* result = string_indexOf(receiver, pat, o, cache, md2);
+ push(result);
+ return true;
+}
+
+//--------------------------pop_math_arg--------------------------------
+// Pop a double argument to a math function from the stack
+// rounding it if necessary.
+Node * LibraryCallKit::pop_math_arg() {
+ Node *arg = pop_pair();
+ if( Matcher::strict_fp_requires_explicit_rounding && UseSSE<=1 )
+ arg = _gvn.transform( new (C, 2) RoundDoubleNode(0, arg) );
+ return arg;
+}
+
+//------------------------------inline_trig----------------------------------
+// Inline sin/cos/tan instructions, if possible. If rounding is required, do
+// argument reduction which will turn into a fast/slow diamond.
+bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
+ _sp += arg_size(); // restore stack pointer
+ Node* arg = pop_math_arg();
+ Node* trig = NULL;
+
+ switch (id) {
+ case vmIntrinsics::_dsin:
+ trig = _gvn.transform((Node*)new (C, 2) SinDNode(arg));
+ break;
+ case vmIntrinsics::_dcos:
+ trig = _gvn.transform((Node*)new (C, 2) CosDNode(arg));
+ break;
+ case vmIntrinsics::_dtan:
+ trig = _gvn.transform((Node*)new (C, 2) TanDNode(arg));
+ break;
+ default:
+ assert(false, "bad intrinsic was passed in");
+ return false;
+ }
+
+ // Rounding required? Check for argument reduction!
+ if( Matcher::strict_fp_requires_explicit_rounding ) {
+
+ static const double pi_4 = 0.7853981633974483;
+ static const double neg_pi_4 = -0.7853981633974483;
+ // pi/2 in 80-bit extended precision
+ // static const unsigned char pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0x3f,0x00,0x00,0x00,0x00,0x00,0x00};
+ // -pi/2 in 80-bit extended precision
+ // static const unsigned char neg_pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0xbf,0x00,0x00,0x00,0x00,0x00,0x00};
+ // Cutoff value for using this argument reduction technique
+ //static const double pi_2_minus_epsilon = 1.564660403643354;
+ //static const double neg_pi_2_plus_epsilon = -1.564660403643354;
+
+ // Pseudocode for sin:
+ // if (x <= Math.PI / 4.0) {
+ // if (x >= -Math.PI / 4.0) return fsin(x);
+ // if (x >= -Math.PI / 2.0) return -fcos(x + Math.PI / 2.0);
+ // } else {
+ // if (x <= Math.PI / 2.0) return fcos(x - Math.PI / 2.0);
+ // }
+ // return StrictMath.sin(x);
+
+ // Pseudocode for cos:
+ // if (x <= Math.PI / 4.0) {
+ // if (x >= -Math.PI / 4.0) return fcos(x);
+ // if (x >= -Math.PI / 2.0) return fsin(x + Math.PI / 2.0);
+ // } else {
+ // if (x <= Math.PI / 2.0) return -fsin(x - Math.PI / 2.0);
+ // }
+ // return StrictMath.cos(x);
+
+ // Actually, sticking an 80-bit Intel value into C2 will be tough; it
+ // requires a special machine instruction to load it. Instead we'll try
+ // the 'easy' case. If we really need the extra range +/- PI/2 we'll
+ // probably do the math inside the SIN encoding.
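+ // So the diamond built below only distinguishes |arg| <= PI/4, where the
+ // hardware result already computed above is used, from |arg| > PI/4,
+ // which falls back to the SharedRuntime leaf call.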
+
+ // Make the merge point
+ RegionNode *r = new (C, 3) RegionNode(3);
+ Node *phi = new (C, 3) PhiNode(r,Type::DOUBLE);
+
+ // Flatten arg so we need only 1 test
+ Node *abs = _gvn.transform(new (C, 2) AbsDNode(arg));
+ // Node for PI/4 constant
+ Node *pi4 = makecon(TypeD::make(pi_4));
+ // Check PI/4 : abs(arg)
+ Node *cmp = _gvn.transform(new (C, 3) CmpDNode(pi4,abs));
+ // Check: If PI/4 < abs(arg) then go slow
+ Node *bol = _gvn.transform( new (C, 2) BoolNode( cmp, BoolTest::lt ) );
+ // Branch either way
+ IfNode *iff = create_and_xform_if(control(),bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+ set_control(opt_iff(r,iff));
+
+ // Set fast path result
+ phi->init_req(2,trig);
+
+ // Slow path - non-blocking leaf call
+ Node* call = NULL;
+ switch (id) {
+ case vmIntrinsics::_dsin:
+ call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::dsin),
+ "Sin", NULL, arg, top());
+ break;
+ case vmIntrinsics::_dcos:
+ call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::dcos),
+ "Cos", NULL, arg, top());
+ break;
+ case vmIntrinsics::_dtan:
+ call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtan),
+ "Tan", NULL, arg, top());
+ break;
+ }
+ assert(control()->in(0) == call, "");
+ Node* slow_result = _gvn.transform(new (C, 1) ProjNode(call,TypeFunc::Parms));
+ r->init_req(1,control());
+ phi->init_req(1,slow_result);
+
+ // Post-merge
+ set_control(_gvn.transform(r));
+ record_for_igvn(r);
+ trig = _gvn.transform(phi);
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+ }
+ // Push result back on JVM stack
+ push_pair(trig);
+ return true;
+}
+
+//------------------------------inline_sqrt-------------------------------------
+// Inline square root instruction, if possible.
+bool LibraryCallKit::inline_sqrt(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_dsqrt, "Not square root");
+ _sp += arg_size(); // restore stack pointer
+ push_pair(_gvn.transform(new (C, 2) SqrtDNode(0, pop_math_arg())));
+ return true;
+}
+
+//------------------------------inline_abs-------------------------------------
+// Inline absolute value instruction, if possible.
+bool LibraryCallKit::inline_abs(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_dabs, "Not absolute value");
+ _sp += arg_size(); // restore stack pointer
+ push_pair(_gvn.transform(new (C, 2) AbsDNode(pop_math_arg())));
+ return true;
+}
+
+//------------------------------inline_exp-------------------------------------
+// Inline exp instructions, if possible. The Intel hardware only misses
+// really odd corner cases (+/- Infinity). Just uncommon-trap them.
+bool LibraryCallKit::inline_exp(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_dexp, "Not exp");
+
+ // If this inlining ever returned NaN in the past, we do not intrinsify it
+ // ever again. NaN results require StrictMath.exp handling.
+ if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
+
+ // Do not intrinsify on older platforms which lack cmove.
+ if (ConditionalMoveLimit == 0) return false;
+
+ _sp += arg_size(); // restore stack pointer
+ Node *x = pop_math_arg();
+ Node *result = _gvn.transform(new (C, 2) ExpDNode(0,x));
+
+ //-------------------
+ //result=(result.isNaN())? StrictMath::exp():result;
+ // Check for NaN (result != result); if so, go to StrictMath
+ Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
+ // Build the boolean node
+ Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
+
+ { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
+ // End the current control-flow path
+ push_pair(x);
+ // Math.exp intrinsic returned a NaN, which requires StrictMath.exp
+ // to handle. Recompile without intrinsifying Math.exp
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_make_not_entrant);
+ }
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ push_pair(result);
+
+ return true;
+}
+
+//------------------------------inline_pow-------------------------------------
+// Inline power instructions, if possible.
+bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_dpow, "Not pow");
+
+ // If this inlining ever returned NaN in the past, we do not intrinsify it
+ // ever again. NaN results require StrictMath.pow handling.
+ if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
+
+ // Do not intrinsify on older platforms which lack cmove.
+ if (ConditionalMoveLimit == 0) return false;
+
+ // Pseudocode for pow
+ // if (x <= 0.0) {
+ // if ((double)((int)y)==y) { // if y is int
+ // result = ((1&(int)y)==0)?-DPow(abs(x), y):DPow(abs(x), y)
+ // } else {
+ // result = NaN;
+ // }
+ // } else {
+ // result = DPow(x,y);
+ // }
+ // if (result != result)? {
+ // uncommon_trap();
+ // }
+ // return result;
+
+ _sp += arg_size(); // restore stack pointer
+ Node* y = pop_math_arg();
+ Node* x = pop_math_arg();
+
+ Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, x, y) );
+
+ // Short form: if not top-level (i.e., Math.pow is being inlined inside
+ // some other method) then skip the fancy tests and just check for a
+ // NaN result.
+ Node *result = NULL;
+ if( jvms()->depth() >= 1 ) {
+ result = fast_result;
+ } else {
+
+ // Set the merge point for If node with condition of (x <= 0.0)
+ // There are four possible paths to region node and phi node
+ RegionNode *r = new (C, 4) RegionNode(4);
+ Node *phi = new (C, 4) PhiNode(r, Type::DOUBLE);
+
+ // Build the first if node: if (x <= 0.0)
+ // Node for 0 constant
+ Node *zeronode = makecon(TypeD::ZERO);
+ // Check x:0
+ Node *cmp = _gvn.transform(new (C, 3) CmpDNode(x, zeronode));
+ // Check: If (x<=0) then go complex path
+ Node *bol1 = _gvn.transform( new (C, 2) BoolNode( cmp, BoolTest::le ) );
+ // Branch either way
+ IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
+ Node *opt_test = _gvn.transform(if1);
+ //assert( opt_test->is_If(), "Expect an IfNode");
+ IfNode *opt_if1 = (IfNode*)opt_test;
+ // Fast path taken; set region slot 3
+ Node *fast_taken = _gvn.transform( new (C, 1) IfFalseNode(opt_if1) );
+ r->init_req(3,fast_taken); // Capture fast-control
+
+ // Fast path not-taken, i.e. slow path
+ Node *complex_path = _gvn.transform( new (C, 1) IfTrueNode(opt_if1) );
+
+ // Set fast path result
+ Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, x, y) );
+ phi->init_req(3, fast_result);
+
+ // Complex path
+ // Build the second if node (if y is int)
+ // Node for (int)y
+ Node *inty = _gvn.transform( new (C, 2) ConvD2INode(y));
+ // Node for (double)((int) y)
+ Node *doubleinty= _gvn.transform( new (C, 2) ConvI2DNode(inty));
+ // Check (double)((int) y) : y
+ Node *cmpinty= _gvn.transform(new (C, 3) CmpDNode(doubleinty, y));
+ // Check if (y isn't int) then go to slow path
+
+ Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmpinty, BoolTest::ne ) );
+ // Branch either way
+ IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
+ Node *slow_path = opt_iff(r,if2); // Set region path 2
+
+ // Calculate DPow(abs(x), y)*(1 & (int)y)
+ // Node for constant 1
+ Node *conone = intcon(1);
+ // 1& (int)y
+ Node *signnode= _gvn.transform( new (C, 3) AndINode(conone, inty) );
+ // zero node
+ Node *conzero = intcon(0);
+ // Check (1&(int)y)==0?
+ Node *cmpeq1 = _gvn.transform(new (C, 3) CmpINode(signnode, conzero));
+ // Check if (1&(int)y) != 0, i.e. y is odd; if so the result is negative
+ Node *bol3 = _gvn.transform( new (C, 2) BoolNode( cmpeq1, BoolTest::ne ) );
+ // abs(x)
+ Node *absx=_gvn.transform( new (C, 2) AbsDNode(x));
+ // abs(x)^y
+ Node *absxpowy = _gvn.transform( new (C, 3) PowDNode(0, absx, y) );
+ // -abs(x)^y
+ Node *negabsxpowy = _gvn.transform(new (C, 2) NegDNode (absxpowy));
+ // (1&(int)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
+ Node *signresult = _gvn.transform( CMoveNode::make(C, NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
+ // Set complex path fast result
+ phi->init_req(2, signresult);
+
+ static const jlong nan_bits = CONST64(0x7ff8000000000000);
+ Node *slow_result = makecon(TypeD::make(*(double*)&nan_bits)); // return NaN
+ r->init_req(1,slow_path);
+ phi->init_req(1,slow_result);
+
+ // Post merge
+ set_control(_gvn.transform(r));
+ record_for_igvn(r);
+ result=_gvn.transform(phi);
+ }
+
+ //-------------------
+ //result=(result.isNaN())? uncommon_trap():result;
+ // Check for NaN (result != result); if so, go to StrictMath
+ Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
+ // Build the boolean node
+ Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
+
+ { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
+ // End the current control-flow path
+ push_pair(x);
+ push_pair(y);
+ // Math.pow intrinsic returned a NaN, which requires StrictMath.pow
+ // to handle. Recompile without intrinsifying Math.pow.
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_make_not_entrant);
+ }
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ push_pair(result);
+
+ return true;
+}
+
+//------------------------------inline_trans-------------------------------------
+// Inline transcendental instructions, if possible. The Intel hardware gets
+// these right, no funny corner cases missed.
+bool LibraryCallKit::inline_trans(vmIntrinsics::ID id) {
+ _sp += arg_size(); // restore stack pointer
+ Node* arg = pop_math_arg();
+ Node* trans = NULL;
+
+ switch (id) {
+ case vmIntrinsics::_dlog:
+ trans = _gvn.transform((Node*)new (C, 2) LogDNode(arg));
+ break;
+ case vmIntrinsics::_dlog10:
+ trans = _gvn.transform((Node*)new (C, 2) Log10DNode(arg));
+ break;
+ default:
+ assert(false, "bad intrinsic was passed in");
+ return false;
+ }
+
+ // Push result back on JVM stack
+ push_pair(trans);
+ return true;
+}
+
+//------------------------------runtime_math-----------------------------
+bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
+ Node* a = NULL;
+ Node* b = NULL;
+
+ assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
+ "must be (DD)D or (D)D type");
+
+ // Inputs
+ _sp += arg_size(); // restore stack pointer
+ if (call_type == OptoRuntime::Math_DD_D_Type()) {
+ b = pop_math_arg();
+ }
+ a = pop_math_arg();
+
+ const TypePtr* no_memory_effects = NULL;
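+ // Doubles occupy two parameter slots, so each value is passed with top()
+ // as the placeholder for its second half; for the one-argument (D)D case
+ // there is no b.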
+ Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
+ no_memory_effects,
+ a, top(), b, b ? top() : NULL);
+ Node* value = _gvn.transform(new (C, 1) ProjNode(trig, TypeFunc::Parms+0));
+#ifdef ASSERT
+ Node* value_top = _gvn.transform(new (C, 1) ProjNode(trig, TypeFunc::Parms+1));
+ assert(value_top == top(), "second value must be top");
+#endif
+
+ push_pair(value);
+ return true;
+}
+
+//------------------------------inline_math_native-----------------------------
+bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
+ switch (id) {
+ // These intrinsics are not properly supported on all hardware
+ case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dcos), "COS");
+ case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dsin), "SIN");
+ case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan), "TAN");
+
+ case vmIntrinsics::_dlog: return Matcher::has_match_rule(Op_LogD) ? inline_trans(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog), "LOG");
+ case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_trans(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), "LOG10");
+
+ // These intrinsics are supported on all hardware
+ case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false;
+ case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_abs(id) : false;
+
+ // These intrinsics don't work on X86. The ad implementation doesn't
+ // handle NaNs properly. Instead of returning infinity, the ad
+ // implementation returns a NaN on overflow. See bug: 6304089
+ // Once the ad implementations are fixed, change the code below
+ // to match the intrinsics above
+
+ case vmIntrinsics::_dexp: return
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
+ case vmIntrinsics::_dpow: return
+ runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
+
+ // These intrinsics are not yet correctly implemented
+ case vmIntrinsics::_datan2:
+ return false;
+
+ default:
+ ShouldNotReachHere();
+ return false;
+ }
+}
+
+static bool is_simple_name(Node* n) {
+ return (n->req() == 1 // constant
+ || (n->is_Type() && n->as_Type()->type()->singleton())
+ || n->is_Proj() // parameter or return value
+ || n->is_Phi() // local of some sort
+ );
+}
+
+//----------------------------inline_min_max-----------------------------------
+bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
+ push(generate_min_max(id, argument(0), argument(1)));
+
+ return true;
+}
+
+Node*
+LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
+ // These are the candidate return values:
+ Node* xvalue = x0;
+ Node* yvalue = y0;
+
+ if (xvalue == yvalue) {
+ return xvalue;
+ }
+
+ bool want_max = (id == vmIntrinsics::_max);
+
+ const TypeInt* txvalue = _gvn.type(xvalue)->isa_int();
+ const TypeInt* tyvalue = _gvn.type(yvalue)->isa_int();
+ if (txvalue == NULL || tyvalue == NULL) return top();
+ // This is not really necessary, but it is consistent with a
+ // hypothetical MaxINode::Value method:
+ int widen = MAX2(txvalue->_widen, tyvalue->_widen);
+
+ // %%% This folding logic should (ideally) be in a different place.
+ // Some of it should be inside IfNode, and there should be a more reliable
+ // transformation of ?: style patterns into cmoves. We also want
+ // more powerful optimizations around cmove and min/max.
+
+ // Try to find a dominating comparison of these guys.
+ // It can simplify the index computation for Arrays.copyOf
+ // and similar uses of System.arraycopy.
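+ // For instance, when code shaped like
+ //   if (x < y)  n = Math.min(x, y);
+ // reaches this point, the dominating x < y test proves the answer is
+ // simply x, and the min collapses with no CMove at all.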
+ // First, compute the normalized version of CmpI(x, y).
+ int cmp_op = Op_CmpI;
+ Node* xkey = xvalue;
+ Node* ykey = yvalue;
+ Node* ideal_cmpxy = _gvn.transform( new(C, 3) CmpINode(xkey, ykey) );
+ if (ideal_cmpxy->is_Cmp()) {
+ // E.g., if we have CmpI(length - offset, count),
+ // it might idealize to CmpI(length, count + offset)
+ cmp_op = ideal_cmpxy->Opcode();
+ xkey = ideal_cmpxy->in(1);
+ ykey = ideal_cmpxy->in(2);
+ }
+
+ // Start by locating any relevant comparisons.
+ Node* start_from = (xkey->outcnt() < ykey->outcnt()) ? xkey : ykey;
+ Node* cmpxy = NULL;
+ Node* cmpyx = NULL;
+ for (DUIterator_Fast kmax, k = start_from->fast_outs(kmax); k < kmax; k++) {
+ Node* cmp = start_from->fast_out(k);
+ if (cmp->outcnt() > 0 && // must have prior uses
+ cmp->in(0) == NULL && // must be context-independent
+ cmp->Opcode() == cmp_op) { // right kind of compare
+ if (cmp->in(1) == xkey && cmp->in(2) == ykey) cmpxy = cmp;
+ if (cmp->in(1) == ykey && cmp->in(2) == xkey) cmpyx = cmp;
+ }
+ }
+
+ const int NCMPS = 2;
+ Node* cmps[NCMPS] = { cmpxy, cmpyx };
+ int cmpn;
+ for (cmpn = 0; cmpn < NCMPS; cmpn++) {
+ if (cmps[cmpn] != NULL) break; // find a result
+ }
+ if (cmpn < NCMPS) {
+ // Look for a dominating test that tells us the min and max.
+ int depth = 0; // Limit search depth for speed
+ Node* dom = control();
+ for (; dom != NULL; dom = IfNode::up_one_dom(dom, true)) {
+ if (++depth >= 100) break;
+ Node* ifproj = dom;
+ if (!ifproj->is_Proj()) continue;
+ Node* iff = ifproj->in(0);
+ if (!iff->is_If()) continue;
+ Node* bol = iff->in(1);
+ if (!bol->is_Bool()) continue;
+ Node* cmp = bol->in(1);
+ if (cmp == NULL) continue;
+ for (cmpn = 0; cmpn < NCMPS; cmpn++)
+ if (cmps[cmpn] == cmp) break;
+ if (cmpn == NCMPS) continue;
+ BoolTest::mask btest = bol->as_Bool()->_test._test;
+ if (ifproj->is_IfFalse()) btest = BoolTest(btest).negate();
+ if (cmp->in(1) == ykey) btest = BoolTest(btest).commute();
+ // At this point, we know that 'x btest y' is true.
+ switch (btest) {
+ case BoolTest::eq:
+ // They are proven equal, so we can collapse the min/max.
+ // Either value is the answer. Choose the simpler.
+ if (is_simple_name(yvalue) && !is_simple_name(xvalue))
+ return yvalue;
+ return xvalue;
+ case BoolTest::lt: // x < y
+ case BoolTest::le: // x <= y
+ return (want_max ? yvalue : xvalue);
+ case BoolTest::gt: // x > y
+ case BoolTest::ge: // x >= y
+ return (want_max ? xvalue : yvalue);
+ }
+ }
+ }
+
+ // We failed to find a dominating test.
+ // Let's pick a test that might GVN with prior tests.
+ Node* best_bol = NULL;
+ BoolTest::mask best_btest = BoolTest::illegal;
+ for (cmpn = 0; cmpn < NCMPS; cmpn++) {
+ Node* cmp = cmps[cmpn];
+ if (cmp == NULL) continue;
+ for (DUIterator_Fast jmax, j = cmp->fast_outs(jmax); j < jmax; j++) {
+ Node* bol = cmp->fast_out(j);
+ if (!bol->is_Bool()) continue;
+ BoolTest::mask btest = bol->as_Bool()->_test._test;
+ if (btest == BoolTest::eq || btest == BoolTest::ne) continue;
+ if (cmp->in(1) == ykey) btest = BoolTest(btest).commute();
+ if (bol->outcnt() > (best_bol == NULL ? 0 : best_bol->outcnt())) {
+ best_bol = bol->as_Bool();
+ best_btest = btest;
+ }
+ }
+ }
+
+ Node* answer_if_true = NULL;
+ Node* answer_if_false = NULL;
+ switch (best_btest) {
+ default:
+ if (cmpxy == NULL)
+ cmpxy = ideal_cmpxy;
+ best_bol = _gvn.transform( new(C, 2) BoolNode(cmpxy, BoolTest::lt) );
+ // and fall through:
+ case BoolTest::lt: // x < y
+ case BoolTest::le: // x <= y
+ answer_if_true = (want_max ? yvalue : xvalue);
+ answer_if_false = (want_max ? xvalue : yvalue);
+ break;
+ case BoolTest::gt: // x > y
+ case BoolTest::ge: // x >= y
+ answer_if_true = (want_max ? xvalue : yvalue);
+ answer_if_false = (want_max ? yvalue : xvalue);
+ break;
+ }
+
+ jint hi, lo;
+ if (want_max) {
+ // We can sharpen the minimum.
+ hi = MAX2(txvalue->_hi, tyvalue->_hi);
+ lo = MAX2(txvalue->_lo, tyvalue->_lo);
+ } else {
+ // We can sharpen the maximum.
+ hi = MIN2(txvalue->_hi, tyvalue->_hi);
+ lo = MIN2(txvalue->_lo, tyvalue->_lo);
+ }
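+ // For example, the max of values typed [0,10] and [5,20] can be typed
+ // [5,20], while the min of the same pair can be typed [0,10].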
+
+ // Use a flow-free graph structure, to avoid creating excess control edges
+ // which could hinder other optimizations.
+ // Since Math.min/max is often used with arraycopy, we want
+ // tightly_coupled_allocation to be able to see beyond min/max expressions.
+ Node* cmov = CMoveNode::make(C, NULL, best_bol,
+ answer_if_false, answer_if_true,
+ TypeInt::make(lo, hi, widen));
+
+ return _gvn.transform(cmov);
+
+ /*
+ // This is not as desirable as it may seem, since Min and Max
+ // nodes do not have a full set of optimizations.
+ // And they would interfere, anyway, with 'if' optimizations
+ // and with CMoveI canonical forms.
+ switch (id) {
+ case vmIntrinsics::_min:
+ result_val = _gvn.transform(new (C, 3) MinINode(x,y)); break;
+ case vmIntrinsics::_max:
+ result_val = _gvn.transform(new (C, 3) MaxINode(x,y)); break;
+ default:
+ ShouldNotReachHere();
+ }
+ */
+}
+
+inline int
+LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset) {
+ const TypePtr* base_type = TypePtr::NULL_PTR;
+ if (base != NULL) base_type = _gvn.type(base)->isa_ptr();
+ if (base_type == NULL) {
+ // Unknown type.
+ return Type::AnyPtr;
+ } else if (base_type == TypePtr::NULL_PTR) {
+ // Since this is a NULL+long form, we have to switch to a rawptr.
+ base = _gvn.transform( new (C, 2) CastX2PNode(offset) );
+ offset = MakeConX(0);
+ return Type::RawPtr;
+ } else if (base_type->base() == Type::RawPtr) {
+ return Type::RawPtr;
+ } else if (base_type->isa_oopptr()) {
+ // Base is never null => always a heap address.
+ if (base_type->ptr() == TypePtr::NotNull) {
+ return Type::OopPtr;
+ }
+ // Offset is small => always a heap address.
+ const TypeX* offset_type = _gvn.type(offset)->isa_intptr_t();
+ if (offset_type != NULL &&
+ base_type->offset() == 0 && // (should always be?)
+ offset_type->_lo >= 0 &&
+ !MacroAssembler::needs_explicit_null_check(offset_type->_hi)) {
+ return Type::OopPtr;
+ }
+ // Otherwise, it might either be oop+off or NULL+addr.
+ return Type::AnyPtr;
+ } else {
+ // No information:
+ return Type::AnyPtr;
+ }
+}
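+// For example, a constant-null base (including the raw-address Unsafe
+// variants, which pass base == NULL here) takes the NULL_PTR branch above
+// and yields a raw pointer rooted at CastX2P(offset), while a provably
+// non-null oop base with a small non-negative offset classifies as
+// Type::OopPtr and keeps its oop base.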
+
+inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) {
+ int kind = classify_unsafe_addr(base, offset);
+ if (kind == Type::RawPtr) {
+ return basic_plus_adr(top(), base, offset);
+ } else {
+ return basic_plus_adr(base, offset);
+ }
+}
+
+//----------------------------inline_reverseBytes_int/long-------------------
+// inline Int.reverseBytes(int)
+// inline Long.reverseBytes(long)
+bool LibraryCallKit::inline_reverseBytes(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_reverseBytes_i || id == vmIntrinsics::_reverseBytes_l, "not reverse Bytes");
+ if (id == vmIntrinsics::_reverseBytes_i && !Matcher::has_match_rule(Op_ReverseBytesI)) return false;
+ if (id == vmIntrinsics::_reverseBytes_l && !Matcher::has_match_rule(Op_ReverseBytesL)) return false;
+ _sp += arg_size(); // restore stack pointer
+ switch (id) {
+ case vmIntrinsics::_reverseBytes_i:
+ push(_gvn.transform(new (C, 2) ReverseBytesINode(0, pop())));
+ break;
+ case vmIntrinsics::_reverseBytes_l:
+ push_pair(_gvn.transform(new (C, 2) ReverseBytesLNode(0, pop_pair())));
+ break;
+ default:
+ ;
+ }
+ return true;
+}
+
+//----------------------------inline_unsafe_access----------------------------
+
+const static BasicType T_ADDRESS_HOLDER = T_LONG;
+
+// Interpret Unsafe.fieldOffset cookies correctly:
+extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
+
+bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile) {
+ if (callee()->is_static()) return false; // caller must have the capability!
+
+#ifndef PRODUCT
+ {
+ ResourceMark rm;
+ // Check the signatures.
+ ciSignature* sig = signature();
+#ifdef ASSERT
+ if (!is_store) {
+ // Object getObject(Object base, int/long offset), etc.
+ BasicType rtype = sig->return_type()->basic_type();
+ if (rtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::getAddress_name())
+ rtype = T_ADDRESS; // it is really a C void*
+ assert(rtype == type, "getter must return the expected value");
+ if (!is_native_ptr) {
+ assert(sig->count() == 2, "oop getter has 2 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "getter base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "getter offset is correct");
+ } else {
+ assert(sig->count() == 1, "native getter has 1 argument");
+ assert(sig->type_at(0)->basic_type() == T_LONG, "getter base is long");
+ }
+ } else {
+ // void putObject(Object base, int/long offset, Object x), etc.
+ assert(sig->return_type()->basic_type() == T_VOID, "putter must not return a value");
+ if (!is_native_ptr) {
+ assert(sig->count() == 3, "oop putter has 3 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "putter base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "putter offset is correct");
+ } else {
+ assert(sig->count() == 2, "native putter has 2 arguments");
+ assert(sig->type_at(0)->basic_type() == T_LONG, "putter base is long");
+ }
+ BasicType vtype = sig->type_at(sig->count()-1)->basic_type();
+ if (vtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::putAddress_name())
+ vtype = T_ADDRESS; // it is really a C void*
+ assert(vtype == type, "putter must accept the expected value");
+ }
+#endif // ASSERT
+ }
+#endif //PRODUCT
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ int type_words = type2size[ (type == T_ADDRESS) ? T_LONG : type ];
+
+ // Argument words: "this" plus (oop/offset) or (lo/hi) args plus maybe 1 or 2 value words
+ int nargs = 1 + (is_native_ptr ? 2 : 3) + (is_store ? type_words : 0);
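+ // e.g., putLong(Object, long, long) comes to 1 + 3 + 2 == 6 argument words,
+ // while the raw-address getLong(long) comes to 1 + 2 == 3.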
+
+ debug_only(int saved_sp = _sp);
+ _sp += nargs;
+
+ Node* val;
+ debug_only(val = (Node*)(uintptr_t)-1);
+
+
+ if (is_store) {
+ // Get the value being stored. (Pop it first; it was pushed last.)
+ switch (type) {
+ case T_DOUBLE:
+ case T_LONG:
+ case T_ADDRESS:
+ val = pop_pair();
+ break;
+ default:
+ val = pop();
+ }
+ }
+
+ // Build address expression. See the code in inline_unsafe_prefetch.
+ Node *adr;
+ Node *heap_base_oop = top();
+ if (!is_native_ptr) {
+ // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
+ Node* offset = pop_pair();
+ // The base is either a Java object or a value produced by Unsafe.staticFieldBase
+ Node* base = pop();
+ // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
+ // to be plain byte offsets, which are also the same as those accepted
+ // by oopDesc::field_base.
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11,
+ "fieldOffset must be byte-scaled");
+ // 32-bit machines ignore the high half!
+ offset = ConvL2X(offset);
+ adr = make_unsafe_address(base, offset);
+ heap_base_oop = base;
+ } else {
+ Node* ptr = pop_pair();
+ // Adjust Java long to machine word:
+ ptr = ConvL2X(ptr);
+ adr = make_unsafe_address(NULL, ptr);
+ }
+
+ // Pop receiver last: it was pushed first.
+ Node *receiver = pop();
+
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
+
+ // First guess at the value type.
+ const Type *value_type = Type::get_const_basic_type(type);
+
+ // Try to categorize the address. If it comes up as TypeJavaPtr::BOTTOM,
+ // there was not enough information to nail it down.
+ Compile::AliasType* alias_type = C->alias_type(adr_type);
+ assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
+
+ // We will need memory barriers unless we can determine a unique
+ // alias category for this reference. (Note: If for some reason
+ // the barriers get omitted and the unsafe reference begins to "pollute"
+ // the alias analysis of the rest of the graph, either Compile::can_alias
+ // or Compile::must_alias will throw a diagnostic assert.)
+ bool need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);
+
+ if (!is_store && type == T_OBJECT) {
+ // Attempt to infer a sharper value type from the offset and base type.
+ ciKlass* sharpened_klass = NULL;
+
+ // See if it is an instance field, with an object type.
+ if (alias_type->field() != NULL) {
+ assert(!is_native_ptr, "native pointer op cannot use a java address");
+ if (alias_type->field()->type()->is_klass()) {
+ sharpened_klass = alias_type->field()->type()->as_klass();
+ }
+ }
+
+ // See if it is a narrow oop array.
+ if (adr_type->isa_aryptr()) {
+ if (adr_type->offset() >= objArrayOopDesc::header_size() * wordSize) {
+ const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr();
+ if (elem_type != NULL) {
+ sharpened_klass = elem_type->klass();
+ }
+ }
+ }
+
+ if (sharpened_klass != NULL) {
+ const TypeOopPtr* tjp = TypeOopPtr::make_from_klass(sharpened_klass);
+
+ // Sharpen the value type.
+ value_type = tjp;
+
+#ifndef PRODUCT
+ if (PrintIntrinsics || PrintInlining || PrintOptoInlining) {
+ tty->print(" from base type: "); adr_type->dump();
+ tty->print(" sharpened value: "); value_type->dump();
+ }
+#endif
+ }
+ }
+
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when the primitive is inlined into a method
+ // which handles NullPointerExceptions.
+ _sp += nargs;
+ do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) {
+ return true;
+ }
+ // Heap pointers get a null-check from the interpreter,
+ // as a courtesy. However, this is not guaranteed by Unsafe,
+ // and it is not possible to fully distinguish unintended nulls
+ // from intended ones in this API.
+
+ if (is_volatile) {
+ // We need to emit leading and trailing CPU membars (see below) in
+ // addition to memory membars when is_volatile. This is a little
+ // too strong, but avoids the need to insert per-alias-type
+ // volatile membars (for stores; compare Parse::do_put_xxx), which
+ // we cannot do effectively here because we probably only have a
+ // rough approximation of type.
+ need_mem_bar = true;
+ // For Stores, place a memory ordering barrier now.
+ if (is_store)
+ insert_mem_bar(Op_MemBarRelease);
+ }
+
+ // Memory barrier to prevent normal and 'unsafe' accesses from
+ // bypassing each other. Happens after null checks, so the
+ // exception paths do not take memory state from the memory barrier,
+ // so there is no problem making a strong assert about mixing users
+ // of safe & unsafe memory. Otherwise fails in a CTW of rt.jar
+ // around 5701, class sun/reflect/UnsafeBooleanFieldAccessorImpl.
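+ // Taken together with the trailing barriers emitted below, a volatile
+ // store through this path is bracketed as
+ //   MemBarRelease, MemBarCPUOrder, store, MemBarVolatile, MemBarCPUOrder
+ // and a volatile load as
+ //   MemBarCPUOrder, load, MemBarAcquire, MemBarCPUOrder.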
+ if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
+
+ if (!is_store) {
+ Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile);
+ // load value and push onto stack
+ switch (type) {
+ case T_BOOLEAN:
+ case T_CHAR:
+ case T_BYTE:
+ case T_SHORT:
+ case T_INT:
+ case T_FLOAT:
+ case T_OBJECT:
+ push( p );
+ break;
+ case T_ADDRESS:
+ // Cast to an int type.
+ p = _gvn.transform( new (C, 2) CastP2XNode(NULL,p) );
+ p = ConvX2L(p);
+ push_pair(p);
+ break;
+ case T_DOUBLE:
+ case T_LONG:
+ push_pair( p );
+ break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ // place effect of store into memory
+ switch (type) {
+ case T_DOUBLE:
+ val = dstore_rounding(val);
+ break;
+ case T_ADDRESS:
+ // Repackage the long as a pointer.
+ val = ConvL2X(val);
+ val = _gvn.transform( new (C, 2) CastX2PNode(val) );
+ break;
+ }
+
+ if (type != T_OBJECT ) {
+ (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
+ } else {
+ // Possibly an oop being stored to Java heap or native memory
+ if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
+ // oop to Java heap.
+ (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, val->bottom_type(), type);
+ } else {
+
+ // We can't tell at compile time if we are storing in the Java heap or outside
+ // of it. So we need to emit code to conditionally do the proper type of
+ // store.
+
+ IdealKit kit(gvn(), control(), merged_memory());
+ kit.declares_done();
+ // QQQ who knows what probability is here??
+ kit.if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
+ (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, val->bottom_type(), type);
+ } kit.else_(); {
+ (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
+ } kit.end_if();
+ }
+ }
+ }
+
+ if (is_volatile) {
+ if (!is_store)
+ insert_mem_bar(Op_MemBarAcquire);
+ else
+ insert_mem_bar(Op_MemBarVolatile);
+ }
+
+ if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
+
+ return true;
+}
+
+//----------------------------inline_unsafe_prefetch----------------------------
+
+bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static) {
+#ifndef PRODUCT
+ {
+ ResourceMark rm;
+ // Check the signatures.
+ ciSignature* sig = signature();
+#ifdef ASSERT
+ // Object getObject(Object base, int/long offset), etc.
+ BasicType rtype = sig->return_type()->basic_type();
+ if (!is_native_ptr) {
+ assert(sig->count() == 2, "oop prefetch has 2 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "prefetch base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "prefetch offset is correct");
+ } else {
+ assert(sig->count() == 1, "native prefetch has 1 argument");
+ assert(sig->type_at(0)->basic_type() == T_LONG, "prefetch base is long");
+ }
+#endif // ASSERT
+ }
+#endif // !PRODUCT
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ // Argument words: "this" if not static, plus (oop/offset) or (lo/hi) args
+ int nargs = (is_static ? 0 : 1) + (is_native_ptr ? 2 : 3);
+
+ debug_only(int saved_sp = _sp);
+ _sp += nargs;
+
+ // Build address expression. See the code in inline_unsafe_access.
+ Node *adr;
+ if (!is_native_ptr) {
+ // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
+ Node* offset = pop_pair();
+ // The base is either a Java object or a value produced by Unsafe.staticFieldBase
+ Node* base = pop();
+ // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
+ // to be plain byte offsets, which are also the same as those accepted
+ // by oopDesc::field_base.
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11,
+ "fieldOffset must be byte-scaled");
+ // 32-bit machines ignore the high half!
+ offset = ConvL2X(offset);
+ adr = make_unsafe_address(base, offset);
+ } else {
+ Node* ptr = pop_pair();
+ // Adjust Java long to machine word:
+ ptr = ConvL2X(ptr);
+ adr = make_unsafe_address(NULL, ptr);
+ }
+
+ if (is_static) {
+ assert(saved_sp == _sp, "must have correct argument count");
+ } else {
+ // Pop receiver last: it was pushed first.
+ Node *receiver = pop();
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when the primitive is inlined into a method
+ // which handles NullPointerExceptions.
+ _sp += nargs;
+ do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) {
+ return true;
+ }
+ }
+
+ // Generate the read or write prefetch
+ Node *prefetch;
+ if (is_store) {
+ prefetch = new (C, 3) PrefetchWriteNode(i_o(), adr);
+ } else {
+ prefetch = new (C, 3) PrefetchReadNode(i_o(), adr);
+ }
+ prefetch->init_req(0, control());
+ set_i_o(_gvn.transform(prefetch));
+
+ return true;
+}
+
+//----------------------------inline_unsafe_CAS----------------------------
+
+bool LibraryCallKit::inline_unsafe_CAS(BasicType type) {
+ // This basic scheme here is the same as inline_unsafe_access, but
+ // differs in enough details that combining them would make the code
+ // overly confusing. (This is a true fact! I originally combined
+ // them, but even I was confused by it!) As much code/comments as
+ // possible are retained from inline_unsafe_access though to make
+ // the correspondences clearer. - dl
+
+ if (callee()->is_static()) return false; // caller must have the capability!
+
+#ifndef PRODUCT
+ {
+ ResourceMark rm;
+ // Check the signatures.
+ ciSignature* sig = signature();
+#ifdef ASSERT
+ BasicType rtype = sig->return_type()->basic_type();
+ assert(rtype == T_BOOLEAN, "CAS must return boolean");
+ assert(sig->count() == 4, "CAS has 4 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
+#endif // ASSERT
+ }
+#endif //PRODUCT
+
+ // number of stack slots per value argument (1 or 2)
+ int type_words = type2size[type];
+
+ // Cannot inline wide CAS on machines that don't support it natively
+ if (type2aelembytes[type] > BytesPerInt && !VM_Version::supports_cx8())
+ return false;
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ // Argument words: "this" plus oop plus offset plus oldvalue plus newvalue;
+ int nargs = 1 + 1 + 2 + type_words + type_words;
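+ // e.g., compareAndSwapLong(Object, long, long, long) comes to
+ // 1 + 1 + 2 + 2 + 2 == 8 argument words.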
+
+ // pop arguments: newval, oldval, offset, base, and receiver
+ debug_only(int saved_sp = _sp);
+ _sp += nargs;
+ Node* newval = (type_words == 1) ? pop() : pop_pair();
+ Node* oldval = (type_words == 1) ? pop() : pop_pair();
+ Node *offset = pop_pair();
+ Node *base = pop();
+ Node *receiver = pop();
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ // Null check receiver.
+ _sp += nargs;
+ do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) {
+ return true;
+ }
+
+ // Build field offset expression.
+ // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
+ // to be plain byte offsets, which are also the same as those accepted
+ // by oopDesc::field_base.
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
+ // 32-bit machines ignore the high half of long offsets
+ offset = ConvL2X(offset);
+ Node* adr = make_unsafe_address(base, offset);
+ const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
+
+ // (Unlike inline_unsafe_access, there seems to be no point in trying
+ // to refine types. Just use the coarse types here.)
+ const Type *value_type = Type::get_const_basic_type(type);
+ Compile::AliasType* alias_type = C->alias_type(adr_type);
+ assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
+ int alias_idx = C->get_alias_index(adr_type);
+
+ // Memory-model-wise, a CAS acts like a little synchronized block,
+ // so it needs barriers on each side. These don't translate into
+ // actual barriers on most machines, but we still need the rest of the
+ // compiler to respect ordering.
+
+ insert_mem_bar(Op_MemBarRelease);
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ // 4984716: MemBars must be inserted before this
+ // memory node in order to avoid a false
+ // dependency which will confuse the scheduler.
+ Node *mem = memory(alias_idx);
+
+ // For now, we handle only those cases that actually exist: ints,
+ // longs, and Object. Adding others should be straightforward.
+ Node* cas;
+ switch(type) {
+ case T_INT:
+ cas = _gvn.transform(new (C, 5) CompareAndSwapINode(control(), mem, adr, newval, oldval));
+ break;
+ case T_LONG:
+ cas = _gvn.transform(new (C, 5) CompareAndSwapLNode(control(), mem, adr, newval, oldval));
+ break;
+ case T_OBJECT:
+ // reference stores need a store barrier.
+ // (They don't if CAS fails, but it isn't worth checking.)
+ pre_barrier(control(), base, adr, alias_idx, newval, value_type, T_OBJECT);
+ cas = _gvn.transform(new (C, 5) CompareAndSwapPNode(control(), mem, adr, newval, oldval));
+ post_barrier(control(), cas, base, adr, alias_idx, newval, T_OBJECT, true);
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+
+ // SCMemProjNodes represent the memory state of CAS. Their main
+ // role is to prevent CAS nodes from being optimized away when their
+ // results aren't used.
+ Node* proj = _gvn.transform( new (C, 1) SCMemProjNode(cas));
+ set_memory(proj, alias_idx);
+
+ // Add the trailing membar surrounding the access
+ insert_mem_bar(Op_MemBarCPUOrder);
+ insert_mem_bar(Op_MemBarAcquire);
+
+ push(cas);
+ return true;
+}
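+
+// Aside: a minimal standalone C++11 sketch (an analogy, not how the VM emits
+// code) of the ordering the barrier sandwich above gives a CAS: earlier writes
+// may not sink below it, later reads may not hoist above it. The helper name
+// cas_int is made up for illustration.
+#if 0 // illustration only, excluded from the build
+#include <atomic>
+
+static bool cas_int(std::atomic<int>& cell, int expected, int update) {
+  // acq_rel on success pairs a release "before" with an acquire "after",
+  // mirroring the MemBarRelease ... MemBarAcquire pairing above.
+  return cell.compare_exchange_strong(expected, update,
+                                      std::memory_order_acq_rel,
+                                      std::memory_order_acquire);
+}
+#endif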
+
+bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
+ // This is another variant of inline_unsafe_access, differing in
+ // that it always issues a store-store ("release") barrier and ensures
+ // store-atomicity (which only matters for "long").
+
+ if (callee()->is_static()) return false; // caller must have the capability!
+
+#ifndef PRODUCT
+ {
+ ResourceMark rm;
+ // Check the signatures.
+ ciSignature* sig = signature();
+#ifdef ASSERT
+ BasicType rtype = sig->return_type()->basic_type();
+ assert(rtype == T_VOID, "must return void");
+ assert(sig->count() == 3, "has 3 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "offset is long");
+#endif // ASSERT
+ }
+#endif //PRODUCT
+
+ // number of stack slots per value argument (1 or 2)
+ int type_words = type2size[type];
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ // Argument words: "this" plus oop plus offset plus value;
+ int nargs = 1 + 1 + 2 + type_words;
+
+ // pop arguments: val, offset, base, and receiver
+ debug_only(int saved_sp = _sp);
+ _sp += nargs;
+ Node* val = (type_words == 1) ? pop() : pop_pair();
+ Node *offset = pop_pair();
+ Node *base = pop();
+ Node *receiver = pop();
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ // Null check receiver.
+ _sp += nargs;
+ do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) {
+ return true;
+ }
+
+ // Build field offset expression.
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
+ // 32-bit machines ignore the high half of long offsets
+ offset = ConvL2X(offset);
+ Node* adr = make_unsafe_address(base, offset);
+ const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
+ const Type *value_type = Type::get_const_basic_type(type);
+ Compile::AliasType* alias_type = C->alias_type(adr_type);
+
+ insert_mem_bar(Op_MemBarRelease);
+ insert_mem_bar(Op_MemBarCPUOrder);
+ // Ensure that the store is atomic for longs:
+ bool require_atomic_access = true;
+ Node* store;
+ if (type == T_OBJECT) // reference stores need a store barrier.
+ store = store_oop_to_unknown(control(), base, adr, adr_type, val, value_type, type);
+ else {
+ store = store_to_memory(control(), adr, val, type, adr_type, require_atomic_access);
+ }
+ insert_mem_bar(Op_MemBarCPUOrder);
+ return true;
+}
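+
+// Aside: a standalone C++11 sketch (not the VM's mechanism) of the contract the
+// caller gets: a release store that is also atomic for 64-bit values, even on
+// 32-bit targets. The helper name ordered_put_long is made up for illustration.
+#if 0 // illustration only
+#include <atomic>
+
+static void ordered_put_long(std::atomic<long long>& cell, long long v) {
+  // Earlier stores cannot be reordered after this store, and the 64-bit value
+  // is written as a single unit.
+  cell.store(v, std::memory_order_release);
+}
+#endif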
+
+bool LibraryCallKit::inline_unsafe_allocate() {
+ if (callee()->is_static()) return false; // caller must have the capability!
+ int nargs = 1 + 1;
+ assert(signature()->size() == nargs-1, "alloc has 1 argument");
+ null_check_receiver(callee()); // check then ignore argument(0)
+ _sp += nargs; // set original stack for use by uncommon_trap
+ Node* cls = do_null_check(argument(1), T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) return true;
+
+ Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0);
+ _sp += nargs; // set original stack for use by uncommon_trap
+ kls = do_null_check(kls, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) return true; // argument was like int.class
+
+ // Note: The argument might still be an illegal value like
+ // Serializable.class or Object[].class. The runtime will handle it.
+ // But we must make an explicit check for initialization.
+ Node* insp = basic_plus_adr(kls, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
+ Node* inst = make_load(NULL, insp, TypeInt::INT, T_INT);
+ Node* bits = intcon(instanceKlass::fully_initialized);
+ Node* test = _gvn.transform( new (C, 3) SubINode(inst, bits) );
+ // The 'test' is non-zero if we need to take a slow path.
+
+ Node* obj = new_instance(kls, test);
+ push(obj);
+
+ return true;
+}
+
+//------------------------inline_native_time_funcs--------------
+// inline code for System.currentTimeMillis() and System.nanoTime()
+// these have the same type and signature
+bool LibraryCallKit::inline_native_time_funcs(bool isNano) {
+ address funcAddr = isNano ? CAST_FROM_FN_PTR(address, os::javaTimeNanos) :
+ CAST_FROM_FN_PTR(address, os::javaTimeMillis);
+ const char * funcName = isNano ? "nanoTime" : "currentTimeMillis";
+ const TypeFunc *tf = OptoRuntime::current_time_millis_Type();
+ const TypePtr* no_memory_effects = NULL;
+ Node* time = make_runtime_call(RC_LEAF, tf, funcAddr, funcName, no_memory_effects);
+ Node* value = _gvn.transform(new (C, 1) ProjNode(time, TypeFunc::Parms+0));
+#ifdef ASSERT
+ Node* value_top = _gvn.transform(new (C, 1) ProjNode(time, TypeFunc::Parms + 1));
+ assert(value_top == top(), "second value must be top");
+#endif
+ push_pair(value);
+ return true;
+}
+
+//------------------------inline_native_currentThread------------------
+bool LibraryCallKit::inline_native_currentThread() {
+ Node* junk = NULL;
+ push(generate_current_thread(junk));
+ return true;
+}
+
+//------------------------inline_native_isInterrupted------------------
+bool LibraryCallKit::inline_native_isInterrupted() {
+ const int nargs = 1+1; // receiver + boolean
+ assert(nargs == arg_size(), "sanity");
+ // Add a fast path to t.isInterrupted(clear_int):
+ // (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int))
+ // ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
+ // So, in the common case that the interrupt bit is false,
+ // we avoid making a call into the VM. Even if the interrupt bit
+ // is true, if the clear_int argument is false, we avoid the VM call.
+ // However, if the receiver is not currentThread, we must call the VM,
+ // because there must be some locking done around the operation.
+
+ // We only go to the fast case code if we pass two guards.
+ // Paths which do not pass are accumulated in the slow_region.
+ RegionNode* slow_region = new (C, 1) RegionNode(1);
+ record_for_igvn(slow_region);
+ RegionNode* result_rgn = new (C, 4) RegionNode(1+3); // fast1, fast2, slow
+ PhiNode* result_val = new (C, 4) PhiNode(result_rgn, TypeInt::BOOL);
+ enum { no_int_result_path = 1,
+ no_clear_result_path = 2,
+ slow_result_path = 3
+ };
+
+ // (a) Receiving thread must be the current thread.
+ Node* rec_thr = argument(0);
+ Node* tls_ptr = NULL;
+ Node* cur_thr = generate_current_thread(tls_ptr);
+ Node* cmp_thr = _gvn.transform( new (C, 3) CmpPNode(cur_thr, rec_thr) );
+ Node* bol_thr = _gvn.transform( new (C, 2) BoolNode(cmp_thr, BoolTest::ne) );
+
+ bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO);
+ if (!known_current_thread)
+ generate_slow_guard(bol_thr, slow_region);
+
+ // (b) Interrupt bit on TLS must be false.
+ Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
+ Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
+ p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
+ Node* int_bit = make_load(NULL, p, TypeInt::BOOL, T_INT);
+ Node* cmp_bit = _gvn.transform( new (C, 3) CmpINode(int_bit, intcon(0)) );
+ Node* bol_bit = _gvn.transform( new (C, 2) BoolNode(cmp_bit, BoolTest::ne) );
+
+ IfNode* iff_bit = create_and_map_if(control(), bol_bit, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
+
+ // First fast path: if (!TLS._interrupted) return false;
+ Node* false_bit = _gvn.transform( new (C, 1) IfFalseNode(iff_bit) );
+ result_rgn->init_req(no_int_result_path, false_bit);
+ result_val->init_req(no_int_result_path, intcon(0));
+
+ // drop through to next case
+ set_control( _gvn.transform(new (C, 1) IfTrueNode(iff_bit)) );
+
+ // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
+ Node* clr_arg = argument(1);
+ Node* cmp_arg = _gvn.transform( new (C, 3) CmpINode(clr_arg, intcon(0)) );
+ Node* bol_arg = _gvn.transform( new (C, 2) BoolNode(cmp_arg, BoolTest::ne) );
+ IfNode* iff_arg = create_and_map_if(control(), bol_arg, PROB_FAIR, COUNT_UNKNOWN);
+
+ // Second fast path: ... else if (!clear_int) return true;
+ Node* false_arg = _gvn.transform( new (C, 1) IfFalseNode(iff_arg) );
+ result_rgn->init_req(no_clear_result_path, false_arg);
+ result_val->init_req(no_clear_result_path, intcon(1));
+
+ // drop through to next case
+ set_control( _gvn.transform(new (C, 1) IfTrueNode(iff_arg)) );
+
+ // (d) Otherwise, go to the slow path.
+ slow_region->add_req(control());
+ set_control( _gvn.transform(slow_region) );
+
+ if (stopped()) {
+ // There is no slow path.
+ result_rgn->init_req(slow_result_path, top());
+ result_val->init_req(slow_result_path, top());
+ } else {
+ // non-virtual because it is a private non-static
+ CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_isInterrupted);
+
+ Node* slow_val = set_results_for_java_call(slow_call);
+ // this->control() comes from set_results_for_java_call
+
+ // If we know that the result of the slow call will be true, tell the optimizer!
+ if (known_current_thread) slow_val = intcon(1);
+
+ Node* fast_io = slow_call->in(TypeFunc::I_O);
+ Node* fast_mem = slow_call->in(TypeFunc::Memory);
+ // These two phis are pre-filled with copies of the fast IO and Memory
+ Node* io_phi = PhiNode::make(result_rgn, fast_io, Type::ABIO);
+ Node* mem_phi = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+
+ result_rgn->init_req(slow_result_path, control());
+ io_phi ->init_req(slow_result_path, i_o());
+ mem_phi ->init_req(slow_result_path, reset_memory());
+ result_val->init_req(slow_result_path, slow_val);
+
+ set_all_memory( _gvn.transform(mem_phi) );
+ set_i_o( _gvn.transform(io_phi) );
+ }
+
+ push_result(result_rgn, result_val);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
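+
+// Aside: the fast/slow split above, written out as plain control flow. A sketch
+// only; current_thread(), tls_interrupted_bit() and slow_is_interrupted() are
+// hypothetical helpers standing in for the TLS load and the locked VM call.
+#if 0 // illustration only
+static bool is_interrupted(Thread* t, bool clear_int) {
+  if (t == current_thread()) {
+    if (!tls_interrupted_bit()) return false;  // first fast path
+    if (!clear_int)             return true;   // second fast path
+  }
+  return slow_is_interrupted(t, clear_int);    // slow path: call into the VM
+}
+#endif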
+
+//---------------------------load_mirror_from_klass----------------------------
+// Given a klass oop, load its java mirror (a java.lang.Class oop).
+Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
+ Node* p = basic_plus_adr(klass, Klass::java_mirror_offset_in_bytes() + sizeof(oopDesc));
+ return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT);
+}
+
+//-----------------------load_klass_from_mirror_common-------------------------
+// Given a java mirror (a java.lang.Class oop), load its corresponding klass oop.
+// Test the klass oop for null (signifying a primitive Class like Integer.TYPE),
+// and branch to the given path on the region.
+// If never_see_null, take an uncommon trap on null, so we can optimistically
+// compile for the non-null case.
+// If the region is NULL, force never_see_null = true.
+Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror,
+ bool never_see_null,
+ int nargs,
+ RegionNode* region,
+ int null_path,
+ int offset) {
+ if (region == NULL) never_see_null = true;
+ Node* p = basic_plus_adr(mirror, offset);
+ const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL;
+ Node* kls = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type));
+ _sp += nargs; // any deopt will start just before call to enclosing method
+ Node* null_ctl = top();
+ kls = null_check_oop(kls, &null_ctl, never_see_null);
+ if (region != NULL) {
+ // Set region->in(null_path) if the mirror is a primitive (e.g., int.class).
+ region->init_req(null_path, null_ctl);
+ } else {
+ assert(null_ctl == top(), "no loose ends");
+ }
+ _sp -= nargs;
+ return kls;
+}
+
+//--------------------(inline_native_Class_query helpers)---------------------
+// Use this for JVM_ACC_INTERFACE, JVM_ACC_IS_CLONEABLE, JVM_ACC_HAS_FINALIZER.
+// Fall through if (mods & mask) == bits, take the guard otherwise.
+Node* LibraryCallKit::generate_access_flags_guard(Node* kls, int modifier_mask, int modifier_bits, RegionNode* region) {
+ // Branch around if the given klass has the given modifier bit set.
+ // Like generate_guard, adds a new path onto the region.
+ Node* modp = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
+ Node* mask = intcon(modifier_mask);
+ Node* bits = intcon(modifier_bits);
+ Node* mbit = _gvn.transform( new (C, 3) AndINode(mods, mask) );
+ Node* cmp = _gvn.transform( new (C, 3) CmpINode(mbit, bits) );
+ Node* bol = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ne) );
+ return generate_fair_guard(bol, region);
+}
+Node* LibraryCallKit::generate_interface_guard(Node* kls, RegionNode* region) {
+ return generate_access_flags_guard(kls, JVM_ACC_INTERFACE, 0, region);
+}
+
+//-------------------------inline_native_Class_query-------------------
+bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
+ int nargs = 1+0; // just the Class mirror, in most cases
+ const Type* return_type = TypeInt::BOOL;
+ Node* prim_return_value = top(); // what happens if it's a primitive class?
+ bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
+ bool expect_prim = false; // most of these guys expect to work on refs
+
+ enum { _normal_path = 1, _prim_path = 2, PATH_LIMIT };
+
+ switch (id) {
+ case vmIntrinsics::_isInstance:
+ nargs = 1+1; // the Class mirror, plus the object getting queried about
+ // nothing is an instance of a primitive type
+ prim_return_value = intcon(0);
+ break;
+ case vmIntrinsics::_getModifiers:
+ prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC);
+ assert(is_power_of_2((int)JVM_ACC_WRITTEN_FLAGS+1), "change next line");
+ return_type = TypeInt::make(0, JVM_ACC_WRITTEN_FLAGS, Type::WidenMin);
+ break;
+ case vmIntrinsics::_isInterface:
+ prim_return_value = intcon(0);
+ break;
+ case vmIntrinsics::_isArray:
+ prim_return_value = intcon(0);
+ expect_prim = true; // cf. ObjectStreamClass.getClassSignature
+ break;
+ case vmIntrinsics::_isPrimitive:
+ prim_return_value = intcon(1);
+ expect_prim = true; // obviously
+ break;
+ case vmIntrinsics::_getSuperclass:
+ prim_return_value = null();
+ return_type = TypeInstPtr::MIRROR->cast_to_ptr_type(TypePtr::BotPTR);
+ break;
+ case vmIntrinsics::_getComponentType:
+ prim_return_value = null();
+ return_type = TypeInstPtr::MIRROR->cast_to_ptr_type(TypePtr::BotPTR);
+ break;
+ case vmIntrinsics::_getClassAccessFlags:
+ prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC);
+ return_type = TypeInt::INT; // not bool! 6297094
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ Node* mirror = argument(0);
+ Node* obj = (nargs <= 1)? top(): argument(1);
+
+ const TypeInstPtr* mirror_con = _gvn.type(mirror)->isa_instptr();
+ if (mirror_con == NULL) return false; // cannot happen?
+
+#ifndef PRODUCT
+ if (PrintIntrinsics || PrintInlining || PrintOptoInlining) {
+ ciType* k = mirror_con->java_mirror_type();
+ if (k) {
+ tty->print("Inlining %s on constant Class ", vmIntrinsics::name_at(intrinsic_id()));
+ k->print_name();
+ tty->cr();
+ }
+ }
+#endif
+
+ // Null-check the mirror, and the mirror's klass ptr (in case it is a primitive).
+ RegionNode* region = new (C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ record_for_igvn(region);
+ PhiNode* phi = new (C, PATH_LIMIT) PhiNode(region, return_type);
+
+ // The mirror will never be null for Reflection.getClassAccessFlags; however,
+ // it may be null for Class.isInstance or Class.getModifiers. Throw an NPE
+ // if it is. See bug 4774291.
+
+ // For Reflection.getClassAccessFlags(), the null check occurs in
+ // the wrong place; see inline_unsafe_access(), above, for a similar
+ // situation.
+ _sp += nargs; // set original stack for use by uncommon_trap
+ mirror = do_null_check(mirror, T_OBJECT);
+ _sp -= nargs;
+ // If mirror or obj is dead, only null-path is taken.
+ if (stopped()) return true;
+
+ if (expect_prim) never_see_null = false; // expect nulls (meaning prims)
+
+ // Now load the mirror's klass metaobject, and null-check it.
+ // Side-effects region with the control path if the klass is null.
+ Node* kls = load_klass_from_mirror(mirror, never_see_null, nargs,
+ region, _prim_path);
+ // If kls is null, we have a primitive mirror.
+ phi->init_req(_prim_path, prim_return_value);
+ if (stopped()) { push_result(region, phi); return true; }
+
+ Node* p; // handy temp
+ Node* null_ctl;
+
+ // Now that we have the non-null klass, we can perform the real query.
+ // For constant classes, the query will constant-fold in LoadNode::Value.
+ Node* query_value = top();
+ switch (id) {
+ case vmIntrinsics::_isInstance:
+ // nothing is an instance of a primitive type
+ query_value = gen_instanceof(obj, kls);
+ break;
+
+ case vmIntrinsics::_getModifiers:
+ p = basic_plus_adr(kls, Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc));
+ query_value = make_load(NULL, p, TypeInt::INT, T_INT);
+ break;
+
+ case vmIntrinsics::_isInterface:
+ // (To verify this code sequence, check the asserts in JVM_IsInterface.)
+ if (generate_interface_guard(kls, region) != NULL)
+ // A guard was added. If the guard is taken, it was an interface.
+ phi->add_req(intcon(1));
+ // If we fall through, it's a plain class.
+ query_value = intcon(0);
+ break;
+
+ case vmIntrinsics::_isArray:
+ // (To verify this code sequence, check the asserts in JVM_IsArrayClass.)
+ if (generate_array_guard(kls, region) != NULL)
+ // A guard was added. If the guard is taken, it was an array.
+ phi->add_req(intcon(1));
+ // If we fall through, it's a plain class.
+ query_value = intcon(0);
+ break;
+
+ case vmIntrinsics::_isPrimitive:
+ query_value = intcon(0); // "normal" path produces false
+ break;
+
+ case vmIntrinsics::_getSuperclass:
+ // The rules here are somewhat unfortunate, but we can still do better
+ // with random logic than with a JNI call.
+ // Interfaces store null or Object as _super, but must report null.
+ // Arrays store an intermediate super as _super, but must report Object.
+ // Other types can report the actual _super.
+ // (To verify this code sequence, check the asserts in JVM_IsInterface.)
+ if (generate_interface_guard(kls, region) != NULL)
+ // A guard was added. If the guard is taken, it was an interface.
+ phi->add_req(null());
+ if (generate_array_guard(kls, region) != NULL)
+ // A guard was added. If the guard is taken, it was an array.
+ phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
+ // If we fall through, it's a plain class. Get its _super.
+ p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc));
+ kls = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL));
+ null_ctl = top();
+ kls = null_check_oop(kls, &null_ctl);
+ if (null_ctl != top()) {
+ // If the guard is taken, Object.superClass is null (both klass and mirror).
+ region->add_req(null_ctl);
+ phi ->add_req(null());
+ }
+ if (!stopped()) {
+ query_value = load_mirror_from_klass(kls);
+ }
+ break;
+
+ case vmIntrinsics::_getComponentType:
+ if (generate_array_guard(kls, region) != NULL) {
+ // Be sure to pin the oop load to the guard edge just created:
+ Node* is_array_ctrl = region->in(region->req()-1);
+ Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()) + sizeof(oopDesc));
+ Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT);
+ phi->add_req(cmo);
+ }
+ query_value = null(); // non-array case is null
+ break;
+
+ case vmIntrinsics::_getClassAccessFlags:
+ p = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ query_value = make_load(NULL, p, TypeInt::INT, T_INT);
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ // Fall-through is the normal case of a query to a real class.
+ phi->init_req(1, query_value);
+ region->init_req(1, control());
+
+ push_result(region, phi);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
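+
+// Aside: the _getSuperclass rules above, restated as straight-line code. A
+// sketch only; is_primitive_mirror(), is_interface(), is_array(), real_super()
+// and OBJECT_MIRROR (Object.class) are hypothetical names.
+#if 0 // illustration only
+static jclass superclass_of(jclass c) {
+  if (is_primitive_mirror(c)) return NULL;          // int.class etc. => null
+  if (is_interface(c))        return NULL;          // interfaces report null
+  if (is_array(c))            return OBJECT_MIRROR; // arrays report Object
+  return real_super(c);                             // plain classes: their _super
+}
+#endif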
+
+//--------------------------inline_native_subtype_check------------------------
+// This intrinsic takes the JNI calls out of the heart of
+// UnsafeFieldAccessorImpl.set, which improves Field.set, readObject, etc.
+bool LibraryCallKit::inline_native_subtype_check() {
+ int nargs = 1+1; // the Class mirror, plus the other class getting examined
+
+ // Pull both arguments off the stack.
+ Node* args[2]; // two java.lang.Class mirrors: superc, subc
+ args[0] = argument(0);
+ args[1] = argument(1);
+ Node* klasses[2]; // corresponding Klasses: superk, subk
+ klasses[0] = klasses[1] = top();
+
+ enum {
+ // A full decision tree on {superc is prim, subc is prim}:
+ _prim_0_path = 1, // {P,N} => false
+ // {P,P} & superc!=subc => false
+ _prim_same_path, // {P,P} & superc==subc => true
+ _prim_1_path, // {N,P} => false
+ _ref_subtype_path, // {N,N} & subtype check wins => true
+ _both_ref_path, // {N,N} & subtype check loses => false
+ PATH_LIMIT
+ };
+
+ RegionNode* region = new (C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ Node* phi = new (C, PATH_LIMIT) PhiNode(region, TypeInt::BOOL);
+ record_for_igvn(region);
+
+ const TypePtr* adr_type = TypeRawPtr::BOTTOM; // memory type of loads
+ const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL;
+ int class_klass_offset = java_lang_Class::klass_offset_in_bytes();
+
+ // First null-check both mirrors and load each mirror's klass metaobject.
+ int which_arg;
+ for (which_arg = 0; which_arg <= 1; which_arg++) {
+ Node* arg = args[which_arg];
+ _sp += nargs; // set original stack for use by uncommon_trap
+ arg = do_null_check(arg, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) break;
+ args[which_arg] = _gvn.transform(arg);
+
+ Node* p = basic_plus_adr(arg, class_klass_offset);
+ Node* kls = new (C, 3) LoadKlassNode(0, immutable_memory(), p, adr_type, kls_type);
+ klasses[which_arg] = _gvn.transform(kls);
+ }
+
+ // Having loaded both klasses, test each for null.
+ bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
+ for (which_arg = 0; which_arg <= 1; which_arg++) {
+ Node* kls = klasses[which_arg];
+ Node* null_ctl = top();
+ _sp += nargs; // set original stack for use by uncommon_trap
+ kls = null_check_oop(kls, &null_ctl, never_see_null);
+ _sp -= nargs;
+ int prim_path = (which_arg == 0 ? _prim_0_path : _prim_1_path);
+ region->init_req(prim_path, null_ctl);
+ if (stopped()) break;
+ klasses[which_arg] = kls;
+ }
+
+ if (!stopped()) {
+ // now we have two reference types, in klasses[0..1]
+ Node* subk = klasses[1]; // the argument to isAssignableFrom
+ Node* superk = klasses[0]; // the receiver
+ region->set_req(_both_ref_path, gen_subtype_check(subk, superk));
+ // now we have a successful reference subtype check
+ region->set_req(_ref_subtype_path, control());
+ }
+
+ // If both operands are primitive (both klasses null), then
+ // we must return true when they are identical primitives.
+ // It is convenient to test this after the first null klass check.
+ set_control(region->in(_prim_0_path)); // go back to first null check
+ if (!stopped()) {
+ // Since superc is primitive, make a guard for the superc==subc case.
+ Node* cmp_eq = _gvn.transform( new (C, 3) CmpPNode(args[0], args[1]) );
+ Node* bol_eq = _gvn.transform( new (C, 2) BoolNode(cmp_eq, BoolTest::eq) );
+ generate_guard(bol_eq, region, PROB_FAIR);
+ if (region->req() == PATH_LIMIT+1) {
+ // A guard was added. If the added guard is taken, superc==subc.
+ region->swap_edges(PATH_LIMIT, _prim_same_path);
+ region->del_req(PATH_LIMIT);
+ }
+ region->set_req(_prim_0_path, control()); // Not equal after all.
+ }
+
+ // these are the only paths that produce 'true':
+ phi->set_req(_prim_same_path, intcon(1));
+ phi->set_req(_ref_subtype_path, intcon(1));
+
+ // pull together the cases:
+ assert(region->req() == PATH_LIMIT, "sane region");
+ for (uint i = 1; i < region->req(); i++) {
+ Node* ctl = region->in(i);
+ if (ctl == NULL || ctl == top()) {
+ region->set_req(i, top());
+ phi ->set_req(i, top());
+ } else if (phi->in(i) == NULL) {
+ phi->set_req(i, intcon(0)); // all other paths produce 'false'
+ }
+ }
+
+ set_control(_gvn.transform(region));
+ push(_gvn.transform(phi));
+
+ return true;
+}
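+
+// Aside: the decision tree above as straight-line code for
+// superc.isAssignableFrom(subc). A sketch only; is_primitive_mirror() and
+// is_subtype_of() are hypothetical helpers.
+#if 0 // illustration only
+static bool is_assignable_from(jclass superc, jclass subc) {
+  if (is_primitive_mirror(superc)) return superc == subc; // {P,P} same => true
+  if (is_primitive_mirror(subc))   return false;          // {N,P}      => false
+  return is_subtype_of(subc, superc);                     // {N,N} subtype check
+}
+#endif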
+
+//---------------------generate_array_guard_common------------------------
+Node* LibraryCallKit::generate_array_guard_common(Node* kls, RegionNode* region,
+ bool obj_array, bool not_array) {
+ // If obj_array/not_array==false/false:
+ // Branch around if the given klass is in fact an array (either obj or prim).
+ // If obj_array/not_array==false/true:
+ // Branch around if the given klass is not an array klass of any kind.
+ // If obj_array/not_array==true/true:
+ // Branch around if the kls is not an oop array (kls is int[], String, etc.)
+ // If obj_array/not_array==true/false:
+ // Branch around if the kls is an oop array (Object[] or subtype)
+ //
+ // Like generate_guard, adds a new path onto the region.
+ jint layout_con = 0;
+ Node* layout_val = get_layout_helper(kls, layout_con);
+ if (layout_val == NULL) {
+ bool query = (obj_array
+ ? Klass::layout_helper_is_objArray(layout_con)
+ : Klass::layout_helper_is_javaArray(layout_con));
+ if (query == not_array) {
+ return NULL; // never a branch
+ } else { // always a branch
+ Node* always_branch = control();
+ if (region != NULL)
+ region->add_req(always_branch);
+ set_control(top());
+ return always_branch;
+ }
+ }
+ // Now test the correct condition.
+ jint nval = (obj_array
+ ? ((jint)Klass::_lh_array_tag_type_value
+ << Klass::_lh_array_tag_shift)
+ : Klass::_lh_neutral_value);
+ Node* cmp = _gvn.transform( new(C, 3) CmpINode(layout_val, intcon(nval)) );
+ BoolTest::mask btest = BoolTest::lt; // correct for testing is_[obj]array
+ // invert the test if we are looking for a non-array
+ if (not_array) btest = BoolTest(btest).negate();
+ Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, btest) );
+ return generate_fair_guard(bol, region);
+}
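+
+// Aside: why a signed "less than" answers both questions above. An array klass
+// has a negative layout helper whose high bits are an array tag; oop arrays
+// sort below type arrays, which sort below the neutral value 0. The constant
+// below assumes the 2-bit tag layout implied by _lh_array_tag_shift; a sketch,
+// not the real accessors.
+#if 0 // illustration only
+#include <stdint.h>
+
+static bool is_any_array(int32_t lh) { return lh < 0; }
+static bool is_obj_array(int32_t lh) { return lh < (int32_t)0xC0000000; }
+#endif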
+
+
+//-----------------------inline_native_newArray--------------------------
+bool LibraryCallKit::inline_native_newArray() {
+ int nargs = 2;
+ Node* mirror = argument(0);
+ Node* count_val = argument(1);
+
+ _sp += nargs; // set original stack for use by uncommon_trap
+ mirror = do_null_check(mirror, T_OBJECT);
+ _sp -= nargs;
+
+ enum { _normal_path = 1, _slow_path = 2, PATH_LIMIT };
+ RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ PhiNode* result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
+ TypeInstPtr::NOTNULL);
+ PhiNode* result_io = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
+ PhiNode* result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
+ TypePtr::BOTTOM);
+
+ bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
+ Node* klass_node = load_array_klass_from_mirror(mirror, never_see_null,
+ nargs,
+ result_reg, _slow_path);
+ Node* normal_ctl = control();
+ Node* no_array_ctl = result_reg->in(_slow_path);
+
+ // Generate code for the slow case. We make a call to newArray().
+ set_control(no_array_ctl);
+ if (!stopped()) {
+ // Either the input type is void.class, or else the
+ // array klass has not yet been cached. Either the
+ // ensuing call will throw an exception, or else it
+ // will cache the array klass for next time.
+ PreserveJVMState pjvms(this);
+ CallJavaNode* slow_call = generate_method_call_static(vmIntrinsics::_newArray);
+ Node* slow_result = set_results_for_java_call(slow_call);
+ // this->control() comes from set_results_for_java_call
+ result_reg->set_req(_slow_path, control());
+ result_val->set_req(_slow_path, slow_result);
+ result_io ->set_req(_slow_path, i_o());
+ result_mem->set_req(_slow_path, reset_memory());
+ }
+
+ set_control(normal_ctl);
+ if (!stopped()) {
+ // Normal case: The array type has been cached in the java.lang.Class.
+ // The following call works fine even if the array type is polymorphic.
+ // It could be a dynamic mix of int[], boolean[], Object[], etc.
+ _sp += nargs; // set original stack for use by uncommon_trap
+ Node* obj = new_array(klass_node, count_val);
+ _sp -= nargs;
+ result_reg->init_req(_normal_path, control());
+ result_val->init_req(_normal_path, obj);
+ result_io ->init_req(_normal_path, i_o());
+ result_mem->init_req(_normal_path, reset_memory());
+ }
+
+ // Return the combined state.
+ set_i_o( _gvn.transform(result_io) );
+ set_all_memory( _gvn.transform(result_mem) );
+ push_result(result_reg, result_val);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
+
+//----------------------inline_native_getLength--------------------------
+bool LibraryCallKit::inline_native_getLength() {
+ if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
+
+ int nargs = 1;
+ Node* array = argument(0);
+
+ _sp += nargs; // set original stack for use by uncommon_trap
+ array = do_null_check(array, T_OBJECT);
+ _sp -= nargs;
+
+ // If array is dead, only null-path is taken.
+ if (stopped()) return true;
+
+ // Deoptimize if it is a non-array.
+ Node* non_array = generate_non_array_guard(load_object_klass(array), NULL);
+
+ if (non_array != NULL) {
+ PreserveJVMState pjvms(this);
+ set_control(non_array);
+ _sp += nargs; // push the arguments back on the stack
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_maybe_recompile);
+ }
+
+ // If control is dead, only non-array-path is taken.
+ if (stopped()) return true;
+
+ // This works fine even if the array type is polymorphic.
+ // It could be a dynamic mix of int[], boolean[], Object[], etc.
+ push( load_array_length(array) );
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
+
+//------------------------inline_array_copyOf----------------------------
+bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) {
+ if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
+
+ // Restore the stack and pop off the arguments.
+ int nargs = 3 + (is_copyOfRange? 1: 0);
+ Node* original = argument(0);
+ Node* start = is_copyOfRange? argument(1): intcon(0);
+ Node* end = is_copyOfRange? argument(2): argument(1);
+ Node* array_type_mirror = is_copyOfRange? argument(3): argument(2);
+
+ _sp += nargs; // set original stack for use by uncommon_trap
+ array_type_mirror = do_null_check(array_type_mirror, T_OBJECT);
+ original = do_null_check(original, T_OBJECT);
+ _sp -= nargs;
+
+ // Check if a null path was taken unconditionally.
+ if (stopped()) return true;
+
+ Node* orig_length = load_array_length(original);
+
+ Node* klass_node = load_klass_from_mirror(array_type_mirror, false, nargs,
+ NULL, 0);
+ _sp += nargs; // set original stack for use by uncommon_trap
+ klass_node = do_null_check(klass_node, T_OBJECT);
+ _sp -= nargs;
+
+ RegionNode* bailout = new (C, 1) RegionNode(1);
+ record_for_igvn(bailout);
+
+ // Despite the generic type of Arrays.copyOf, the mirror might be int, int[], etc.
+ // Bail out if that is so.
+ Node* not_objArray = generate_non_objArray_guard(klass_node, bailout);
+ if (not_objArray != NULL) {
+ // Improve the klass node's type from the new optimistic assumption:
+ ciKlass* ak = ciArrayKlass::make(env()->Object_klass());
+ const Type* akls = TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
+ Node* cast = new (C, 2) CastPPNode(klass_node, akls);
+ cast->init_req(0, control());
+ klass_node = _gvn.transform(cast);
+ }
+
+ // Bail out if either start or end is negative.
+ generate_negative_guard(start, bailout, &start);
+ generate_negative_guard(end, bailout, &end);
+
+ Node* length = end;
+ if (_gvn.type(start) != TypeInt::ZERO) {
+ length = _gvn.transform( new (C, 3) SubINode(end, start) );
+ }
+
+ // Bail out if length is negative.
+ // ...Not needed, since the new_array will throw the right exception.
+ //generate_negative_guard(length, bailout, &length);
+
+ if (bailout->req() > 1) {
+ PreserveJVMState pjvms(this);
+ set_control( _gvn.transform(bailout) );
+ _sp += nargs; // push the arguments back on the stack
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_maybe_recompile);
+ }
+
+ if (!stopped()) {
+ // How many elements will we copy from the original?
+ // The answer is MinI(orig_length - start, length).
+ Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) );
+ Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
+
+ _sp += nargs; // set original stack for use by uncommon_trap
+ Node* newcopy = new_array(klass_node, length);
+ _sp -= nargs;
+
+ // Generate a direct call to the right arraycopy function(s).
+ // We know the copy is disjoint but we might not know if the
+ // oop stores need checking.
+ // Extreme case: Arrays.copyOf((Integer[])x, 10, String[].class).
+ // This will fail a store-check if x contains any non-nulls.
+ bool disjoint_bases = true;
+ bool length_never_negative = true;
+ generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
+ original, start, newcopy, intcon(0), moved,
+ nargs, disjoint_bases, length_never_negative);
+
+ push(newcopy);
+ }
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
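+
+// Aside: the length arithmetic above in isolation. For copyOfRange(original,
+// from, to) the new array has to - from elements, but only
+// min(original.length - from, to - from) of them are copied; any tail keeps its
+// default value. A sketch; elements_to_copy is a made-up name.
+#if 0 // illustration only
+static int elements_to_copy(int orig_length, int from, int to) {
+  int new_length = to - from;           // length of the copy
+  int orig_tail  = orig_length - from;  // what the original can still supply
+  return orig_tail < new_length ? orig_tail : new_length;
+}
+#endif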
+
+
+//----------------------generate_virtual_guard---------------------------
+// Helper for hashCode and clone. Peeks inside the vtable to avoid a call.
+Node* LibraryCallKit::generate_virtual_guard(Node* obj_klass,
+ RegionNode* slow_region) {
+ ciMethod* method = callee();
+ int vtable_index = method->vtable_index();
+ // Get the methodOop out of the appropriate vtable entry.
+ int entry_offset = (instanceKlass::vtable_start_offset() +
+ vtable_index*vtableEntry::size()) * wordSize +
+ vtableEntry::method_offset_in_bytes();
+ Node* entry_addr = basic_plus_adr(obj_klass, entry_offset);
+ Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT);
+
+ // Compare the target method with the expected method (e.g., Object.hashCode).
+ const TypeInstPtr* native_call_addr = TypeInstPtr::make(method);
+
+ Node* native_call = makecon(native_call_addr);
+ Node* chk_native = _gvn.transform( new(C, 3) CmpPNode(target_call, native_call) );
+ Node* test_native = _gvn.transform( new(C, 2) BoolNode(chk_native, BoolTest::ne) );
+
+ return generate_slow_guard(test_native, slow_region);
+}
+
+//-----------------------generate_method_call----------------------------
+// Use generate_method_call to make a slow-call to the real
+// method if the fast path fails. An alternative would be to
+// use a stub like OptoRuntime::slow_arraycopy_Java.
+// This only works for expanding the current library call,
+// not another intrinsic. (E.g., don't use this for making an
+// arraycopy call inside of the copyOf intrinsic.)
+CallJavaNode*
+LibraryCallKit::generate_method_call(vmIntrinsics::ID method_id, bool is_virtual, bool is_static) {
+ // When compiling the intrinsic method itself, do not use this technique.
+ guarantee(callee() != C->method(), "cannot make slow-call to self");
+
+ ciMethod* method = callee();
+ // ensure the JVMS we have will be correct for this call
+ guarantee(method_id == method->intrinsic_id(), "must match");
+
+ const TypeFunc* tf = TypeFunc::make(method);
+ int tfdc = tf->domain()->cnt();
+ CallJavaNode* slow_call;
+ if (is_static) {
+ assert(!is_virtual, "");
+ slow_call = new(C, tfdc) CallStaticJavaNode(tf,
+ SharedRuntime::get_resolve_static_call_stub(),
+ method, bci());
+ } else if (is_virtual) {
+ null_check_receiver(method);
+ int vtable_index = methodOopDesc::invalid_vtable_index;
+ if (UseInlineCaches) {
+ // Suppress the vtable call
+ } else {
+ // hashCode and clone are not miranda methods,
+ // so the vtable index is fixed.
+ // No need to use the linkResolver to get it.
+ vtable_index = method->vtable_index();
+ }
+ slow_call = new(C, tfdc) CallDynamicJavaNode(tf,
+ SharedRuntime::get_resolve_virtual_call_stub(),
+ method, vtable_index, bci());
+ } else { // neither virtual nor static: opt_virtual
+ null_check_receiver(method);
+ slow_call = new(C, tfdc) CallStaticJavaNode(tf,
+ SharedRuntime::get_resolve_opt_virtual_call_stub(),
+ method, bci());
+ slow_call->set_optimized_virtual(true);
+ }
+ set_arguments_for_java_call(slow_call);
+ set_edges_for_java_call(slow_call);
+ return slow_call;
+}
+
+
+//------------------------------inline_native_hashcode--------------------
+// Build special case code for calls to hashCode on an object.
+bool LibraryCallKit::inline_native_hashcode(bool is_virtual, bool is_static) {
+ assert(is_static == callee()->is_static(), "correct intrinsic selection");
+ assert(!(is_virtual && is_static), "either virtual, special, or static");
+
+ enum { _slow_path = 1, _fast_path, _null_path, PATH_LIMIT };
+
+ RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ PhiNode* result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
+ TypeInt::INT);
+ PhiNode* result_io = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
+ PhiNode* result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
+ TypePtr::BOTTOM);
+ Node* obj = NULL;
+ if (!is_static) {
+ // Check for hashing null object
+ obj = null_check_receiver(callee());
+ if (stopped()) return true; // unconditionally null
+ result_reg->init_req(_null_path, top());
+ result_val->init_req(_null_path, top());
+ } else {
+ // Do a null check, and return zero if null.
+ // System.identityHashCode(null) == 0
+ obj = argument(0);
+ Node* null_ctl = top();
+ obj = null_check_oop(obj, &null_ctl);
+ result_reg->init_req(_null_path, null_ctl);
+ result_val->init_req(_null_path, _gvn.intcon(0));
+ }
+
+ // Unconditionally null? Then return right away.
+ if (stopped()) {
+ set_control( result_reg->in(_null_path) );
+ if (!stopped())
+ push( result_val ->in(_null_path) );
+ return true;
+ }
+
+ // After null check, get the object's klass.
+ Node* obj_klass = load_object_klass(obj);
+
+ // This call may be virtual (invokevirtual) or bound (invokespecial).
+ // For each case we generate slightly different code.
+
+ // We only go to the fast case code if we pass a number of guards. The
+ // paths which do not pass are accumulated in the slow_region.
+ RegionNode* slow_region = new (C, 1) RegionNode(1);
+ record_for_igvn(slow_region);
+
+ // If this is a virtual call, we generate a funny guard. We pull out
+ // the vtable entry corresponding to hashCode() from the target object.
+ // If the target method which we are calling happens to be the native
+ // Object hashCode() method, we pass the guard. We do not need this
+ // guard for non-virtual calls -- the caller is known to be the native
+ // Object hashCode().
+ if (is_virtual) {
+ generate_virtual_guard(obj_klass, slow_region);
+ }
+
+ // Get the header out of the object, use LoadMarkNode when available
+ Node* header_addr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
+ Node* header = make_load(NULL, header_addr, TypeRawPtr::BOTTOM, T_ADDRESS);
+ header = _gvn.transform( new (C, 2) CastP2XNode(NULL, header) );
+
+ // Test the header to see if it is unlocked.
+ Node *lock_mask = _gvn.MakeConX(markOopDesc::biased_lock_mask_in_place);
+ Node *lmasked_header = _gvn.transform( new (C, 3) AndXNode(header, lock_mask) );
+ Node *unlocked_val = _gvn.MakeConX(markOopDesc::unlocked_value);
+ Node *chk_unlocked = _gvn.transform( new (C, 3) CmpXNode( lmasked_header, unlocked_val));
+ Node *test_unlocked = _gvn.transform( new (C, 2) BoolNode( chk_unlocked, BoolTest::ne) );
+
+ generate_slow_guard(test_unlocked, slow_region);
+
+ // Get the hash value and check to see that it has been properly assigned.
+ // We depend on hash_mask being at most 32 bits and avoid the use of
+ // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
+ // vm: see markOop.hpp.
+ Node *hash_mask = _gvn.intcon(markOopDesc::hash_mask);
+ Node *hash_shift = _gvn.intcon(markOopDesc::hash_shift);
+ Node *hshifted_header= _gvn.transform( new (C, 3) URShiftXNode(header, hash_shift) );
+ // This hack lets the hash bits live anywhere in the mark object now, as long
+ // as the shift drops the relevant bits into the low 32 bits. Note that
+ // Java spec says that HashCode is an int so there's no point in capturing
+ // an 'X'-sized hashcode (32 in 32-bit build or 64 in 64-bit build).
+ hshifted_header = ConvX2I(hshifted_header);
+ Node *hash_val = _gvn.transform( new (C, 3) AndINode(hshifted_header, hash_mask) );
+
+ Node *no_hash_val = _gvn.intcon(markOopDesc::no_hash);
+ Node *chk_assigned = _gvn.transform( new (C, 3) CmpINode( hash_val, no_hash_val));
+ Node *test_assigned = _gvn.transform( new (C, 2) BoolNode( chk_assigned, BoolTest::eq) );
+
+ generate_slow_guard(test_assigned, slow_region);
+
+ Node* init_mem = reset_memory();
+ // fill in the rest of the null path:
+ result_io ->init_req(_null_path, i_o());
+ result_mem->init_req(_null_path, init_mem);
+
+ result_val->init_req(_fast_path, hash_val);
+ result_reg->init_req(_fast_path, control());
+ result_io ->init_req(_fast_path, i_o());
+ result_mem->init_req(_fast_path, init_mem);
+
+ // Generate code for the slow case. We make a call to hashCode().
+ set_control(_gvn.transform(slow_region));
+ if (!stopped()) {
+ // No need for PreserveJVMState, because we're using up the present state.
+ set_all_memory(init_mem);
+ vmIntrinsics::ID hashCode_id = vmIntrinsics::_hashCode;
+ if (is_static) hashCode_id = vmIntrinsics::_identityHashCode;
+ CallJavaNode* slow_call = generate_method_call(hashCode_id, is_virtual, is_static);
+ Node* slow_result = set_results_for_java_call(slow_call);
+ // this->control() comes from set_results_for_java_call
+ result_reg->init_req(_slow_path, control());
+ result_val->init_req(_slow_path, slow_result);
+ result_io ->set_req(_slow_path, i_o());
+ result_mem ->set_req(_slow_path, reset_memory());
+ }
+
+ // Return the combined state.
+ set_i_o( _gvn.transform(result_io) );
+ set_all_memory( _gvn.transform(result_mem) );
+ push_result(result_reg, result_val);
+
+ return true;
+}
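+
+// Aside: the fast path above reduced to its arithmetic: shift the hash bits
+// down and mask them to an int; a result equal to no_hash means the slow call
+// is needed. The shift and mask below are placeholders, not the real markOop
+// constants (which differ between 32- and 64-bit builds).
+#if 0 // illustration only
+#include <stdint.h>
+
+static const int      kHashShift = 8;          // placeholder value
+static const uint32_t kHashMask  = 0x7fffffff; // placeholder value
+
+static uint32_t hash_from_header(uintptr_t header) {
+  return (uint32_t)(header >> kHashShift) & kHashMask;
+}
+#endif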
+
+//---------------------------inline_native_getClass----------------------------
+// Build special case code for calls to getClass on an object.
+bool LibraryCallKit::inline_native_getClass() {
+ Node* obj = null_check_receiver(callee());
+ if (stopped()) return true;
+ push( load_mirror_from_klass(load_object_klass(obj)) );
+ return true;
+}
+
+//-----------------inline_native_Reflection_getCallerClass---------------------
+// In the presence of deep enough inlining, getCallerClass() becomes a no-op.
+//
+// NOTE that this code must perform the same logic as
+// vframeStream::security_get_caller_frame in that it must skip
+// Method.invoke() and auxiliary frames.
+
+
+
+
+bool LibraryCallKit::inline_native_Reflection_getCallerClass() {
+ ciMethod* method = callee();
+
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr("Attempting to inline sun.reflect.Reflection.getCallerClass");
+ }
+#endif
+
+ debug_only(int saved_sp = _sp);
+
+ // Argument words: (int depth)
+ int nargs = 1;
+
+ _sp += nargs;
+ Node* caller_depth_node = pop();
+
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ // The depth value must be a constant in order for the runtime call
+ // to be eliminated.
+ const TypeInt* caller_depth_type = _gvn.type(caller_depth_node)->isa_int();
+ if (caller_depth_type == NULL || !caller_depth_type->is_con()) {
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Bailing out because caller depth was not a constant");
+ }
+#endif
+ return false;
+ }
+ // Note that the JVM state at this point does not include the
+ // getCallerClass() frame which we are trying to inline. The
+ // semantics of getCallerClass(), however, are that the "first"
+ // frame is the getCallerClass() frame, so we subtract one from the
+ // requested depth before continuing. We don't inline requests of
+ // getCallerClass(0).
+ int caller_depth = caller_depth_type->get_con() - 1;
+ if (caller_depth < 0) {
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Bailing out because caller depth was %d", caller_depth);
+ }
+#endif
+ return false;
+ }
+
+ if (!jvms()->has_method()) {
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Bailing out because intrinsic was inlined at top level");
+ }
+#endif
+ return false;
+ }
+ int _depth = jvms()->depth(); // cache call chain depth
+
+ // Walk back up the JVM state to find the caller at the required
+ // depth. NOTE that this code must perform the same logic as
+ // vframeStream::security_get_caller_frame in that it must skip
+ // Method.invoke() and auxiliary frames. Note also that depth is
+ // 1-based (1 is the bottom of the inlining).
+ int inlining_depth = _depth;
+ JVMState* caller_jvms = NULL;
+
+ if (inlining_depth > 0) {
+ caller_jvms = jvms();
+ assert(caller_jvms == jvms()->of_depth(inlining_depth), "inlining_depth == our depth");
+ do {
+ // The following if-tests should be performed in this order
+ if (is_method_invoke_or_aux_frame(caller_jvms)) {
+ // Skip a Method.invoke() or auxiliary frame
+ } else if (caller_depth > 0) {
+ // Skip real frame
+ --caller_depth;
+ } else {
+ // We're done: reached desired caller after skipping.
+ break;
+ }
+ caller_jvms = caller_jvms->caller();
+ --inlining_depth;
+ } while (inlining_depth > 0);
+ }
+
+ if (inlining_depth == 0) {
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Bailing out because caller depth (%d) exceeded inlining depth (%d)", caller_depth_type->get_con(), _depth);
+ tty->print_cr(" JVM state at this point:");
+ for (int i = _depth; i >= 1; i--) {
+ tty->print_cr(" %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8());
+ }
+ }
+#endif
+ return false; // Reached end of inlining
+ }
+
+ // Acquire method holder as java.lang.Class
+ ciInstanceKlass* caller_klass = caller_jvms->method()->holder();
+ ciInstance* caller_mirror = caller_klass->java_mirror();
+ // Push this as a constant
+ push(makecon(TypeInstPtr::make(caller_mirror)));
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Succeeded: caller = %s.%s, caller depth = %d, depth = %d", caller_klass->name()->as_utf8(), caller_jvms->method()->name()->as_utf8(), caller_depth_type->get_con(), _depth);
+ tty->print_cr(" JVM state at this point:");
+ for (int i = _depth; i >= 1; i--) {
+ tty->print_cr(" %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8());
+ }
+ }
+#endif
+ return true;
+}
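+
+// Aside: the walk above over the inlined call chain, as a loop over an explicit
+// frame list (innermost first). A sketch only; the Frame struct and
+// find_caller_index are made up, and "reflection frame" means the Method.invoke
+// and MethodAccessorImpl frames recognized by the helper routine below.
+#if 0 // illustration only
+struct Frame { bool is_reflection_frame; };
+
+static int find_caller_index(const Frame* frames, int count, int depth) {
+  for (int i = 0; i < count; i++) {
+    if (frames[i].is_reflection_frame) continue; // skipped, consumes no depth
+    if (depth-- == 0) return i;                  // reached the requested caller
+  }
+  return -1;                                     // ran past the inlined frames
+}
+#endif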
+
+// Helper routine for above
+bool LibraryCallKit::is_method_invoke_or_aux_frame(JVMState* jvms) {
+ // Is this the Method.invoke method itself?
+ if (jvms->method()->intrinsic_id() == vmIntrinsics::_invoke)
+ return true;
+
+ // Is this a helper, defined somewhere underneath MethodAccessorImpl.
+ ciKlass* k = jvms->method()->holder();
+ if (k->is_instance_klass()) {
+ ciInstanceKlass* ik = k->as_instance_klass();
+ for (; ik != NULL; ik = ik->super()) {
+ if (ik->name() == ciSymbol::sun_reflect_MethodAccessorImpl() &&
+ ik == env()->find_system_klass(ik->name())) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static int value_field_offset = -1; // offset of the "value" field of AtomicLongCSImpl. This is needed by
+ // inline_native_AtomicLong_attemptUpdate() but it has no way of
+ // computing it since there is no lookup-field-by-name function in the
+ // CI interface. This is computed and set by inline_native_AtomicLong_get().
+ // Using a static variable here is safe even if we have multiple compilation
+ // threads because the offset is constant. At worst the same offset will be
+ // computed and stored multiple times.
+
+bool LibraryCallKit::inline_native_AtomicLong_get() {
+ // Restore the stack and pop off the argument
+ _sp+=1;
+ Node *obj = pop();
+
+ // get the offset of the "value" field. Since the CI interface
+ // does not provide a way to look up a field by name, we scan the bytecodes
+ // to get the field index. We expect the first 2 instructions of the method
+ // to be:
+ // 0 aload_0
+ // 1 getfield "value"
+ ciMethod* method = callee();
+ if (value_field_offset == -1)
+ {
+ ciField* value_field;
+ ciBytecodeStream iter(method);
+ Bytecodes::Code bc = iter.next();
+
+ if ((bc != Bytecodes::_aload_0) &&
+ ((bc != Bytecodes::_aload) || (iter.get_index() != 0)))
+ return false;
+ bc = iter.next();
+ if (bc != Bytecodes::_getfield)
+ return false;
+ bool ignore;
+ value_field = iter.get_field(ignore);
+ value_field_offset = value_field->offset_in_bytes();
+ }
+
+ // Null check without removing any arguments.
+ _sp++;
+ obj = do_null_check(obj, T_OBJECT);
+ _sp--;
+ // Check for locking null object
+ if (stopped()) return true;
+
+ Node *adr = basic_plus_adr(obj, obj, value_field_offset);
+ const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
+ int alias_idx = C->get_alias_index(adr_type);
+
+ Node *result = _gvn.transform(new (C, 3) LoadLLockedNode(control(), memory(alias_idx), adr));
+
+ push_pair(result);
+
+ return true;
+}
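+
+// Aside: the shape of the bytecode-prefix check above, over raw bytecode bytes.
+// A sketch only; the real code uses ciBytecodeStream, which also accepts the
+// long form "aload 0". Opcode values: aload_0 = 0x2a, getfield = 0xb4, and
+// getfield carries a two-byte constant-pool index.
+#if 0 // illustration only
+#include <stdint.h>
+
+static bool starts_with_aload0_getfield(const uint8_t* code, int len) {
+  return len >= 4 && code[0] == 0x2a && code[1] == 0xb4;
+}
+#endif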
+
+bool LibraryCallKit::inline_native_AtomicLong_attemptUpdate() {
+ // Restore the stack and pop off the arguments
+ _sp+=5;
+ Node *newVal = pop_pair();
+ Node *oldVal = pop_pair();
+ Node *obj = pop();
+
+ // we need the offset of the "value" field which was computed when
+ // inlining the get() method. Give up if we don't have it.
+ if (value_field_offset == -1)
+ return false;
+
+ // Null check without removing any arguments.
+ _sp+=5;
+ obj = do_null_check(obj, T_OBJECT);
+ _sp-=5;
+ // Check for locking null object
+ if (stopped()) return true;
+
+ Node *adr = basic_plus_adr(obj, obj, value_field_offset);
+ const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
+ int alias_idx = C->get_alias_index(adr_type);
+
+ Node *result = _gvn.transform(new (C, 5) StoreLConditionalNode(control(), memory(alias_idx), adr, newVal, oldVal));
+ Node *store_proj = _gvn.transform( new (C, 1) SCMemProjNode(result));
+ set_memory(store_proj, alias_idx);
+
+ push(result);
+ return true;
+}
+
+bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
+ // restore the arguments
+ _sp += arg_size();
+
+ switch (id) {
+ case vmIntrinsics::_floatToRawIntBits:
+ push(_gvn.transform( new (C, 2) MoveF2INode(pop())));
+ break;
+
+ case vmIntrinsics::_intBitsToFloat:
+ push(_gvn.transform( new (C, 2) MoveI2FNode(pop())));
+ break;
+
+ case vmIntrinsics::_doubleToRawLongBits:
+ push_pair(_gvn.transform( new (C, 2) MoveD2LNode(pop_pair())));
+ break;
+
+ case vmIntrinsics::_longBitsToDouble:
+ push_pair(_gvn.transform( new (C, 2) MoveL2DNode(pop_pair())));
+ break;
+
+ case vmIntrinsics::_doubleToLongBits: {
+ Node* value = pop_pair();
+
+ // two paths (plus control) merge in a wood
+ RegionNode *r = new (C, 3) RegionNode(3);
+ Node *phi = new (C, 3) PhiNode(r, TypeLong::LONG);
+
+ Node *cmpisnan = _gvn.transform( new (C, 3) CmpDNode(value, value));
+ // Build the boolean node
+ Node *bolisnan = _gvn.transform( new (C, 2) BoolNode( cmpisnan, BoolTest::ne ) );
+
+ // Branch either way.
+ // NaN case is less traveled, which makes all the difference.
+ IfNode *ifisnan = create_and_xform_if(control(), bolisnan, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+ Node *opt_isnan = _gvn.transform(ifisnan);
+ assert( opt_isnan->is_If(), "Expect an IfNode");
+ IfNode *opt_ifisnan = (IfNode*)opt_isnan;
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(opt_ifisnan) );
+
+ set_control(iftrue);
+
+ static const jlong nan_bits = CONST64(0x7ff8000000000000);
+ Node *slow_result = longcon(nan_bits); // return NaN
+ phi->init_req(1, _gvn.transform( slow_result ));
+ r->init_req(1, iftrue);
+
+ // Else fall through
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(opt_ifisnan) );
+ set_control(iffalse);
+
+ phi->init_req(2, _gvn.transform( new (C, 2) MoveD2LNode(value)));
+ r->init_req(2, iffalse);
+
+ // Post merge
+ set_control(_gvn.transform(r));
+ record_for_igvn(r);
+
+ Node* result = _gvn.transform(phi);
+ assert(result->bottom_type()->isa_long(), "must be");
+ push_pair(result);
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ break;
+ }
+
+ case vmIntrinsics::_floatToIntBits: {
+ Node* value = pop();
+
+ // two paths (plus control) merge in a wood
+ RegionNode *r = new (C, 3) RegionNode(3);
+ Node *phi = new (C, 3) PhiNode(r, TypeInt::INT);
+
+ Node *cmpisnan = _gvn.transform( new (C, 3) CmpFNode(value, value));
+ // Build the boolean node
+ Node *bolisnan = _gvn.transform( new (C, 2) BoolNode( cmpisnan, BoolTest::ne ) );
+
+ // Branch either way.
+ // NaN case is less traveled, which makes all the difference.
+ IfNode *ifisnan = create_and_xform_if(control(), bolisnan, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+ Node *opt_isnan = _gvn.transform(ifisnan);
+ assert( opt_isnan->is_If(), "Expect an IfNode");
+ IfNode *opt_ifisnan = (IfNode*)opt_isnan;
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(opt_ifisnan) );
+
+ set_control(iftrue);
+
+ static const jint nan_bits = 0x7fc00000;
+ Node *slow_result = makecon(TypeInt::make(nan_bits)); // return NaN
+ phi->init_req(1, _gvn.transform( slow_result ));
+ r->init_req(1, iftrue);
+
+ // Else fall through
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(opt_ifisnan) );
+ set_control(iffalse);
+
+ phi->init_req(2, _gvn.transform( new (C, 2) MoveF2INode(value)));
+ r->init_req(2, iffalse);
+
+ // Post merge
+ set_control(_gvn.transform(r));
+ record_for_igvn(r);
+
+ Node* result = _gvn.transform(phi);
+ assert(result->bottom_type()->isa_int(), "must be");
+ push(result);
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ break;
+ }
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ return true;
+}
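+
+// Aside: the Java-level semantics the _doubleToLongBits branch implements,
+// standalone: every NaN collapses to the one canonical bit pattern, while the
+// raw variant (the Move node alone) would preserve the NaN payload. The
+// "v != v" test mirrors the CmpD(value, value) / BoolTest::ne idiom above.
+// A sketch, not VM code.
+#if 0 // illustration only
+#include <stdint.h>
+#include <string.h>
+
+static int64_t double_to_long_bits(double v) {
+  if (v != v) return (int64_t)0x7ff8000000000000LL; // NaN => canonical bits
+  int64_t bits;
+  memcpy(&bits, &v, sizeof(bits));                  // raw bit copy
+  return bits;
+}
+#endif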
+
+#ifdef _LP64
+#define XTOP ,top() /*additional argument*/
+#else //_LP64
+#define XTOP /*no additional argument*/
+#endif //_LP64
+
+//----------------------inline_unsafe_copyMemory-------------------------
+bool LibraryCallKit::inline_unsafe_copyMemory() {
+ if (callee()->is_static()) return false; // caller must have the capability!
+ int nargs = 1 + 5 + 3; // 5 args: (src: ptr,off, dst: ptr,off, size)
+ assert(signature()->size() == nargs-1, "copy has 5 arguments");
+ null_check_receiver(callee()); // check then ignore argument(0)
+ if (stopped()) return true;
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ Node* src_ptr = argument(1);
+ Node* src_off = ConvL2X(argument(2));
+ assert(argument(3)->is_top(), "2nd half of long");
+ Node* dst_ptr = argument(4);
+ Node* dst_off = ConvL2X(argument(5));
+ assert(argument(6)->is_top(), "2nd half of long");
+ Node* size = ConvL2X(argument(7));
+ assert(argument(8)->is_top(), "2nd half of long");
+
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11,
+ "fieldOffset must be byte-scaled");
+
+ Node* src = make_unsafe_address(src_ptr, src_off);
+ Node* dst = make_unsafe_address(dst_ptr, dst_off);
+
+ // Conservatively insert a memory barrier on all memory slices.
+ // Do not let writes of the copy source or destination float below the copy.
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ // Call it. Note that the length argument is not scaled.
+ make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::fast_arraycopy_Type(),
+ StubRoutines::unsafe_arraycopy(),
+ "unsafe_arraycopy",
+ TypeRawPtr::BOTTOM,
+ src, dst, size XTOP);
+
+ // Do not let reads of the copy destination float above the copy.
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ return true;
+}
+
+
+//------------------------inline_native_clone----------------------------
+// Here are the simple edge cases:
+// null receiver => normal trap
+// virtual and clone was overridden => slow path to out-of-line clone
+// not cloneable or finalizer => slow path to out-of-line Object.clone
+//
+// The general case has two steps, allocation and copying.
+// Allocation has two cases, and uses GraphKit::new_instance or new_array.
+//
+// Copying also has two cases, oop arrays and everything else.
+// Oop arrays use arrayof_oop_arraycopy (same as System.arraycopy).
+// Everything else uses the tight inline loop supplied by CopyArrayNode.
+//
+// These steps fold up nicely if and when the cloned object's klass
+// can be sharply typed as an object array, a type array, or an instance.
+//
+bool LibraryCallKit::inline_native_clone(bool is_virtual) {
+ int nargs = 1;
+ Node* obj = null_check_receiver(callee());
+ if (stopped()) return true;
+ Node* obj_klass = load_object_klass(obj);
+ const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr();
+ const TypeOopPtr* toop = ((tklass != NULL)
+ ? tklass->as_instance_type()
+ : TypeInstPtr::NOTNULL);
+
+ // Conservatively insert a memory barrier on all memory slices.
+ // Do not let writes into the original float below the clone.
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ // paths into result_reg:
+ enum {
+ _slow_path = 1, // out-of-line call to clone method (virtual or not)
+ _objArray_path, // plain allocation, plus arrayof_oop_arraycopy
+ _fast_path, // plain allocation, plus a CopyArray operation
+ PATH_LIMIT
+ };
+ RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ PhiNode* result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
+ TypeInstPtr::NOTNULL);
+ PhiNode* result_i_o = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
+ PhiNode* result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
+ TypePtr::BOTTOM);
+ record_for_igvn(result_reg);
+
+ const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
+ int raw_adr_idx = Compile::AliasIdxRaw;
+ const bool raw_mem_only = true;
+
+ // paths into alloc_reg (on the fast path, just before the CopyArray):
+ enum { _typeArray_alloc = 1, _instance_alloc, ALLOC_LIMIT };
+ RegionNode* alloc_reg = new(C, ALLOC_LIMIT) RegionNode(ALLOC_LIMIT);
+ PhiNode* alloc_val = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, raw_adr_type);
+ PhiNode* alloc_siz = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, TypeX_X);
+ PhiNode* alloc_i_o = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, Type::ABIO);
+ PhiNode* alloc_mem = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, Type::MEMORY,
+ raw_adr_type);
+ record_for_igvn(alloc_reg);
+
+ bool card_mark = false; // (see below)
+
+ Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
+ if (array_ctl != NULL) {
+ // It's an array.
+ PreserveJVMState pjvms(this);
+ set_control(array_ctl);
+ Node* obj_length = load_array_length(obj);
+ Node* obj_size = NULL;
+ _sp += nargs; // set original stack for use by uncommon_trap
+ Node* alloc_obj = new_array(obj_klass, obj_length,
+ raw_mem_only, &obj_size);
+ _sp -= nargs;
+ assert(obj_size != NULL, "");
+ Node* raw_obj = alloc_obj->in(1);
+ assert(raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
+ if (ReduceBulkZeroing) {
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
+ if (alloc != NULL) {
+ // We will be completely responsible for initializing this object.
+ alloc->maybe_set_complete(&_gvn);
+ }
+ }
+
+ if (!use_ReduceInitialCardMarks()) {
+ // If it is an oop array, it requires very special treatment,
+ // because card marking is required on each card of the array.
+ Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL);
+ if (is_obja != NULL) {
+ PreserveJVMState pjvms2(this);
+ set_control(is_obja);
+ // Generate a direct call to the right arraycopy function(s).
+ bool disjoint_bases = true;
+ bool length_never_negative = true;
+ generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
+ obj, intcon(0), alloc_obj, intcon(0),
+ obj_length, nargs,
+ disjoint_bases, length_never_negative);
+ result_reg->init_req(_objArray_path, control());
+ result_val->init_req(_objArray_path, alloc_obj);
+ result_i_o ->set_req(_objArray_path, i_o());
+ result_mem ->set_req(_objArray_path, reset_memory());
+ }
+ }
+ // We can dispense with card marks if we know the allocation
+ // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
+ // causes the non-eden paths to simulate a fresh allocation,
+ // insofar as no further card marks are required to initialize
+ // the object.
+
+ // Otherwise, there are no card marks to worry about.
+ alloc_val->init_req(_typeArray_alloc, raw_obj);
+ alloc_siz->init_req(_typeArray_alloc, obj_size);
+ alloc_reg->init_req(_typeArray_alloc, control());
+ alloc_i_o->init_req(_typeArray_alloc, i_o());
+ alloc_mem->init_req(_typeArray_alloc, memory(raw_adr_type));
+ }
+
+ // We only go to the fast case code if we pass a number of guards.
+ // The paths which do not pass are accumulated in the slow_region.
+ RegionNode* slow_region = new (C, 1) RegionNode(1);
+ record_for_igvn(slow_region);
+ if (!stopped()) {
+ // It's an instance. Make the slow-path tests.
+ // If this is a virtual call, we generate a funny guard. We grab
+ // the vtable entry corresponding to clone() from the target object.
+ // If the target method which we are calling happens to be the
+ // Object clone() method, we pass the guard. We do not need this
+ // guard for non-virtual calls; the caller is known to be the native
+ // Object clone().
+ if (is_virtual) {
+ generate_virtual_guard(obj_klass, slow_region);
+ }
+
+ // The object must be cloneable and must not have a finalizer.
+ // Both of these conditions may be checked in a single test.
+ // We could optimize the cloneable test further, but we don't care.
+ generate_access_flags_guard(obj_klass,
+ // Test both conditions:
+ JVM_ACC_IS_CLONEABLE | JVM_ACC_HAS_FINALIZER,
+ // Must be cloneable but not finalizer:
+ JVM_ACC_IS_CLONEABLE,
+ slow_region);
+ }
+
+ if (!stopped()) {
+ // It's an instance, and it passed the slow-path tests.
+ PreserveJVMState pjvms(this);
+ Node* obj_size = NULL;
+ Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size);
+ assert(obj_size != NULL, "");
+ Node* raw_obj = alloc_obj->in(1);
+ assert(raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
+ if (ReduceBulkZeroing) {
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
+ if (alloc != NULL && !alloc->maybe_set_complete(&_gvn))
+ alloc = NULL;
+ }
+ if (!use_ReduceInitialCardMarks()) {
+ // Put in store barrier for any and all oops we are sticking
+ // into this object. (We could avoid this if we could prove
+ // that the object type contains no oop fields at all.)
+ card_mark = true;
+ }
+ alloc_val->init_req(_instance_alloc, raw_obj);
+ alloc_siz->init_req(_instance_alloc, obj_size);
+ alloc_reg->init_req(_instance_alloc, control());
+ alloc_i_o->init_req(_instance_alloc, i_o());
+ alloc_mem->init_req(_instance_alloc, memory(raw_adr_type));
+ }
+
+ // Generate code for the slow case. We make a call to clone().
+ set_control(_gvn.transform(slow_region));
+ if (!stopped()) {
+ PreserveJVMState pjvms(this);
+ CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_clone, is_virtual);
+ Node* slow_result = set_results_for_java_call(slow_call);
+ // this->control() comes from set_results_for_java_call
+ result_reg->init_req(_slow_path, control());
+ result_val->init_req(_slow_path, slow_result);
+ result_i_o ->set_req(_slow_path, i_o());
+ result_mem ->set_req(_slow_path, reset_memory());
+ }
+
+ // The object is allocated, as an array and/or an instance. Now copy it.
+ set_control( _gvn.transform(alloc_reg) );
+ set_i_o( _gvn.transform(alloc_i_o) );
+ set_memory( _gvn.transform(alloc_mem), raw_adr_type );
+ Node* raw_obj = _gvn.transform(alloc_val);
+
+ if (!stopped()) {
+ // Copy the fastest available way.
+ // (No need for PreserveJVMState, since we're using it all up now.)
+ Node* src = obj;
+ Node* dest = raw_obj;
+ Node* end = dest;
+ Node* size = _gvn.transform(alloc_siz);
+
+ // Exclude the header.
+ int base_off = sizeof(oopDesc);
+ src = basic_plus_adr(src, base_off);
+ dest = basic_plus_adr(dest, base_off);
+ end = basic_plus_adr(end, size);
+
+ // Compute the length also, if needed:
+ Node* countx = size;
+ countx = _gvn.transform( new (C, 3) SubXNode(countx, MakeConX(base_off)) );
+ countx = _gvn.transform( new (C, 3) URShiftXNode(countx, intcon(LogBytesPerLong) ));
+
+ // Select an appropriate instruction to initialize the range.
+ // The CopyArray instruction (if supported) can be optimized
+ // into a discrete set of scalar loads and stores.
+ bool disjoint_bases = true;
+ generate_unchecked_arraycopy(raw_adr_type, T_LONG, disjoint_bases,
+ src, NULL, dest, NULL, countx);
+
+ // Now that the object is properly initialized, type it as an oop.
+ // Use a secondary InitializeNode memory barrier.
+ InitializeNode* init = insert_mem_bar_volatile(Op_Initialize, raw_adr_idx,
+ raw_obj)->as_Initialize();
+ init->set_complete(&_gvn); // (there is no corresponding AllocateNode)
+ Node* new_obj = new(C, 2) CheckCastPPNode(control(), raw_obj,
+ TypeInstPtr::NOTNULL);
+ new_obj = _gvn.transform(new_obj);
+
+ // If necessary, emit some card marks afterwards. (Non-arrays only.)
+ if (card_mark) {
+ Node* no_particular_value = NULL;
+ Node* no_particular_field = NULL;
+ post_barrier(control(),
+ memory(raw_adr_type),
+ new_obj,
+ no_particular_field,
+ raw_adr_idx,
+ no_particular_value,
+ T_OBJECT,
+ false);
+ }
+ // Present the results of the fast path.
+ result_reg->init_req(_fast_path, control());
+ result_val->init_req(_fast_path, new_obj);
+ result_i_o ->set_req(_fast_path, i_o());
+ result_mem ->set_req(_fast_path, reset_memory());
+ }
+
+ // Return the combined state.
+ set_control( _gvn.transform(result_reg) );
+ set_i_o( _gvn.transform(result_i_o) );
+ set_all_memory( _gvn.transform(result_mem) );
+
+ // Cast the result to a sharper type, since we know what clone does.
+ Node* new_obj = _gvn.transform(result_val);
+ Node* cast = new (C, 2) CheckCastPPNode(control(), new_obj, toop);
+ push(_gvn.transform(cast));
+
+ return true;
+}
+
+
+// constants for computing the copy function
+enum {
+ COPYFUNC_UNALIGNED = 0,
+ COPYFUNC_ALIGNED = 1, // src, dest aligned to HeapWordSize
+ COPYFUNC_CONJOINT = 0,
+ COPYFUNC_DISJOINT = 2 // src != dest, or transfer can descend
+};
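+
+// For illustration, select_arraycopy_function below combines these bits into
+// a selector in [0..3]; e.g. an aligned, disjoint request yields
+// COPYFUNC_ALIGNED + COPYFUNC_DISJOINT == 1 + 2 == 3, while an unaligned,
+// conjoint request yields 0.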
+
+// Note: The condition "disjoint" also applies to overlapping copies
+// where a descending copy is permitted (i.e., dest_offset <= src_offset).
+static address
+select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name) {
+ int selector =
+ (aligned ? COPYFUNC_ALIGNED : COPYFUNC_UNALIGNED) +
+ (disjoint ? COPYFUNC_DISJOINT : COPYFUNC_CONJOINT);
+
+#define RETURN_STUB(xxx_arraycopy) { \
+ name = #xxx_arraycopy; \
+ return StubRoutines::xxx_arraycopy(); }
+
+ switch (t) {
+ case T_BYTE:
+ case T_BOOLEAN:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jbyte_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jbyte_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jbyte_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jbyte_disjoint_arraycopy);
+ }
+ case T_CHAR:
+ case T_SHORT:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jshort_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jshort_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jshort_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jshort_disjoint_arraycopy);
+ }
+ case T_INT:
+ case T_FLOAT:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jint_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jint_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jint_disjoint_arraycopy);
+ }
+ case T_DOUBLE:
+ case T_LONG:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jlong_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jlong_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jlong_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jlong_disjoint_arraycopy);
+ }
+ case T_ARRAY:
+ case T_OBJECT:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(oop_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_oop_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(oop_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_oop_disjoint_arraycopy);
+ }
+ default:
+ ShouldNotReachHere();
+ return NULL;
+ }
+
+#undef RETURN_STUB
+}
+
+//------------------------------basictype2arraycopy----------------------------
+address LibraryCallKit::basictype2arraycopy(BasicType t,
+ Node* src_offset,
+ Node* dest_offset,
+ bool disjoint_bases,
+ const char* &name) {
+ const TypeInt* src_offset_inttype = gvn().find_int_type(src_offset);
+ const TypeInt* dest_offset_inttype = gvn().find_int_type(dest_offset);
+
+ bool aligned = false;
+ bool disjoint = disjoint_bases;
+
+ // if the offsets are the same, we can treat the memory regions as
+ // disjoint, because either the memory regions are in different arrays,
+ // or they are identical (which we can treat as disjoint.) We can also
+// treat a copy with a destination index less than the source index
+ // as disjoint since a low->high copy will work correctly in this case.
+ if (src_offset_inttype != NULL && src_offset_inttype->is_con() &&
+ dest_offset_inttype != NULL && dest_offset_inttype->is_con()) {
+ // both indices are constants
+ int s_offs = src_offset_inttype->get_con();
+ int d_offs = dest_offset_inttype->get_con();
+ int element_size = type2aelembytes[t];
+ aligned = ((arrayOopDesc::base_offset_in_bytes(t) + s_offs * element_size) % HeapWordSize == 0) &&
+ ((arrayOopDesc::base_offset_in_bytes(t) + d_offs * element_size) % HeapWordSize == 0);
+ if (s_offs >= d_offs) disjoint = true;
+ } else if (src_offset == dest_offset && src_offset != NULL) {
+ // This can occur if the offsets are identical non-constants.
+ disjoint = true;
+ }
+
+ return select_arraycopy_function(t, aligned, disjoint, name);
+}
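+
+// Worked example (a sketch of the selection above, assuming the array base
+// offset for T_INT happens to be HeapWordSize-aligned): copying an int[] with
+// constant offsets s_offs == d_offs == 0 gives aligned == true and
+// disjoint == true (since s_offs >= d_offs), so the stub chosen is
+// StubRoutines::arrayof_jint_disjoint_arraycopy().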
+
+
+//------------------------------inline_arraycopy-----------------------
+bool LibraryCallKit::inline_arraycopy() {
+ // Restore the stack and pop off the arguments.
+ int nargs = 5; // 2 oops, 3 ints, no size_t or long
+ assert(callee()->signature()->size() == nargs, "copy has 5 arguments");
+
+ Node *src = argument(0);
+ Node *src_offset = argument(1);
+ Node *dest = argument(2);
+ Node *dest_offset = argument(3);
+ Node *length = argument(4);
+
+ // Compile time checks. If any of these checks cannot be verified at compile time,
+ // we do not make a fast path for this call. Instead, we let the call remain as it
+ // is. The checks we choose to mandate at compile time are:
+ //
+ // (1) src and dest are arrays.
+ const Type* src_type = src->Value(&_gvn);
+ const Type* dest_type = dest->Value(&_gvn);
+ const TypeAryPtr* top_src = src_type->isa_aryptr();
+ const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+ if (top_src == NULL || top_src->klass() == NULL ||
+ top_dest == NULL || top_dest->klass() == NULL) {
+ // Conservatively insert a memory barrier on all memory slices.
+ // Do not let writes into the source float below the arraycopy.
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ // Call StubRoutines::generic_arraycopy stub.
+ generate_arraycopy(TypeRawPtr::BOTTOM, T_CONFLICT,
+ src, src_offset, dest, dest_offset, length,
+ nargs);
+
+ // Do not let reads from the destination float above the arraycopy.
+ // Since we cannot type the arrays, we don't know which slices
+ // might be affected. We could restrict this barrier only to those
+ // memory slices which pertain to array elements--but don't bother.
+ if (!InsertMemBarAfterArraycopy)
+ // (If InsertMemBarAfterArraycopy, there is already one in place.)
+ insert_mem_bar(Op_MemBarCPUOrder);
+ return true;
+ }
+
+ // (2) src and dest arrays must have elements of the same BasicType
+ // Figure out the size and type of the elements we will be copying.
+ BasicType src_elem = top_src->klass()->as_array_klass()->element_type()->basic_type();
+ BasicType dest_elem = top_dest->klass()->as_array_klass()->element_type()->basic_type();
+ if (src_elem == T_ARRAY) src_elem = T_OBJECT;
+ if (dest_elem == T_ARRAY) dest_elem = T_OBJECT;
+
+ if (src_elem != dest_elem || dest_elem == T_VOID) {
+ // The component types are not the same or are not recognized. Punt.
+ // (But, avoid the native method wrapper to JVM_ArrayCopy.)
+ generate_slow_arraycopy(TypePtr::BOTTOM,
+ src, src_offset, dest, dest_offset, length,
+ nargs);
+ return true;
+ }
+
+ //---------------------------------------------------------------------------
+ // We will make a fast path for this call to arraycopy.
+
+ // We have the following tests left to perform:
+ //
+ // (3) src and dest must not be null.
+ // (4) src_offset must not be negative.
+ // (5) dest_offset must not be negative.
+ // (6) length must not be negative.
+ // (7) src_offset + length must not exceed length of src.
+ // (8) dest_offset + length must not exceed length of dest.
+ // (9) each element of an oop array must be assignable
+
+ RegionNode* slow_region = new (C, 1) RegionNode(1);
+ record_for_igvn(slow_region);
+
+ // (3) operands must not be null
+ // We currently perform our null checks with the do_null_check routine.
+ // This means that the null exceptions will be reported in the caller
+ // rather than (correctly) reported inside of the native arraycopy call.
+ // This should be corrected, given time. We do our null check with the
+ // stack pointer restored.
+ _sp += nargs;
+ src = do_null_check(src, T_ARRAY);
+ dest = do_null_check(dest, T_ARRAY);
+ _sp -= nargs;
+
+ // (4) src_offset must not be negative.
+ generate_negative_guard(src_offset, slow_region);
+
+ // (5) dest_offset must not be negative.
+ generate_negative_guard(dest_offset, slow_region);
+
+ // (6) length must not be negative (moved to generate_arraycopy()).
+ // generate_negative_guard(length, slow_region);
+
+ // (7) src_offset + length must not exceed length of src.
+ generate_limit_guard(src_offset, length,
+ load_array_length(src),
+ slow_region);
+
+ // (8) dest_offset + length must not exceed length of dest.
+ generate_limit_guard(dest_offset, length,
+ load_array_length(dest),
+ slow_region);
+
+ // (9) each element of an oop array must be assignable
+ // The generate_arraycopy subroutine checks this.
+
+ // This is where the memory effects are placed:
+ const TypePtr* adr_type = TypeAryPtr::get_array_body_type(dest_elem);
+ generate_arraycopy(adr_type, dest_elem,
+ src, src_offset, dest, dest_offset, length,
+ nargs, false, false, slow_region);
+
+ return true;
+}
+
+//-----------------------------generate_arraycopy----------------------
+// Generate an optimized call to arraycopy.
+// Caller must guard against non-arrays.
+// Caller must determine a common array basic-type for both arrays.
+// Caller must validate offsets against array bounds.
+// The slow_region has already collected guard failure paths
+// (such as out of bounds length or non-conformable array types).
+// The generated code has this shape, in general:
+//
+// if (length == 0) return // via zero_path
+// slowval = -1
+// if (types unknown) {
+// slowval = call generic copy loop
+// if (slowval == 0) return // via checked_path
+// } else if (indexes in bounds) {
+// if ((is object array) && !(array type check)) {
+// slowval = call checked copy loop
+// if (slowval == 0) return // via checked_path
+// } else {
+// call bulk copy loop
+// return // via fast_path
+// }
+// }
+// // adjust params for remaining work:
+// if (slowval != -1) {
+// n = -1^slowval; src_offset += n; dest_offset += n; length -= n
+// }
+// slow_region:
+// call slow arraycopy(src, src_offset, dest, dest_offset, length)
+// return // via slow_call_path
+//
+// This routine is used from several intrinsics: System.arraycopy,
+// Object.clone (the array subcase), and Arrays.copyOf[Range].
+//
+void
+LibraryCallKit::generate_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs,
+ bool disjoint_bases,
+ bool length_never_negative,
+ RegionNode* slow_region) {
+
+ if (slow_region == NULL) {
+ slow_region = new(C,1) RegionNode(1);
+ record_for_igvn(slow_region);
+ }
+
+ Node* original_dest = dest;
+ AllocateArrayNode* alloc = NULL; // used for zeroing, if needed
+ Node* raw_dest = NULL; // used before zeroing, if needed
+ bool must_clear_dest = false;
+
+ // See if this is the initialization of a newly-allocated array.
+ // If so, we will take responsibility here for initializing it to zero.
+ // (Note: Because tightly_coupled_allocation performs checks on the
+ // out-edges of the dest, we need to avoid making derived pointers
+ // from it until we have checked its uses.)
+ if (ReduceBulkZeroing
+ && !ZeroTLAB // pointless if already zeroed
+ && basic_elem_type != T_CONFLICT // avoid corner case
+ && !_gvn.eqv_uncast(src, dest)
+ && ((alloc = tightly_coupled_allocation(dest, slow_region))
+ != NULL)
+ && alloc->maybe_set_complete(&_gvn)) {
+ // "You break it, you buy it."
+ InitializeNode* init = alloc->initialization();
+ assert(init->is_complete(), "we just did this");
+ assert(dest->Opcode() == Op_CheckCastPP, "sanity");
+ assert(dest->in(0)->in(0) == init, "dest pinned");
+ raw_dest = dest->in(1); // grab the raw pointer!
+ original_dest = dest;
+ dest = raw_dest;
+ adr_type = TypeRawPtr::BOTTOM; // all initializations are into raw memory
+ // Decouple the original InitializeNode, turning it into a simple membar.
+ // We will build a new one at the end of this routine.
+ init->set_req(InitializeNode::RawAddress, top());
+ // From this point on, every exit path is responsible for
+ // initializing any non-copied parts of the object to zero.
+ must_clear_dest = true;
+ } else {
+ // No zeroing elimination here.
+ alloc = NULL;
+ //original_dest = dest;
+ //must_clear_dest = false;
+ }
+
+ // Results are placed here:
+ enum { fast_path = 1, // normal void-returning assembly stub
+ checked_path = 2, // special assembly stub with cleanup
+ slow_call_path = 3, // something went wrong; call the VM
+ zero_path = 4, // bypass when length of copy is zero
+ bcopy_path = 5, // copy primitive array by 64-bit blocks
+ PATH_LIMIT = 6
+ };
+ RegionNode* result_region = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ PhiNode* result_i_o = new(C, PATH_LIMIT) PhiNode(result_region, Type::ABIO);
+ PhiNode* result_memory = new(C, PATH_LIMIT) PhiNode(result_region, Type::MEMORY, adr_type);
+ record_for_igvn(result_region);
+ _gvn.set_type_bottom(result_i_o);
+ _gvn.set_type_bottom(result_memory);
+ assert(adr_type != TypePtr::BOTTOM, "must be RawMem or a T[] slice");
+
+ // The slow_control path:
+ Node* slow_control;
+ Node* slow_i_o = i_o();
+ Node* slow_mem = memory(adr_type);
+ debug_only(slow_control = (Node*) badAddress);
+
+ // Checked control path:
+ Node* checked_control = top();
+ Node* checked_mem = NULL;
+ Node* checked_i_o = NULL;
+ Node* checked_value = NULL;
+
+ if (basic_elem_type == T_CONFLICT) {
+ assert(!must_clear_dest, "");
+ Node* cv = generate_generic_arraycopy(adr_type,
+ src, src_offset, dest, dest_offset,
+ copy_length, nargs);
+ if (cv == NULL) cv = intcon(-1); // failure (no stub available)
+ checked_control = control();
+ checked_i_o = i_o();
+ checked_mem = memory(adr_type);
+ checked_value = cv;
+ set_control(top()); // no fast path
+ }
+
+ Node* not_pos = generate_nonpositive_guard(copy_length, length_never_negative);
+ if (not_pos != NULL) {
+ PreserveJVMState pjvms(this);
+ set_control(not_pos);
+
+ // (6) length must not be negative.
+ if (!length_never_negative) {
+ generate_negative_guard(copy_length, slow_region);
+ }
+
+ if (!stopped() && must_clear_dest) {
+ Node* dest_length = alloc->in(AllocateNode::ALength);
+ if (_gvn.eqv_uncast(copy_length, dest_length)
+ || _gvn.find_int_con(dest_length, 1) <= 0) {
+ // There is no zeroing to do.
+ } else {
+ // Clear the whole thing since there are no source elements to copy.
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ intcon(0), NULL,
+ alloc->in(AllocateNode::AllocSize));
+ }
+ }
+
+ // Present the results of the fast call.
+ result_region->init_req(zero_path, control());
+ result_i_o ->init_req(zero_path, i_o());
+ result_memory->init_req(zero_path, memory(adr_type));
+ }
+
+ if (!stopped() && must_clear_dest) {
+ // We have to initialize the *uncopied* part of the array to zero.
+ // The copy destination is the slice dest[off..off+len]. The other slices
+ // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
+ Node* dest_size = alloc->in(AllocateNode::AllocSize);
+ Node* dest_length = alloc->in(AllocateNode::ALength);
+ Node* dest_tail = _gvn.transform( new(C,3) AddINode(dest_offset,
+ copy_length) );
+
+ // If there is a head section that needs zeroing, do it now.
+ if (find_int_con(dest_offset, -1) != 0) {
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ intcon(0), dest_offset,
+ NULL);
+ }
+
+ // Next, perform a dynamic check on the tail length.
+ // It is often zero, and we can win big if we prove this.
+ // There are two wins: Avoid generating the ClearArray
+ // with its attendant messy index arithmetic, and upgrade
+ // the copy to a more hardware-friendly word size of 64 bits.
+ Node* tail_ctl = NULL;
+ if (!stopped() && !_gvn.eqv_uncast(dest_tail, dest_length)) {
+ Node* cmp_lt = _gvn.transform( new(C,3) CmpINode(dest_tail, dest_length) );
+ Node* bol_lt = _gvn.transform( new(C,2) BoolNode(cmp_lt, BoolTest::lt) );
+ tail_ctl = generate_slow_guard(bol_lt, NULL);
+ assert(tail_ctl != NULL || !stopped(), "must be an outcome");
+ }
+
+ // At this point, let's assume there is no tail.
+ if (!stopped() && alloc != NULL && basic_elem_type != T_OBJECT) {
+ // There is no tail. Try an upgrade to a 64-bit copy.
+ bool didit = false;
+ { PreserveJVMState pjvms(this);
+ didit = generate_block_arraycopy(adr_type, basic_elem_type, alloc,
+ src, src_offset, dest, dest_offset,
+ dest_size);
+ if (didit) {
+ // Present the results of the block-copying fast call.
+ result_region->init_req(bcopy_path, control());
+ result_i_o ->init_req(bcopy_path, i_o());
+ result_memory->init_req(bcopy_path, memory(adr_type));
+ }
+ }
+ if (didit)
+ set_control(top()); // no regular fast path
+ }
+
+ // Clear the tail, if any.
+ if (tail_ctl != NULL) {
+ Node* notail_ctl = stopped() ? NULL : control();
+ set_control(tail_ctl);
+ if (notail_ctl == NULL) {
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ dest_tail, NULL,
+ dest_size);
+ } else {
+ // Make a local merge.
+ Node* done_ctl = new(C,3) RegionNode(3);
+ Node* done_mem = new(C,3) PhiNode(done_ctl, Type::MEMORY, adr_type);
+ done_ctl->init_req(1, notail_ctl);
+ done_mem->init_req(1, memory(adr_type));
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ dest_tail, NULL,
+ dest_size);
+ done_ctl->init_req(2, control());
+ done_mem->init_req(2, memory(adr_type));
+ set_control( _gvn.transform(done_ctl) );
+ set_memory( _gvn.transform(done_mem), adr_type );
+ }
+ }
+ }
+
+ BasicType copy_type = basic_elem_type;
+ assert(basic_elem_type != T_ARRAY, "caller must fix this");
+ if (!stopped() && copy_type == T_OBJECT) {
+ // If src and dest have compatible element types, we can copy bits.
+ // Types S[] and D[] are compatible if D is a supertype of S.
+ //
+ // If they are not, we will use checked_oop_disjoint_arraycopy,
+ // which performs a fast optimistic per-oop check, and backs off
+ // further to JVM_ArrayCopy on the first per-oop check that fails.
+ // (Actually, we don't move raw bits only; the GC requires card marks.)
+
+ // Get the klassOop for both src and dest
+ Node* src_klass = load_object_klass(src);
+ Node* dest_klass = load_object_klass(dest);
+
+ // Generate the subtype check.
+ // This might fold up statically, or then again it might not.
+ //
+ // Non-static example: Copying List<String>.elements to a new String[].
+ // The backing store for a List<String> is always an Object[],
+ // but its elements are always type String, if the generic types
+ // are correct at the source level.
+ //
+ // Test S[] against D[], not S against D, because (probably)
+ // the secondary supertype cache is less busy for S[] than S.
+ // This usually only matters when D is an interface.
+ Node* not_subtype_ctrl = gen_subtype_check(src_klass, dest_klass);
+ // Plug failing path into checked_oop_disjoint_arraycopy
+ if (not_subtype_ctrl != top()) {
+ PreserveJVMState pjvms(this);
+ set_control(not_subtype_ctrl);
+ // (At this point we can assume disjoint_bases, since types differ.)
+ int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+ Node* p1 = basic_plus_adr(dest_klass, ek_offset);
+ Node* n1 = new (C, 3) LoadKlassNode(0, immutable_memory(), p1, TypeRawPtr::BOTTOM);
+ Node* dest_elem_klass = _gvn.transform(n1);
+ Node* cv = generate_checkcast_arraycopy(adr_type,
+ dest_elem_klass,
+ src, src_offset, dest, dest_offset,
+ copy_length,
+ nargs);
+ if (cv == NULL) cv = intcon(-1); // failure (no stub available)
+ checked_control = control();
+ checked_i_o = i_o();
+ checked_mem = memory(adr_type);
+ checked_value = cv;
+ }
+ // At this point we know we do not need type checks on oop stores.
+
+ // Let's see if we need card marks:
+ if (alloc != NULL && use_ReduceInitialCardMarks()) {
+ // If we do not need card marks, copy using the jint or jlong stub.
+ copy_type = LP64_ONLY(T_LONG) NOT_LP64(T_INT);
+ assert(type2aelembytes[basic_elem_type] == type2aelembytes[copy_type],
+ "sizes agree");
+ }
+ }
+
+ if (!stopped()) {
+ // Generate the fast path, if possible.
+ PreserveJVMState pjvms(this);
+ generate_unchecked_arraycopy(adr_type, copy_type, disjoint_bases,
+ src, src_offset, dest, dest_offset,
+ ConvI2X(copy_length));
+
+ // Present the results of the fast call.
+ result_region->init_req(fast_path, control());
+ result_i_o ->init_req(fast_path, i_o());
+ result_memory->init_req(fast_path, memory(adr_type));
+ }
+
+ // Here are all the slow paths up to this point, in one bundle:
+ slow_control = top();
+ if (slow_region != NULL)
+ slow_control = _gvn.transform(slow_region);
+ debug_only(slow_region = (RegionNode*)badAddress);
+
+ set_control(checked_control);
+ if (!stopped()) {
+ // Clean up after the checked call.
+ // The returned value is either 0 or -1^K,
+ // where K = number of partially transferred array elements.
+ Node* cmp = _gvn.transform( new(C, 3) CmpINode(checked_value, intcon(0)) );
+ Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
+ IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
+
+ // If it is 0, we are done, so transfer to the end.
+ Node* checks_done = _gvn.transform( new(C, 1) IfTrueNode(iff) );
+ result_region->init_req(checked_path, checks_done);
+ result_i_o ->init_req(checked_path, checked_i_o);
+ result_memory->init_req(checked_path, checked_mem);
+
+ // If it is not zero, merge into the slow call.
+ set_control( _gvn.transform( new(C, 1) IfFalseNode(iff) ));
+ RegionNode* slow_reg2 = new(C, 3) RegionNode(3);
+ PhiNode* slow_i_o2 = new(C, 3) PhiNode(slow_reg2, Type::ABIO);
+ PhiNode* slow_mem2 = new(C, 3) PhiNode(slow_reg2, Type::MEMORY, adr_type);
+ record_for_igvn(slow_reg2);
+ slow_reg2 ->init_req(1, slow_control);
+ slow_i_o2 ->init_req(1, slow_i_o);
+ slow_mem2 ->init_req(1, slow_mem);
+ slow_reg2 ->init_req(2, control());
+ slow_i_o2 ->init_req(2, i_o());
+ slow_mem2 ->init_req(2, memory(adr_type));
+
+ slow_control = _gvn.transform(slow_reg2);
+ slow_i_o = _gvn.transform(slow_i_o2);
+ slow_mem = _gvn.transform(slow_mem2);
+
+ if (alloc != NULL) {
+ // We'll restart from the very beginning, after zeroing the whole thing.
+ // This can cause double writes, but that's OK since dest is brand new.
+ // So we ignore the low 31 bits of the value returned from the stub.
+ } else {
+ // We must continue the copy exactly where it failed, or else
+ // another thread might see the wrong number of writes to dest.
+ Node* checked_offset = _gvn.transform( new(C, 3) XorINode(checked_value, intcon(-1)) );
+ Node* slow_offset = new(C, 3) PhiNode(slow_reg2, TypeInt::INT);
+ slow_offset->init_req(1, intcon(0));
+ slow_offset->init_req(2, checked_offset);
+ slow_offset = _gvn.transform(slow_offset);
+
+ // Adjust the arguments by the conditionally incoming offset.
+ Node* src_off_plus = _gvn.transform( new(C, 3) AddINode(src_offset, slow_offset) );
+ Node* dest_off_plus = _gvn.transform( new(C, 3) AddINode(dest_offset, slow_offset) );
+ Node* length_minus = _gvn.transform( new(C, 3) SubINode(copy_length, slow_offset) );
+
+ // Tweak the node variables to adjust the code produced below:
+ src_offset = src_off_plus;
+ dest_offset = dest_off_plus;
+ copy_length = length_minus;
+ }
+ }
+
+ set_control(slow_control);
+ if (!stopped()) {
+ // Generate the slow path, if needed.
+ PreserveJVMState pjvms(this); // replace_in_map may trash the map
+
+ set_memory(slow_mem, adr_type);
+ set_i_o(slow_i_o);
+
+ if (must_clear_dest) {
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ intcon(0), NULL,
+ alloc->in(AllocateNode::AllocSize));
+ }
+
+ if (dest != original_dest) {
+ // Promote from rawptr to oop, so it looks right in the call's GC map.
+ dest = _gvn.transform( new(C,2) CheckCastPPNode(control(), dest,
+ TypeInstPtr::NOTNULL) );
+
+ // Edit the call's debug-info to avoid referring to original_dest.
+ // (The problem with original_dest is that it isn't ready until
+ // after the InitializeNode completes, but this stuff is before.)
+ // Substitute in the locally valid dest_oop.
+ replace_in_map(original_dest, dest);
+ }
+
+ generate_slow_arraycopy(adr_type,
+ src, src_offset, dest, dest_offset,
+ copy_length, nargs);
+
+ result_region->init_req(slow_call_path, control());
+ result_i_o ->init_req(slow_call_path, i_o());
+ result_memory->init_req(slow_call_path, memory(adr_type));
+ }
+
+ // Remove unused edges.
+ for (uint i = 1; i < result_region->req(); i++) {
+ if (result_region->in(i) == NULL)
+ result_region->init_req(i, top());
+ }
+
+ // Finished; return the combined state.
+ set_control( _gvn.transform(result_region) );
+ set_i_o( _gvn.transform(result_i_o) );
+ set_memory( _gvn.transform(result_memory), adr_type );
+
+ if (dest != original_dest) {
+ // Pin the "finished" array node after the arraycopy/zeroing operations.
+ // Use a secondary InitializeNode memory barrier.
+ InitializeNode* init = insert_mem_bar_volatile(Op_Initialize,
+ Compile::AliasIdxRaw,
+ raw_dest)->as_Initialize();
+ init->set_complete(&_gvn); // (there is no corresponding AllocateNode)
+ _gvn.hash_delete(original_dest);
+ original_dest->set_req(0, control());
+ _gvn.hash_find_insert(original_dest); // put back into GVN table
+ }
+
+ // The memory edges above are precise in order to model effects around
+// array copies accurately to allow value numbering of field loads around
+ // arraycopy. Such field loads, both before and after, are common in Java
+ // collections and similar classes involving header/array data structures.
+ //
+// But with a low number of registers, or when some registers are used or
+// killed by arraycopy calls, this causes register spilling on the stack.
+// See 6544710. The next memory barrier is added to avoid that. If the
+// arraycopy can be optimized away (which it sometimes can), then the membar
+// can be removed manually as well.
+ if (InsertMemBarAfterArraycopy)
+ insert_mem_bar(Op_MemBarCPUOrder);
+}
+
+
+// Helper function which determines if an arraycopy immediately follows
+// an allocation, with no intervening tests or other escapes for the object.
+AllocateArrayNode*
+LibraryCallKit::tightly_coupled_allocation(Node* ptr,
+ RegionNode* slow_region) {
+ if (stopped()) return NULL; // no fast path
+ if (C->AliasLevel() == 0) return NULL; // no MergeMems around
+
+ AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(ptr, &_gvn);
+ if (alloc == NULL) return NULL;
+
+ Node* rawmem = memory(Compile::AliasIdxRaw);
+ // Is the allocation's memory state untouched?
+ if (!(rawmem->is_Proj() && rawmem->in(0)->is_Initialize())) {
+ // Bail out if there have been raw-memory effects since the allocation.
+ // (Example: There might have been a call or safepoint.)
+ return NULL;
+ }
+ rawmem = rawmem->in(0)->as_Initialize()->memory(Compile::AliasIdxRaw);
+ if (!(rawmem->is_Proj() && rawmem->in(0) == alloc)) {
+ return NULL;
+ }
+
+ // There must be no unexpected observers of this allocation.
+ for (DUIterator_Fast imax, i = ptr->fast_outs(imax); i < imax; i++) {
+ Node* obs = ptr->fast_out(i);
+ if (obs != this->map()) {
+ return NULL;
+ }
+ }
+
+ // This arraycopy must unconditionally follow the allocation of the ptr.
+ Node* alloc_ctl = ptr->in(0);
+ assert(just_allocated_object(alloc_ctl) == ptr, "most recent allo");
+
+ Node* ctl = control();
+ while (ctl != alloc_ctl) {
+ // There may be guards which feed into the slow_region.
+ // Any other control flow means that we might not get a chance
+ // to finish initializing the allocated object.
+ if ((ctl->is_IfFalse() || ctl->is_IfTrue()) && ctl->in(0)->is_If()) {
+ IfNode* iff = ctl->in(0)->as_If();
+ Node* not_ctl = iff->proj_out(1 - ctl->as_Proj()->_con);
+ assert(not_ctl != NULL && not_ctl != ctl, "found alternate");
+ if (slow_region != NULL && slow_region->find_edge(not_ctl) >= 1) {
+ ctl = iff->in(0); // This test feeds the known slow_region.
+ continue;
+ }
+ // One more try: Various low-level checks bottom out in
+ // uncommon traps. If the debug-info of the trap omits
+ // any reference to the allocation, as we've already
+ // observed, then there can be no objection to the trap.
+ bool found_trap = false;
+ for (DUIterator_Fast jmax, j = not_ctl->fast_outs(jmax); j < jmax; j++) {
+ Node* obs = not_ctl->fast_out(j);
+ if (obs->in(0) == not_ctl && obs->is_Call() &&
+ (obs->as_Call()->entry_point() ==
+ SharedRuntime::uncommon_trap_blob()->instructions_begin())) {
+ found_trap = true; break;
+ }
+ }
+ if (found_trap) {
+ ctl = iff->in(0); // This test feeds a harmless uncommon trap.
+ continue;
+ }
+ }
+ return NULL;
+ }
+
+ // If we get this far, we have an allocation which immediately
+ // precedes the arraycopy, and we can take over zeroing the new object.
+ // The arraycopy will finish the initialization, and provide
+ // a new control state to which we will anchor the destination pointer.
+
+ return alloc;
+}
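+
+// For illustration, the pattern recognized above is a copy whose destination
+// is a freshly allocated array (e.g. the allocate-then-copy shape behind
+// Arrays.copyOf), where the only control flow between the allocation and the
+// copy is guards feeding the given slow_region or harmless uncommon traps,
+// and nothing else has observed the new object or touched raw memory.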
+
+// Helper for initialization of arrays, creating a ClearArray.
+// It writes zero bits in [start..end), within the body of an array object.
+// The memory effects are all chained onto the 'adr_type' alias category.
+//
+// Since the object is otherwise uninitialized, we are free
+// to put a little "slop" around the edges of the cleared area,
+// as long as it does not go back into the array's header,
+// or beyond the array end within the heap.
+//
+// The lower edge can be rounded down to the nearest jint and the
+// upper edge can be rounded up to the nearest MinObjAlignmentInBytes.
+//
+// Arguments:
+// adr_type memory slice where writes are generated
+// dest oop of the destination array
+// basic_elem_type element type of the destination
+// slice_idx array index of first element to store
+// slice_len number of elements to store (or NULL)
+// dest_size total size in bytes of the array object
+//
+// Exactly one of slice_len or dest_size must be non-NULL.
+// If dest_size is non-NULL, zeroing extends to the end of the object.
+// If slice_len is non-NULL, the slice_idx value must be a constant.
+void
+LibraryCallKit::generate_clear_array(const TypePtr* adr_type,
+ Node* dest,
+ BasicType basic_elem_type,
+ Node* slice_idx,
+ Node* slice_len,
+ Node* dest_size) {
+ // one or the other but not both of slice_len and dest_size:
+ assert((slice_len != NULL? 1: 0) + (dest_size != NULL? 1: 0) == 1, "");
+ if (slice_len == NULL) slice_len = top();
+ if (dest_size == NULL) dest_size = top();
+
+ // operate on this memory slice:
+ Node* mem = memory(adr_type); // memory slice to operate on
+
+ // scaling and rounding of indexes:
+ int scale = exact_log2(type2aelembytes[basic_elem_type]);
+ int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
+ int clear_low = (-1 << scale) & (BytesPerInt - 1);
+ int bump_bit = (-1 << scale) & BytesPerInt;
+
+ // determine constant starts and ends
+ const intptr_t BIG_NEG = -128;
+ assert(BIG_NEG + 2*abase < 0, "neg enough");
+ intptr_t slice_idx_con = (intptr_t) find_int_con(slice_idx, BIG_NEG);
+ intptr_t slice_len_con = (intptr_t) find_int_con(slice_len, BIG_NEG);
+ if (slice_len_con == 0) {
+ return; // nothing to do here
+ }
+ intptr_t start_con = (abase + (slice_idx_con << scale)) & ~clear_low;
+ intptr_t end_con = find_intptr_t_con(dest_size, -1);
+ if (slice_idx_con >= 0 && slice_len_con >= 0) {
+ assert(end_con < 0, "not two cons");
+ end_con = round_to(abase + ((slice_idx_con + slice_len_con) << scale),
+ BytesPerLong);
+ }
+
+ if (start_con >= 0 && end_con >= 0) {
+ // Constant start and end. Simple.
+ mem = ClearArrayNode::clear_memory(control(), mem, dest,
+ start_con, end_con, &_gvn);
+ } else if (start_con >= 0 && dest_size != top()) {
+ // Constant start, pre-rounded end after the tail of the array.
+ Node* end = dest_size;
+ mem = ClearArrayNode::clear_memory(control(), mem, dest,
+ start_con, end, &_gvn);
+ } else if (start_con >= 0 && slice_len != top()) {
+ // Constant start, non-constant end. End needs rounding up.
+ // End offset = round_up(abase + ((slice_idx_con + slice_len) << scale), 8)
+ intptr_t end_base = abase + (slice_idx_con << scale);
+ int end_round = (-1 << scale) & (BytesPerLong - 1);
+ Node* end = ConvI2X(slice_len);
+ if (scale != 0)
+ end = _gvn.transform( new(C,3) LShiftXNode(end, intcon(scale) ));
+ end_base += end_round;
+ end = _gvn.transform( new(C,3) AddXNode(end, MakeConX(end_base)) );
+ end = _gvn.transform( new(C,3) AndXNode(end, MakeConX(~end_round)) );
+ mem = ClearArrayNode::clear_memory(control(), mem, dest,
+ start_con, end, &_gvn);
+ } else if (start_con < 0 && dest_size != top()) {
+ // Non-constant start, pre-rounded end after the tail of the array.
+ // This is almost certainly a "round-to-end" operation.
+ Node* start = slice_idx;
+ start = ConvI2X(start);
+ if (scale != 0)
+ start = _gvn.transform( new(C,3) LShiftXNode( start, intcon(scale) ));
+ start = _gvn.transform( new(C,3) AddXNode(start, MakeConX(abase)) );
+ if ((bump_bit | clear_low) != 0) {
+ int to_clear = (bump_bit | clear_low);
+ // Align up mod 8, then store a jint zero unconditionally
+ // just before the mod-8 boundary.
+ // This would only fail if the first array element were immediately
+ // after the length field, and were also at an even offset mod 8.
+ assert(((abase + bump_bit) & ~to_clear) - BytesPerInt
+ >= arrayOopDesc::length_offset_in_bytes() + BytesPerInt,
+ "store must not trash length field");
+
+ // Bump 'start' up to (or past) the next jint boundary:
+ start = _gvn.transform( new(C,3) AddXNode(start, MakeConX(bump_bit)) );
+ // Round bumped 'start' down to jlong boundary in body of array.
+ start = _gvn.transform( new(C,3) AndXNode(start, MakeConX(~to_clear)) );
+ // Store a zero to the immediately preceding jint:
+ Node* x1 = _gvn.transform( new(C,3) AddXNode(start, MakeConX(-BytesPerInt)) );
+ Node* p1 = basic_plus_adr(dest, x1);
+ mem = StoreNode::make(C, control(), mem, p1, adr_type, intcon(0), T_INT);
+ mem = _gvn.transform(mem);
+ }
+
+ Node* end = dest_size; // pre-rounded
+ mem = ClearArrayNode::clear_memory(control(), mem, dest,
+ start, end, &_gvn);
+ } else {
+ // Non-constant start, unrounded non-constant end.
+ // (Nobody zeroes a random midsection of an array using this routine.)
+ ShouldNotReachHere(); // fix caller
+ }
+
+ // Done.
+ set_memory(mem, adr_type);
+}
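+
+// For illustration, the rounding masks above
+// (clear_low = (-1 << scale) & (BytesPerInt-1),
+//  bump_bit  = (-1 << scale) & BytesPerInt) work out as:
+//   T_BYTE  (scale 0): clear_low = 3, bump_bit = 4
+//   T_SHORT (scale 1): clear_low = 2, bump_bit = 4
+//   T_INT   (scale 2): clear_low = 0, bump_bit = 4
+//   T_LONG  (scale 3): clear_low = 0, bump_bit = 0
+// so sub-int element starts are rounded down to a jint boundary, and any type
+// smaller than a jlong may need the extra jint store before the start is
+// rounded up to a jlong boundary.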
+
+
+bool
+LibraryCallKit::generate_block_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ AllocateNode* alloc,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* dest_size) {
+ // See if there is an advantage from block transfer.
+ int scale = exact_log2(type2aelembytes[basic_elem_type]);
+ if (scale >= LogBytesPerLong)
+ return false; // it is already a block transfer
+
+ // Look at the alignment of the starting offsets.
+ int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
+ const intptr_t BIG_NEG = -128;
+ assert(BIG_NEG + 2*abase < 0, "neg enough");
+
+ intptr_t src_off = abase + ((intptr_t) find_int_con(src_offset, -1) << scale);
+ intptr_t dest_off = abase + ((intptr_t) find_int_con(dest_offset, -1) << scale);
+ if (src_off < 0 || dest_off < 0)
+ // At present, we can only understand constants.
+ return false;
+
+ if (((src_off | dest_off) & (BytesPerLong-1)) != 0) {
+ // Non-aligned; too bad.
+ // One more chance: Pick off an initial 32-bit word.
+ // This is a common case, since abase can be odd mod 8.
+ if (((src_off | dest_off) & (BytesPerLong-1)) == BytesPerInt &&
+ ((src_off ^ dest_off) & (BytesPerLong-1)) == 0) {
+ Node* sptr = basic_plus_adr(src, src_off);
+ Node* dptr = basic_plus_adr(dest, dest_off);
+ Node* sval = make_load(control(), sptr, TypeInt::INT, T_INT, adr_type);
+ store_to_memory(control(), dptr, sval, T_INT, adr_type);
+ src_off += BytesPerInt;
+ dest_off += BytesPerInt;
+ } else {
+ return false;
+ }
+ }
+ assert(src_off % BytesPerLong == 0, "");
+ assert(dest_off % BytesPerLong == 0, "");
+
+ // Do this copy by giant steps.
+ Node* sptr = basic_plus_adr(src, src_off);
+ Node* dptr = basic_plus_adr(dest, dest_off);
+ Node* countx = dest_size;
+ countx = _gvn.transform( new (C, 3) SubXNode(countx, MakeConX(dest_off)) );
+ countx = _gvn.transform( new (C, 3) URShiftXNode(countx, intcon(LogBytesPerLong)) );
+
+ bool disjoint_bases = true; // since alloc != NULL
+ generate_unchecked_arraycopy(adr_type, T_LONG, disjoint_bases,
+ sptr, NULL, dptr, NULL, countx);
+
+ return true;
+}
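+
+// For illustration: when src_off and dest_off are both congruent to
+// BytesPerInt mod BytesPerLong (a common case, since abase need not be
+// 8-byte aligned), the routine above first copies a single jint, which
+// advances both offsets onto a jlong boundary, and then moves the remainder
+// in 64-bit steps through the T_LONG arraycopy stub.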
+
+
+// Helper function; generates code for the slow case.
+// We make a call to a runtime method which emulates the native method,
+// but without the native wrapper overhead.
+void
+LibraryCallKit::generate_slow_arraycopy(const TypePtr* adr_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs) {
+ _sp += nargs; // any deopt will start just before call to enclosing method
+ Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
+ OptoRuntime::slow_arraycopy_Type(),
+ OptoRuntime::slow_arraycopy_Java(),
+ "slow_arraycopy", adr_type,
+ src, src_offset, dest, dest_offset,
+ copy_length);
+ _sp -= nargs;
+
+ // Handle exceptions thrown by this fellow:
+ make_slow_call_ex(call, env()->Throwable_klass(), false);
+}
+
+// Helper function; generates code for cases requiring runtime checks.
+Node*
+LibraryCallKit::generate_checkcast_arraycopy(const TypePtr* adr_type,
+ Node* dest_elem_klass,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs) {
+ if (stopped()) return NULL;
+
+ address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+ if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
+ return NULL;
+ }
+
+ // Pick out the parameters required to perform a store-check
+ // for the target array. This is an optimistic check. It will
+ // look in each non-null element's class, at the desired klass's
+ // super_check_offset, for the desired klass.
+ int sco_offset = Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc);
+ Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
+ Node* n3 = new(C, 3) LoadINode(NULL, immutable_memory(), p3, TypeRawPtr::BOTTOM);
+ Node* check_offset = _gvn.transform(n3);
+ Node* check_value = dest_elem_klass;
+
+ Node* src_start = array_element_address(src, src_offset, T_OBJECT);
+ Node* dest_start = array_element_address(dest, dest_offset, T_OBJECT);
+
+ // (We know the arrays are never conjoint, because their types differ.)
+ Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::checkcast_arraycopy_Type(),
+ copyfunc_addr, "checkcast_arraycopy", adr_type,
+ // five arguments, of which two are
+ // intptr_t (jlong in LP64)
+ src_start, dest_start,
+ copy_length XTOP,
+ check_offset XTOP,
+ check_value);
+
+ return _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Parms));
+}
+
+
+// Helper function; generates code for cases requiring runtime checks.
+Node*
+LibraryCallKit::generate_generic_arraycopy(const TypePtr* adr_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs) {
+ if (stopped()) return NULL;
+
+ address copyfunc_addr = StubRoutines::generic_arraycopy();
+ if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
+ return NULL;
+ }
+
+ Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::generic_arraycopy_Type(),
+ copyfunc_addr, "generic_arraycopy", adr_type,
+ src, src_offset, dest, dest_offset, copy_length);
+
+ return _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Parms));
+}
+
+// Helper function; generates the fast out-of-line call to an arraycopy stub.
+void
+LibraryCallKit::generate_unchecked_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ bool disjoint_bases,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length) {
+ if (stopped()) return; // nothing to do
+
+ Node* src_start = src;
+ Node* dest_start = dest;
+ if (src_offset != NULL || dest_offset != NULL) {
+ assert(src_offset != NULL && dest_offset != NULL, "");
+ src_start = array_element_address(src, src_offset, basic_elem_type);
+ dest_start = array_element_address(dest, dest_offset, basic_elem_type);
+ }
+
+ // Figure out which arraycopy runtime method to call.
+ const char* copyfunc_name = "arraycopy";
+ address copyfunc_addr =
+ basictype2arraycopy(basic_elem_type, src_offset, dest_offset,
+ disjoint_bases, copyfunc_name);
+
+ // Call it. Note that the copy_length value is not scaled to a byte size.
+ make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::fast_arraycopy_Type(),
+ copyfunc_addr, copyfunc_name, adr_type,
+ src_start, dest_start, copy_length XTOP);
+}
diff --git a/src/share/vm/opto/live.cpp b/src/share/vm/opto/live.cpp
new file mode 100644
index 000000000..4127f67e1
--- /dev/null
+++ b/src/share/vm/opto/live.cpp
@@ -0,0 +1,314 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_live.cpp.incl"
+
+
+
+//=============================================================================
+//------------------------------PhaseLive--------------------------------------
+// Compute live-in/live-out. We use a totally incremental algorithm. The LIVE
+// problem is monotonic. The steady-state solution looks like this: pull a
+// block from the worklist. It has a set of deltas - values which are newly
+// live-in for the block. Push these into the live-out sets of all predecessor
+// blocks. At each predecessor, only the values not already live-out are kept
+// (the live-out sets simply grow); the truly new live-out values then have
+// the predecessor's local definitions removed. Leftover bits become the new
+// live-in for the predecessor block, and the pred block is put on the
+// worklist.
+//   The locally computed live-in sets are built once and pushed into
+// predecessor live-out sets; this separate computation is done in the outer
+// loop below.
+PhaseLive::PhaseLive( const PhaseCFG &cfg, LRG_List &names, Arena *arena ) : Phase(LIVE), _cfg(cfg), _names(names), _arena(arena), _live(0) {
+}
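+
+// Worked example (a small sketch of the propagation in compute() below):
+// suppose block B1 precedes B2, B2 uses a value v that it does not define,
+// and B1 defines v. Processing B2 computes live-in {v} and pushes it to B1
+// via add_liveout: v is inserted into B1's live-out set, but because B1
+// defines v locally no delta is created, so B1 is not re-queued on the
+// worklist.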
+
+void PhaseLive::compute(uint maxlrg) {
+ _maxlrg = maxlrg;
+ _worklist = new (_arena) Block_List();
+
+ // Init the sparse live arrays. This data is live on exit from here!
+ // The _live info is the live-out info.
+ _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks);
+ uint i;
+ for( i=0; i<_cfg._num_blocks; i++ ) {
+ _live[i].initialize(_maxlrg);
+ }
+
+ // Init the sparse arrays for delta-sets.
+ ResourceMark rm; // Nuke temp storage on exit
+
+ // Does the memory used by _defs and _deltas get reclaimed? Does it matter? TT
+
+ // Array of values defined locally in blocks
+ _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks);
+ for( i=0; i<_cfg._num_blocks; i++ ) {
+ _defs[i].initialize(_maxlrg);
+ }
+
+ // Array of delta-set pointers, indexed by block pre_order-1.
+ _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks);
+ memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks);
+
+ _free_IndexSet = NULL;
+
+ // Blocks having done pass-1
+ VectorSet first_pass(Thread::current()->resource_area());
+
+ // Outer loop: must compute local live-in sets and push into predecessors.
+ uint iters = _cfg._num_blocks; // stat counters
+ for( uint j=_cfg._num_blocks; j>0; j-- ) {
+ Block *b = _cfg._blocks[j-1];
+
+ // Compute the local live-in set. Start with any new live-out bits.
+ IndexSet *use = getset( b );
+ IndexSet *def = &_defs[b->_pre_order-1];
+ DEBUG_ONLY(IndexSet *def_outside = getfreeset();)
+ uint i;
+ for( i=b->_nodes.size(); i>1; i-- ) {
+ Node *n = b->_nodes[i-1];
+ if( n->is_Phi() ) break;
+
+ uint r = _names[n->_idx];
+ assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block");
+ def->insert( r );
+ use->remove( r );
+ uint cnt = n->req();
+ for( uint k=1; k<cnt; k++ ) {
+ Node *nk = n->in(k);
+ uint nkidx = nk->_idx;
+ if( _cfg._bbs[nkidx] != b ) {
+ uint u = _names[nkidx];
+ use->insert( u );
+ DEBUG_ONLY(def_outside->insert( u );)
+ }
+ }
+ }
+#ifdef ASSERT
+ def_outside->set_next(_free_IndexSet);
+ _free_IndexSet = def_outside; // Drop onto free list
+#endif
+ // Remove anything defined by Phis and the block start instruction
+ for( uint k=i; k>0; k-- ) {
+ uint r = _names[b->_nodes[k-1]->_idx];
+ def->insert( r );
+ use->remove( r );
+ }
+
+ // Push these live-in things to predecessors
+ for( uint l=1; l<b->num_preds(); l++ ) {
+ Block *p = _cfg._bbs[b->pred(l)->_idx];
+ add_liveout( p, use, first_pass );
+
+ // PhiNode uses go in the live-out set of prior blocks.
+ for( uint k=i; k>0; k-- )
+ add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass );
+ }
+ freeset( b );
+ first_pass.set(b->_pre_order);
+
+ // Inner loop: blocks that picked up new live-out values to be propagated
+ while( _worklist->size() ) {
+ iters++; // stat counter
+ Block *b = _worklist->pop();
+ IndexSet *delta = getset(b);
+ assert( delta->count(), "missing delta set" );
+
+ // Add new-live-in to predecessors live-out sets
+ for( uint l=1; l<b->num_preds(); l++ )
+ add_liveout( _cfg._bbs[b->pred(l)->_idx], delta, first_pass );
+
+ freeset(b);
+ } // End of while-worklist-not-empty
+
+ } // End of for-all-blocks-outer-loop
+
+ // We explicitly clear all of the IndexSets which we are about to release.
+ // This allows us to recycle their internal memory into IndexSet's free list.
+
+ for( i=0; i<_cfg._num_blocks; i++ ) {
+ _defs[i].clear();
+ if (_deltas[i]) {
+ // Is this always true?
+ _deltas[i]->clear();
+ }
+ }
+ IndexSet *free = _free_IndexSet;
+ while (free != NULL) {
+ IndexSet *temp = free;
+ free = free->next();
+ temp->clear();
+ }
+
+}
+
+//------------------------------stats------------------------------------------
+#ifndef PRODUCT
+void PhaseLive::stats(uint iters) const {
+}
+#endif
+
+//------------------------------getset-----------------------------------------
+// Get an IndexSet for a block. Return existing one, if any. Make a new
+// empty one if a prior one does not exist.
+IndexSet *PhaseLive::getset( Block *p ) {
+ IndexSet *delta = _deltas[p->_pre_order-1];
+ if( !delta ) // Not on worklist?
+ // Get a free set; flag as being on worklist
+ delta = _deltas[p->_pre_order-1] = getfreeset();
+ return delta; // Return set of new live-out items
+}
+
+//------------------------------getfreeset-------------------------------------
+// Pull from free list, or allocate. Internal allocation on the returned set
+// is always from thread local storage.
+IndexSet *PhaseLive::getfreeset( ) {
+ IndexSet *f = _free_IndexSet;
+ if( !f ) {
+ f = new IndexSet;
+// f->set_arena(Thread::current()->resource_area());
+ f->initialize(_maxlrg, Thread::current()->resource_area());
+ } else {
+ // Pull from free list
+ _free_IndexSet = f->next();
+ //f->_cnt = 0; // Reset to empty
+// f->set_arena(Thread::current()->resource_area());
+ f->initialize(_maxlrg, Thread::current()->resource_area());
+ }
+ return f;
+}
+
+//------------------------------freeset----------------------------------------
+// Free an IndexSet from a block.
+void PhaseLive::freeset( const Block *p ) {
+ IndexSet *f = _deltas[p->_pre_order-1];
+ f->set_next(_free_IndexSet);
+ _free_IndexSet = f; // Drop onto free list
+ _deltas[p->_pre_order-1] = NULL;
+}
+
+//------------------------------add_liveout------------------------------------
+// Add a live-out value to a given block's live-out set. If it is new, then
+// also add it to the delta set and stick the block on the worklist.
+void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
+ IndexSet *live = &_live[p->_pre_order-1];
+ if( live->insert(r) ) { // If actually inserted...
+ // We extended the live-out set. See if the value is generated locally.
+ // If it is not, then we must extend the live-in set.
+ if( !_defs[p->_pre_order-1].member( r ) ) {
+ if( !_deltas[p->_pre_order-1] && // Not on worklist?
+ first_pass.test(p->_pre_order) )
+ _worklist->push(p); // Actually go on worklist if already 1st pass
+ getset(p)->insert(r);
+ }
+ }
+}
+
+
+//------------------------------add_liveout------------------------------------
+// Add a vector of live-out values to a given block's live-out set.
+void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
+ IndexSet *live = &_live[p->_pre_order-1];
+ IndexSet *defs = &_defs[p->_pre_order-1];
+ IndexSet *on_worklist = _deltas[p->_pre_order-1];
+ IndexSet *delta = on_worklist ? on_worklist : getfreeset();
+
+ IndexSetIterator elements(lo);
+ uint r;
+ while ((r = elements.next()) != 0) {
+ if( live->insert(r) && // If actually inserted...
+ !defs->member( r ) ) // and not defined locally
+ delta->insert(r); // Then add to live-in set
+ }
+
+ if( delta->count() ) { // If actually added things
+ _deltas[p->_pre_order-1] = delta; // Flag as on worklist now
+ if( !on_worklist && // Not on worklist?
+ first_pass.test(p->_pre_order) )
+ _worklist->push(p); // Actually go on worklist if already 1st pass
+ } else { // Nothing there; just free it
+ delta->set_next(_free_IndexSet);
+ _free_IndexSet = delta; // Drop onto free list
+ }
+}
+
+#ifndef PRODUCT
+//------------------------------dump-------------------------------------------
+// Dump the live-out set for a block
+void PhaseLive::dump( const Block *b ) const {
+ tty->print("Block %d: ",b->_pre_order);
+ tty->print("LiveOut: "); _live[b->_pre_order-1].dump();
+ uint cnt = b->_nodes.size();
+ for( uint i=0; i<cnt; i++ ) {
+ tty->print("L%d/", _names[b->_nodes[i]->_idx] );
+ b->_nodes[i]->dump();
+ }
+ tty->print("\n");
+}
+
+//------------------------------verify_base_ptrs-------------------------------
+// Verify that base pointers and derived pointers are still sane.
+// Basically, if a derived pointer is live at a safepoint, then its
+// base pointer must be live also.
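+// (For example, a derived pointer such as the address of a[i] is an AddP off
+// the array base 'a'; the GC needs the base live at the safepoint so it can
+// re-derive the interior pointer after the object moves. Illustration only.)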
+void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ for( uint j = b->end_idx() + 1; j > 1; j-- ) {
+ Node *n = b->_nodes[j-1];
+ if( n->is_Phi() ) break;
+ // Found a safepoint?
+ if( n->is_MachSafePoint() ) {
+ MachSafePointNode *sfpt = n->as_MachSafePoint();
+ JVMState* jvms = sfpt->jvms();
+ if (jvms != NULL) {
+ // Now scan for a live derived pointer
+ if (jvms->oopoff() < sfpt->req()) {
+ // Check each derived/base pair
+ for (uint idx = jvms->oopoff(); idx < sfpt->req(); idx += 2) {
+ Node *check = sfpt->in(idx);
+ uint j = 0;
+ // search upwards through spills and spill phis for AddP
+ while(true) {
+ if( !check ) break;
+ int idx = check->is_Copy();
+ if( idx ) {
+ check = check->in(idx);
+ } else if( check->is_Phi() && check->_idx >= _oldphi ) {
+ check = check->in(1);
+ } else
+ break;
+ j++;
+ assert(j < 100000,"Derived pointer checking in infinite loop");
+ } // End while
+ assert(check->is_Mach() && check->as_Mach()->ideal_Opcode() == Op_AddP,"Bad derived pointer");
+ }
+ } // End of check for derived pointers
+ } // End of check for debug info
+ } // End of if found a safepoint
+ } // End of forall instructions in block
+ } // End of forall blocks
+}
+#endif
diff --git a/src/share/vm/opto/live.hpp b/src/share/vm/opto/live.hpp
new file mode 100644
index 000000000..886f28f57
--- /dev/null
+++ b/src/share/vm/opto/live.hpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Block;
+class LRG_List;
+class PhaseCFG;
+class VectorSet;
+class IndexSet;
+
+//------------------------------PhaseLive--------------------------------------
+// Compute live-in/live-out
+class PhaseLive : public Phase {
+ // Array of Sets of values live at the end (live-out) of a block.
+ // Indexed by block pre-order number.
+ IndexSet *_live;
+
+ // Array of Sets of values defined locally in the block
+ // Indexed by block pre-order number.
+ IndexSet *_defs;
+
+ // Array of delta-set pointers, indexed by block pre-order number
+ IndexSet **_deltas;
+ IndexSet *_free_IndexSet; // Free list of same
+
+ Block_List *_worklist; // Worklist for iterative solution
+
+ const PhaseCFG &_cfg; // Basic blocks
+ LRG_List &_names; // Mapping from Nodes to live ranges
+ uint _maxlrg; // Largest live-range number
+ Arena *_arena;
+
+ IndexSet *getset( Block *p );
+ IndexSet *getfreeset( );
+ void freeset( const Block *p );
+ void add_liveout( Block *p, uint r, VectorSet &first_pass );
+ void add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass );
+
+public:
+ PhaseLive( const PhaseCFG &cfg, LRG_List &names, Arena *arena );
+ ~PhaseLive() {}
+ // Compute liveness info
+ void compute(uint maxlrg);
+ // Reset arena storage
+ void reset() { _live = NULL; }
+
+ // Return the live-out set for this block
+ IndexSet *live( const Block * b ) { return &_live[b->_pre_order-1]; }
+
+#ifndef PRODUCT
+ void dump( const Block *b ) const;
+ void stats(uint iters) const;
+#endif
+};
diff --git a/src/share/vm/opto/locknode.cpp b/src/share/vm/opto/locknode.cpp
new file mode 100644
index 000000000..90da8efaa
--- /dev/null
+++ b/src/share/vm/opto/locknode.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_locknode.cpp.incl"
+
+//=============================================================================
+const RegMask &BoxLockNode::in_RegMask(uint i) const {
+ return _inmask;
+}
+
+const RegMask &BoxLockNode::out_RegMask() const {
+ return *Matcher::idealreg2regmask[Op_RegP];
+}
+
+uint BoxLockNode::size_of() const { return sizeof(*this); }
+
+BoxLockNode::BoxLockNode( int slot ) : Node( Compile::current()->root() ), _slot(slot) {
+ init_class_id(Class_BoxLock);
+ init_flags(Flag_rematerialize);
+ OptoReg::Name reg = OptoReg::stack2reg(_slot);
+ _inmask.Insert(reg);
+}
+
+//------------------------------cmp--------------------------------------------
+uint BoxLockNode::cmp( const Node &n ) const {
+ const BoxLockNode &bn = (const BoxLockNode &)n;
+ return bn._slot == _slot;
+}
+
+OptoReg::Name BoxLockNode::stack_slot(Node* box_node) {
+ // Chase down the BoxNode
+ while (!box_node->is_BoxLock()) {
+ // if (box_node->is_SpillCopy()) {
+ // Node *m = box_node->in(1);
+ // if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_StoreP) {
+ // box_node = m->in(m->as_Mach()->operand_index(2));
+ // continue;
+ // }
+ // }
+ assert(box_node->is_SpillCopy() || box_node->is_Phi(), "Bad spill of Lock.");
+ box_node = box_node->in(1);
+ }
+ return box_node->in_RegMask(0).find_first_elem();
+}
+
+//=============================================================================
+//-----------------------------hash--------------------------------------------
+uint FastLockNode::hash() const { return NO_HASH; }
+
+//------------------------------cmp--------------------------------------------
+uint FastLockNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//=============================================================================
+//-----------------------------hash--------------------------------------------
+uint FastUnlockNode::hash() const { return NO_HASH; }
+
+//------------------------------cmp--------------------------------------------
+uint FastUnlockNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//
+// Create a counter which counts the number of times this lock is acquired
+//
+void FastLockNode::create_lock_counter(JVMState* state) {
+ BiasedLockingNamedCounter* blnc = (BiasedLockingNamedCounter*)
+ OptoRuntime::new_named_counter(state, NamedCounter::BiasedLockingCounter);
+ _counters = blnc->counters();
+}
+
+//=============================================================================
+//------------------------------do_monitor_enter-------------------------------
+void Parse::do_monitor_enter() {
+ kill_dead_locals();
+
+ // Null check; get cast pointer.
+ Node *obj = do_null_check(peek(), T_OBJECT);
+ // Check for locking null object
+ if (stopped()) return;
+
+ // the monitor object is not part of debug info expression stack
+ pop();
+
+ // Insert a FastLockNode which takes as arguments the current thread pointer,
+ // the obj pointer & the address of the stack slot pair used for the lock.
+ shared_lock(obj);
+}
+
+//------------------------------do_monitor_exit--------------------------------
+void Parse::do_monitor_exit() {
+ kill_dead_locals();
+
+ pop(); // Pop oop to unlock
+ // Because monitors are guaranteed paired (else we bail out), we know
+ // the matching Lock for this Unlock. Hence we know there is no need
+ // for a null check on Unlock.
+ shared_unlock(map()->peek_monitor_box(), map()->peek_monitor_obj());
+}
diff --git a/src/share/vm/opto/locknode.hpp b/src/share/vm/opto/locknode.hpp
new file mode 100644
index 000000000..6b1a8883c
--- /dev/null
+++ b/src/share/vm/opto/locknode.hpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//------------------------------BoxLockNode------------------------------------
+class BoxLockNode : public Node {
+public:
+ const int _slot;
+ RegMask _inmask;
+
+ BoxLockNode( int lock );
+ virtual int Opcode() const;
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual const RegMask &out_RegMask() const;
+ virtual uint size_of() const;
+ virtual uint hash() const { return Node::hash() + _slot; }
+ virtual uint cmp( const Node &n ) const;
+ virtual const class Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
+ virtual uint ideal_reg() const { return Op_RegP; }
+
+ static OptoReg::Name stack_slot(Node* box_node);
+
+#ifndef PRODUCT
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+ virtual void dump_spec(outputStream *st) const { st->print(" Lock %d",_slot); }
+#endif
+};
+
+//------------------------------FastLockNode-----------------------------------
+class FastLockNode: public CmpNode {
+private:
+ BiasedLockingCounters* _counters;
+
+public:
+ FastLockNode(Node *ctrl, Node *oop, Node *box) : CmpNode(oop,box) {
+ init_req(0,ctrl);
+ init_class_id(Class_FastLock);
+ _counters = NULL;
+ }
+ Node* obj_node() const { return in(1); }
+ Node* box_node() const { return in(2); }
+
+ // FastLock and FastUnlockNode do not hash; we need one for each corresponding
+ // LockNode/UnLockNode to avoid creating Phis.
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const ; // Always fail, except on self
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const { return TypeInt::CC; }
+ const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
+
+ void create_lock_counter(JVMState* s);
+ BiasedLockingCounters* counters() const { return _counters; }
+};
+
+
+//------------------------------FastUnlockNode---------------------------------
+class FastUnlockNode: public CmpNode {
+public:
+ FastUnlockNode(Node *ctrl, Node *oop, Node *box) : CmpNode(oop,box) {
+ init_req(0,ctrl);
+ init_class_id(Class_FastUnlock);
+ }
+ Node* obj_node() const { return in(1); }
+ Node* box_node() const { return in(2); }
+
+
+ // FastLock and FastUnlockNode do not hash; we need one for each corresponding
+ // LockNode/UnLockNode to avoid creating Phis.
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const ; // Always fail, except on self
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const { return TypeInt::CC; }
+ const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
+
+};
diff --git a/src/share/vm/opto/loopTransform.cpp b/src/share/vm/opto/loopTransform.cpp
new file mode 100644
index 000000000..3de4e0cd7
--- /dev/null
+++ b/src/share/vm/opto/loopTransform.cpp
@@ -0,0 +1,1729 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_loopTransform.cpp.incl"
+
+//------------------------------is_loop_exit-----------------------------------
+// Given an IfNode, return the loop-exiting projection or NULL if both
+// arms remain in the loop.
+Node *IdealLoopTree::is_loop_exit(Node *iff) const {
+ if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests
+ PhaseIdealLoop *phase = _phase;
+ // Test is an IfNode, has 2 projections. If BOTH are in the loop
+ // we need loop unswitching instead of peeling.
+ if( !is_member(phase->get_loop( iff->raw_out(0) )) )
+ return iff->raw_out(0);
+ if( !is_member(phase->get_loop( iff->raw_out(1) )) )
+ return iff->raw_out(1);
+ return NULL;
+}
+
+
+//=============================================================================
+
+
+//------------------------------record_for_igvn----------------------------
+// Put loop body on igvn work list
+void IdealLoopTree::record_for_igvn() {
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *n = _body.at(i);
+ _phase->_igvn._worklist.push(n);
+ }
+}
+
+//------------------------------compute_profile_trip_cnt----------------------------
+// Compute loop trip count from profile data as
+// (backedge_count + loop_exit_count) / loop_exit_count
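+// Illustrative example (numbers are assumed, not taken from real profile data):
+// a loop whose backedge was taken 90 times and which exited 10 times is
+// estimated at (90 + 10) / 10 == 10 trips per entry.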
+void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
+ if (!_head->is_CountedLoop()) {
+ return;
+ }
+ CountedLoopNode* head = _head->as_CountedLoop();
+ if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
+ return; // Already computed
+ }
+ float trip_cnt = (float)max_jint; // default is big
+
+ Node* back = head->in(LoopNode::LoopBackControl);
+ while (back != head) {
+ if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
+ back->in(0) &&
+ back->in(0)->is_If() &&
+ back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
+ back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
+ break;
+ }
+ back = phase->idom(back);
+ }
+ if (back != head) {
+ assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
+ back->in(0), "if-projection exists");
+ IfNode* back_if = back->in(0)->as_If();
+ float loop_back_cnt = back_if->_fcnt * back_if->_prob;
+
+ // Now compute a loop exit count
+ float loop_exit_cnt = 0.0f;
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *n = _body[i];
+ if( n->is_If() ) {
+ IfNode *iff = n->as_If();
+ if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
+ Node *exit = is_loop_exit(iff);
+ if( exit ) {
+ float exit_prob = iff->_prob;
+ if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
+ if (exit_prob > PROB_MIN) {
+ float exit_cnt = iff->_fcnt * exit_prob;
+ loop_exit_cnt += exit_cnt;
+ }
+ }
+ }
+ }
+ }
+ if (loop_exit_cnt > 0.0f) {
+ trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
+ } else {
+ // No exit count, so use the backedge count as the trip-count estimate.
+ trip_cnt = loop_back_cnt;
+ }
+ }
+#ifndef PRODUCT
+ if (TraceProfileTripCount) {
+ tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt);
+ }
+#endif
+ head->set_profile_trip_cnt(trip_cnt);
+}
+
+//---------------------is_invariant_addition-----------------------------
+// Return nonzero index of invariant operand for an Add or Sub
+// of (nonconstant) invariant and variant values. Helper for reassociate_invariants.
+int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) {
+ int op = n->Opcode();
+ if (op == Op_AddI || op == Op_SubI) {
+ bool in1_invar = this->is_invariant(n->in(1));
+ bool in2_invar = this->is_invariant(n->in(2));
+ if (in1_invar && !in2_invar) return 1;
+ if (!in1_invar && in2_invar) return 2;
+ }
+ return 0;
+}
+
+//---------------------reassociate_add_sub-----------------------------
+// Reassociate invariant add and subtract expressions:
+//
+// inv1 + (x + inv2) => ( inv1 + inv2) + x
+// (x + inv2) + inv1 => ( inv1 + inv2) + x
+// inv1 + (x - inv2) => ( inv1 - inv2) + x
+// inv1 - (inv2 - x) => ( inv1 - inv2) + x
+// (x + inv2) - inv1 => (-inv1 + inv2) + x
+// (x - inv2) + inv1 => ( inv1 - inv2) + x
+// (x - inv2) - inv1 => (-inv1 - inv2) + x
+// inv1 + (inv2 - x) => ( inv1 + inv2) - x
+// inv1 - (x - inv2) => ( inv1 + inv2) - x
+// (inv2 - x) + inv1 => ( inv1 + inv2) - x
+// (inv2 - x) - inv1 => (-inv1 + inv2) - x
+// inv1 - (x + inv2) => ( inv1 - inv2) - x
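+//
+// For illustration (hypothetical loop-invariant values 'base' and 'off'):
+// inside the loop, base + (i + off) becomes (base + off) + i, so the invariant
+// addition (base + off) can be hoisted out of the loop body.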
+//
+Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) {
+ if (!n1->is_Add() && !n1->is_Sub() || n1->outcnt() == 0) return NULL;
+ if (is_invariant(n1)) return NULL;
+ int inv1_idx = is_invariant_addition(n1, phase);
+ if (!inv1_idx) return NULL;
+ // Don't mess with adds of constants (igvn moves them to the expression tree root).
+ if (n1->is_Add() && n1->in(2)->is_Con()) return NULL;
+ Node* inv1 = n1->in(inv1_idx);
+ Node* n2 = n1->in(3 - inv1_idx);
+ int inv2_idx = is_invariant_addition(n2, phase);
+ if (!inv2_idx) return NULL;
+ Node* x = n2->in(3 - inv2_idx);
+ Node* inv2 = n2->in(inv2_idx);
+
+ bool neg_x = n2->is_Sub() && inv2_idx == 1;
+ bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
+ bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
+ if (n1->is_Sub() && inv1_idx == 1) {
+ neg_x = !neg_x;
+ neg_inv2 = !neg_inv2;
+ }
+ Node* inv1_c = phase->get_ctrl(inv1);
+ Node* inv2_c = phase->get_ctrl(inv2);
+ Node* n_inv1;
+ if (neg_inv1) {
+ Node *zero = phase->_igvn.intcon(0);
+ phase->set_ctrl(zero, phase->C->root());
+ n_inv1 = new (phase->C, 3) SubINode(zero, inv1);
+ phase->register_new_node(n_inv1, inv1_c);
+ } else {
+ n_inv1 = inv1;
+ }
+ Node* inv;
+ if (neg_inv2) {
+ inv = new (phase->C, 3) SubINode(n_inv1, inv2);
+ } else {
+ inv = new (phase->C, 3) AddINode(n_inv1, inv2);
+ }
+ phase->register_new_node(inv, phase->get_early_ctrl(inv));
+
+ Node* addx;
+ if (neg_x) {
+ addx = new (phase->C, 3) SubINode(inv, x);
+ } else {
+ addx = new (phase->C, 3) AddINode(x, inv);
+ }
+ phase->register_new_node(addx, phase->get_ctrl(x));
+ phase->_igvn.hash_delete(n1);
+ phase->_igvn.subsume_node(n1, addx);
+ return addx;
+}
+
+//---------------------reassociate_invariants-----------------------------
+// Reassociate invariant expressions:
+void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
+ for (int i = _body.size() - 1; i >= 0; i--) {
+ Node *n = _body.at(i);
+ for (int j = 0; j < 5; j++) {
+ Node* nn = reassociate_add_sub(n, phase);
+ if (nn == NULL) break;
+ n = nn; // again
+ };
+ }
+}
+
+//------------------------------policy_peeling---------------------------------
+// Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
+// make some loop-invariant test (usually a null-check) happen before the loop.
+bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
+ Node *test = ((IdealLoopTree*)this)->tail();
+ int body_size = ((IdealLoopTree*)this)->_body.size();
+ int uniq = phase->C->unique();
+ // Peeling does loop cloning which can result in O(N^2) node construction
+ if( body_size > 255 /* Prevent overflow for large body_size */
+ || (body_size * body_size + uniq > MaxNodeLimit) ) {
+ return false; // too large to safely clone
+ }
+ while( test != _head ) { // Scan till run off top of loop
+ if( test->is_If() ) { // Test?
+ Node *ctrl = phase->get_ctrl(test->in(1));
+ if (ctrl->is_top())
+ return false; // Found dead test on live IF? No peeling!
+ // Standard IF only has one input value to check for loop invariance
+ assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
+ // Condition is not a member of this loop?
+ if( !is_member(phase->get_loop(ctrl)) &&
+ is_loop_exit(test) )
+ return true; // Found reason to peel!
+ }
+ // Walk up dominators to loop _head looking for test which is
+ // executed on every path thru loop.
+ test = phase->idom(test);
+ }
+ return false;
+}
+
+//------------------------------peeled_dom_test_elim---------------------------
+// If we got the effect of peeling, either by actually peeling or by making
+// a pre-loop which must execute at least once, we can remove all
+// loop-invariant dominated tests in the main body.
+void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) {
+ bool progress = true;
+ while( progress ) {
+ progress = false; // Reset for next iteration
+ Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
+ Node *test = prev->in(0);
+ while( test != loop->_head ) { // Scan till run off top of loop
+
+ int p_op = prev->Opcode();
+ if( (p_op == Op_IfFalse || p_op == Op_IfTrue) &&
+ test->is_If() && // Test?
+ !test->in(1)->is_Con() && // And not already obvious?
+ // Condition is not a member of this loop?
+ !loop->is_member(get_loop(get_ctrl(test->in(1))))){
+ // Walk loop body looking for instances of this test
+ for( uint i = 0; i < loop->_body.size(); i++ ) {
+ Node *n = loop->_body.at(i);
+ if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) {
+ // IfNode was dominated by version in peeled loop body
+ progress = true;
+ dominated_by( old_new[prev->_idx], n );
+ }
+ }
+ }
+ prev = test;
+ test = idom(test);
+ } // End of scan tests in loop
+
+ } // End of while( progress )
+}
+
+//------------------------------do_peeling-------------------------------------
+// Peel the first iteration of the given loop.
+// Step 1: Clone the loop body. The clone becomes the peeled iteration.
+// The pre-loop illegally has 2 control users (old & new loops).
+// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
+// Do this by making the old-loop fall-in edges act as if they came
+// around the loopback from the prior iteration (follow the old-loop
+// backedges) and then map to the new peeled iteration. This leaves
+// the pre-loop with only 1 user (the new peeled iteration), but the
+// peeled-loop backedge has 2 users.
+// Step 3: Cut the backedge on the clone (so it's not a loop) and remove the
+// extra backedge user.
+void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
+
+ C->set_major_progress();
+ // Peeling a 'main' loop in a pre/main/post situation obfuscates the
+ // 'pre' loop from the main and the 'pre' can no longer have its
+ // iterations adjusted. Therefore, we need to declare this loop as
+ // no longer a 'main' loop; it will need new pre and post loops before
+ // we can do further RCE.
+ Node *h = loop->_head;
+ if( h->is_CountedLoop() ) {
+ CountedLoopNode *cl = h->as_CountedLoop();
+ assert(cl->trip_count() > 0, "peeling a fully unrolled loop");
+ cl->set_trip_count(cl->trip_count() - 1);
+ if( cl->is_main_loop() ) {
+ cl->set_normal_loop();
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Peeling a 'main' loop; resetting to 'normal' ");
+ loop->dump_head();
+ }
+#endif
+ }
+ }
+
+ // Step 1: Clone the loop body. The clone becomes the peeled iteration.
+ // The pre-loop illegally has 2 control users (old & new loops).
+ clone_loop( loop, old_new, dom_depth(loop->_head) );
+
+
+ // Step 2: Make the old-loop fall-in edges point to the peeled iteration.
+ // Do this by making the old-loop fall-in edges act as if they came
+ // around the loopback from the prior iteration (follow the old-loop
+ // backedges) and then map to the new peeled iteration. This leaves
+ // the pre-loop with only 1 user (the new peeled iteration), but the
+ // peeled-loop backedge has 2 users.
+ for (DUIterator_Fast jmax, j = loop->_head->fast_outs(jmax); j < jmax; j++) {
+ Node* old = loop->_head->fast_out(j);
+ if( old->in(0) == loop->_head && old->req() == 3 &&
+ (old->is_Loop() || old->is_Phi()) ) {
+ Node *new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx];
+ if( !new_exit_value ) // Backedge value is ALSO loop invariant?
+ // Then loop body backedge value remains the same.
+ new_exit_value = old->in(LoopNode::LoopBackControl);
+ _igvn.hash_delete(old);
+ old->set_req(LoopNode::EntryControl, new_exit_value);
+ }
+ }
+
+
+ // Step 3: Cut the backedge on the clone (so it's not a loop) and remove the
+ // extra backedge user.
+ Node *nnn = old_new[loop->_head->_idx];
+ _igvn.hash_delete(nnn);
+ nnn->set_req(LoopNode::LoopBackControl, C->top());
+ for (DUIterator_Fast j2max, j2 = nnn->fast_outs(j2max); j2 < j2max; j2++) {
+ Node* use = nnn->fast_out(j2);
+ if( use->in(0) == nnn && use->req() == 3 && use->is_Phi() ) {
+ _igvn.hash_delete(use);
+ use->set_req(LoopNode::LoopBackControl, C->top());
+ }
+ }
+
+
+ // Step 4: Correct dom-depth info. Set to loop-head depth.
+ int dd = dom_depth(loop->_head);
+ set_idom(loop->_head, loop->_head->in(1), dd);
+ for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
+ Node *old = loop->_body.at(j3);
+ Node *nnn = old_new[old->_idx];
+ if (!has_ctrl(nnn))
+ set_idom(nnn, idom(nnn), dd-1);
+ // While we're at it, remove any SafePoints from the peeled code
+ if( old->Opcode() == Op_SafePoint ) {
+ Node *nnn = old_new[old->_idx];
+ lazy_replace(nnn,nnn->in(TypeFunc::Control));
+ }
+ }
+
+ // Now force out all loop-invariant dominating tests. The optimizer
+ // finds some, but we _know_ they are all useless.
+ peeled_dom_test_elim(loop,old_new);
+
+ loop->record_for_igvn();
+}
+
+//------------------------------policy_maximally_unroll------------------------
+// Return TRUE if the loop has a small, known, constant trip count and should be maximally unrolled.
+bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ assert( cl->is_normal_loop(), "" );
+
+ Node *init_n = cl->init_trip();
+ Node *limit_n = cl->limit();
+
+ // Non-constant bounds
+ if( init_n == NULL || !init_n->is_Con() ||
+ limit_n == NULL || !limit_n->is_Con() ||
+ // protect against stride not being a constant
+ !cl->stride_is_con() ) {
+ return false;
+ }
+ int init = init_n->get_int();
+ int limit = limit_n->get_int();
+ int span = limit - init;
+ int stride = cl->stride_con();
+
+ if (init >= limit || stride > span) {
+ // Return false (do not maximally unroll); the regular unroll/peel
+ // route will make a small mess which CCP will fold away.
+ return false;
+ }
+ uint trip_count = span/stride; // trip_count can be greater than 2 Gig.
+ assert( (int)trip_count*stride == span, "must divide evenly" );
+
+ // Real policy: if we maximally unroll, does it get too big?
+ // Allow the unrolled mess to get larger than standard loop
+ // size. After all, it will no longer be a loop.
+ uint body_size = _body.size();
+ uint unroll_limit = (uint)LoopUnrollLimit * 4;
+ assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
+ cl->set_trip_count(trip_count);
+ if( trip_count <= unroll_limit && body_size <= unroll_limit ) {
+ uint new_body_size = body_size * trip_count;
+ if (new_body_size <= unroll_limit &&
+ body_size == new_body_size / trip_count &&
+ // Unrolling can result in a large amount of node construction
+ new_body_size < MaxNodeLimit - phase->C->unique()) {
+ return true; // maximally unroll
+ }
+ }
+
+ return false; // Do not maximally unroll
+}
+
+
+//------------------------------policy_unroll----------------------------------
+// Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
+// the loop is a CountedLoop and the body is small enough.
+bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
+
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ assert( cl->is_normal_loop() || cl->is_main_loop(), "" );
+
+ // protect against stride not being a constant
+ if( !cl->stride_is_con() ) return false;
+
+ // protect against over-unrolling
+ if( cl->trip_count() <= 1 ) return false;
+
+ int future_unroll_ct = cl->unrolled_count() * 2;
+
+ // Don't unroll if the next round of unrolling would push us
+ // over the expected trip count of the loop. One is subtracted
+ // from the expected trip count because the pre-loop normally
+ // executes 1 iteration.
+ if (UnrollLimitForProfileCheck > 0 &&
+ cl->profile_trip_cnt() != COUNT_UNKNOWN &&
+ future_unroll_ct > UnrollLimitForProfileCheck &&
+ (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
+ return false;
+ }
+
+ // When unroll count is greater than LoopUnrollMin, don't unroll if:
+ // the residual iterations are more than 10% of the trip count
+ // and rounds of "unroll,optimize" are not making significant progress
+ // Progress defined as current size less than 20% larger than previous size.
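+ // Example with assumed numbers: unrolling from 4x to 8x against a profiled
+ // trip count of 60 leaves up to 7 residual iterations, and 7*10 > 60; if the
+ // body has also grown past 1.2x its pre-unroll node count, unrolling stops here.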
+ if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
+ future_unroll_ct > LoopUnrollMin &&
+ (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() &&
+ 1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
+ return false;
+ }
+
+ Node *init_n = cl->init_trip();
+ Node *limit_n = cl->limit();
+ // Non-constant bounds.
+ // Protect against over-unrolling when init or/and limit are not constant
+ // (so that trip_count's init value is maxint) but iv range is known.
+ if( init_n == NULL || !init_n->is_Con() ||
+ limit_n == NULL || !limit_n->is_Con() ) {
+ Node* phi = cl->phi();
+ if( phi != NULL ) {
+ assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
+ const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
+ int next_stride = cl->stride_con() * 2; // stride after this unroll
+ if( next_stride > 0 ) {
+ if( iv_type->_lo + next_stride <= iv_type->_lo || // overflow
+ iv_type->_lo + next_stride > iv_type->_hi ) {
+ return false; // over-unrolling
+ }
+ } else if( next_stride < 0 ) {
+ if( iv_type->_hi + next_stride >= iv_type->_hi || // overflow
+ iv_type->_hi + next_stride < iv_type->_lo ) {
+ return false; // over-unrolling
+ }
+ }
+ }
+ }
+
+ // Adjust body_size to determine if we unroll or not
+ uint body_size = _body.size();
+ // Key test to unroll CaffeineMark's Logic test
+ int xors_in_loop = 0;
+ // Also count ModL, DivL and MulL which expand mightily
+ for( uint k = 0; k < _body.size(); k++ ) {
+ switch( _body.at(k)->Opcode() ) {
+ case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test
+ case Op_ModL: body_size += 30; break;
+ case Op_DivL: body_size += 30; break;
+ case Op_MulL: body_size += 10; break;
+ }
+ }
+
+ // Check for being too big
+ if( body_size > (uint)LoopUnrollLimit ) {
+ if( xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
+ // Normal case: loop too big
+ return false;
+ }
+
+ // Check for stride being a small enough constant
+ if( abs(cl->stride_con()) > (1<<3) ) return false;
+
+ // Unroll once! (Each trip will soon do double iterations)
+ return true;
+}
+
+//------------------------------policy_align-----------------------------------
+// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
+// expression that does the alignment. Note that only one array base can be
+// aligned in a loop (unless the VM guarantees mutual alignment). Note that
+// if we vectorize short memory ops into longer memory ops, we may want to
+// increase alignment.
+bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
+ return false;
+}
+
+//------------------------------policy_range_check-----------------------------
+// Return TRUE or FALSE if the loop should be range-check-eliminated.
+// Actually we do iteration-splitting, a more powerful form of RCE.
+bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
+ if( !RangeCheckElimination ) return false;
+
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ // If we unrolled with no intention of doing RCE and we later
+ // changed our minds, we have no pre-loop. Either we need to
+ // make a new pre-loop, or we must disallow RCE.
+ if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now.
+ Node *trip_counter = cl->phi();
+
+ // Check loop body for tests of trip-counter plus loop-invariant vs
+ // loop-invariant.
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *iff = _body[i];
+ if( iff->Opcode() == Op_If ) { // Test?
+
+ // Comparing trip+off vs limit
+ Node *bol = iff->in(1);
+ if( bol->req() != 2 ) continue; // dead constant test
+ Node *cmp = bol->in(1);
+
+ Node *rc_exp = cmp->in(1);
+ Node *limit = cmp->in(2);
+
+ Node *limit_c = phase->get_ctrl(limit);
+ if( limit_c == phase->C->top() )
+ return false; // Found dead test on live IF? No RCE!
+ if( is_member(phase->get_loop(limit_c) ) ) {
+ // Compare might have operands swapped; commute them
+ rc_exp = cmp->in(2);
+ limit = cmp->in(1);
+ limit_c = phase->get_ctrl(limit);
+ if( is_member(phase->get_loop(limit_c) ) )
+ continue; // Both inputs are loop varying; cannot RCE
+ }
+
+ if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
+ continue;
+ }
+ // Yeah! Found a test like 'trip+off vs limit'
+ // Test is an IfNode, has 2 projections. If BOTH are in the loop
+ // we need loop unswitching instead of iteration splitting.
+ if( is_loop_exit(iff) )
+ return true; // Found reason to split iterations
+ } // End of is IF
+ }
+
+ return false;
+}
+
+//------------------------------policy_peel_only-------------------------------
+// Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
+// for unrolling loops with NO array accesses.
+bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const {
+
+ for( uint i = 0; i < _body.size(); i++ )
+ if( _body[i]->is_Mem() )
+ return false;
+
+ // No memory accesses at all!
+ return true;
+}
+
+//------------------------------clone_up_backedge_goo--------------------------
+// If Node n lives in the back_ctrl block and cannot float, we clone a private
+// version of n in the preheader_ctrl block and return that; otherwise return n.
+Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n ) {
+ if( get_ctrl(n) != back_ctrl ) return n;
+
+ Node *x = NULL; // If required, a clone of 'n'
+ // Check for 'n' being pinned in the backedge.
+ if( n->in(0) && n->in(0) == back_ctrl ) {
+ x = n->clone(); // Clone a copy of 'n' to preheader
+ x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader
+ }
+
+ // Recursively fix up any other input edges into x.
+ // If there are no changes we can just return 'n', otherwise
+ // we need to clone a private copy and change it.
+ for( uint i = 1; i < n->req(); i++ ) {
+ Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i) );
+ if( g != n->in(i) ) {
+ if( !x )
+ x = n->clone();
+ x->set_req(i, g);
+ }
+ }
+ if( x ) { // x can legally float to pre-header location
+ register_new_node( x, preheader_ctrl );
+ return x;
+ } else { // raise n to cover LCA of uses
+ set_ctrl( n, find_non_split_ctrl(back_ctrl->in(0)) );
+ }
+ return n;
+}
+
+//------------------------------insert_pre_post_loops--------------------------
+// Insert pre and post loops. If peel_only is set, the pre-loop cannot have
+// more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
+// alignment. Useful to unroll loops that do no array accesses.
+void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
+
+ C->set_major_progress();
+
+ // Find common pieces of the loop being guarded with pre & post loops
+ CountedLoopNode *main_head = loop->_head->as_CountedLoop();
+ assert( main_head->is_normal_loop(), "" );
+ CountedLoopEndNode *main_end = main_head->loopexit();
+ assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
+ uint dd_main_head = dom_depth(main_head);
+ uint max = main_head->outcnt();
+
+ Node *pre_header= main_head->in(LoopNode::EntryControl);
+ Node *init = main_head->init_trip();
+ Node *incr = main_end ->incr();
+ Node *limit = main_end ->limit();
+ Node *stride = main_end ->stride();
+ Node *cmp = main_end ->cmp_node();
+ BoolTest::mask b_test = main_end->test_trip();
+
+ // Need only 1 user of 'bol' because I will be hacking the loop bounds.
+ Node *bol = main_end->in(CountedLoopEndNode::TestValue);
+ if( bol->outcnt() != 1 ) {
+ bol = bol->clone();
+ register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
+ _igvn.hash_delete(main_end);
+ main_end->set_req(CountedLoopEndNode::TestValue, bol);
+ }
+ // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
+ if( cmp->outcnt() != 1 ) {
+ cmp = cmp->clone();
+ register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
+ _igvn.hash_delete(bol);
+ bol->set_req(1, cmp);
+ }
+
+ //------------------------------
+ // Step A: Create Post-Loop.
+ Node* main_exit = main_end->proj_out(false);
+ assert( main_exit->Opcode() == Op_IfFalse, "" );
+ int dd_main_exit = dom_depth(main_exit);
+
+ // Step A1: Clone the loop body. The clone becomes the post-loop. The main
+ // loop pre-header illegally has 2 control users (old & new loops).
+ clone_loop( loop, old_new, dd_main_exit );
+ assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" );
+ CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
+ post_head->set_post_loop(main_head);
+
+ // Build the main-loop normal exit.
+ IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end);
+ _igvn.register_new_node_with_optimizer( new_main_exit );
+ set_idom(new_main_exit, main_end, dd_main_exit );
+ set_loop(new_main_exit, loop->_parent);
+
+ // Step A2: Build a zero-trip guard for the post-loop. After leaving the
+ // main-loop, the post-loop may not execute at all. We 'opaque' the incr
+ // (the main-loop trip-counter exit value) because we will be changing
+ // the exit value (via unrolling) so we cannot constant-fold away the zero
+ // trip guard until all unrolling is done.
+ Node *zer_opaq = new (C, 2) Opaque1Node(incr);
+ Node *zer_cmp = new (C, 3) CmpINode( zer_opaq, limit );
+ Node *zer_bol = new (C, 2) BoolNode( zer_cmp, b_test );
+ register_new_node( zer_opaq, new_main_exit );
+ register_new_node( zer_cmp , new_main_exit );
+ register_new_node( zer_bol , new_main_exit );
+
+ // Build the IfNode
+ IfNode *zer_iff = new (C, 2) IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN );
+ _igvn.register_new_node_with_optimizer( zer_iff );
+ set_idom(zer_iff, new_main_exit, dd_main_exit);
+ set_loop(zer_iff, loop->_parent);
+
+ // Plug in the false-path, taken if we need to skip post-loop
+ _igvn.hash_delete( main_exit );
+ main_exit->set_req(0, zer_iff);
+ _igvn._worklist.push(main_exit);
+ set_idom(main_exit, zer_iff, dd_main_exit);
+ set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
+ // Make the true-path, must enter the post loop
+ Node *zer_taken = new (C, 1) IfTrueNode( zer_iff );
+ _igvn.register_new_node_with_optimizer( zer_taken );
+ set_idom(zer_taken, zer_iff, dd_main_exit);
+ set_loop(zer_taken, loop->_parent);
+ // Plug in the true path
+ _igvn.hash_delete( post_head );
+ post_head->set_req(LoopNode::EntryControl, zer_taken);
+ set_idom(post_head, zer_taken, dd_main_exit);
+
+ // Step A3: Make the fall-in values to the post-loop come from the
+ // fall-out values of the main-loop.
+ for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
+ Node* main_phi = main_head->fast_out(i);
+ if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
+ Node *post_phi = old_new[main_phi->_idx];
+ Node *fallmain = clone_up_backedge_goo(main_head->back_control(),
+ post_head->init_control(),
+ main_phi->in(LoopNode::LoopBackControl));
+ _igvn.hash_delete(post_phi);
+ post_phi->set_req( LoopNode::EntryControl, fallmain );
+ }
+ }
+
+ // Update local caches for next stanza
+ main_exit = new_main_exit;
+
+
+ //------------------------------
+ // Step B: Create Pre-Loop.
+
+ // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
+ // loop pre-header illegally has 2 control users (old & new loops).
+ clone_loop( loop, old_new, dd_main_head );
+ CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
+ CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
+ pre_head->set_pre_loop(main_head);
+ Node *pre_incr = old_new[incr->_idx];
+
+ // Find the pre-loop normal exit.
+ Node* pre_exit = pre_end->proj_out(false);
+ assert( pre_exit->Opcode() == Op_IfFalse, "" );
+ IfFalseNode *new_pre_exit = new (C, 1) IfFalseNode(pre_end);
+ _igvn.register_new_node_with_optimizer( new_pre_exit );
+ set_idom(new_pre_exit, pre_end, dd_main_head);
+ set_loop(new_pre_exit, loop->_parent);
+
+ // Step B2: Build a zero-trip guard for the main-loop. After leaving the
+ // pre-loop, the main-loop may not execute at all. Later in life this
+ // zero-trip guard will become the minimum-trip guard when we unroll
+ // the main-loop.
+ Node *min_opaq = new (C, 2) Opaque1Node(limit);
+ Node *min_cmp = new (C, 3) CmpINode( pre_incr, min_opaq );
+ Node *min_bol = new (C, 2) BoolNode( min_cmp, b_test );
+ register_new_node( min_opaq, new_pre_exit );
+ register_new_node( min_cmp , new_pre_exit );
+ register_new_node( min_bol , new_pre_exit );
+
+ // Build the IfNode
+ IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_FAIR, COUNT_UNKNOWN );
+ _igvn.register_new_node_with_optimizer( min_iff );
+ set_idom(min_iff, new_pre_exit, dd_main_head);
+ set_loop(min_iff, loop->_parent);
+
+ // Plug in the false-path, taken if we need to skip main-loop
+ _igvn.hash_delete( pre_exit );
+ pre_exit->set_req(0, min_iff);
+ set_idom(pre_exit, min_iff, dd_main_head);
+ set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
+ // Make the true-path, must enter the main loop
+ Node *min_taken = new (C, 1) IfTrueNode( min_iff );
+ _igvn.register_new_node_with_optimizer( min_taken );
+ set_idom(min_taken, min_iff, dd_main_head);
+ set_loop(min_taken, loop->_parent);
+ // Plug in the true path
+ _igvn.hash_delete( main_head );
+ main_head->set_req(LoopNode::EntryControl, min_taken);
+ set_idom(main_head, min_taken, dd_main_head);
+
+ // Step B3: Make the fall-in values to the main-loop come from the
+ // fall-out values of the pre-loop.
+ for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* main_phi = main_head->fast_out(i2);
+ if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
+ Node *pre_phi = old_new[main_phi->_idx];
+ Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
+ main_head->init_control(),
+ pre_phi->in(LoopNode::LoopBackControl));
+ _igvn.hash_delete(main_phi);
+ main_phi->set_req( LoopNode::EntryControl, fallpre );
+ }
+ }
+
+ // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
+ // RCE and alignment may change this later.
+ Node *cmp_end = pre_end->cmp_node();
+ assert( cmp_end->in(2) == limit, "" );
+ Node *pre_limit = new (C, 3) AddINode( init, stride );
+
+ // Save the original loop limit in this Opaque1 node for
+ // use by range check elimination.
+ Node *pre_opaq = new (C, 3) Opaque1Node(pre_limit, limit);
+
+ register_new_node( pre_limit, pre_head->in(0) );
+ register_new_node( pre_opaq , pre_head->in(0) );
+
+ // Since no other users of pre-loop compare, I can hack limit directly
+ assert( cmp_end->outcnt() == 1, "no other users" );
+ _igvn.hash_delete(cmp_end);
+ cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
+
+ // Special case for not-equal loop bounds:
+ // Change pre loop test, main loop test, and the
+ // main loop guard test to use lt or gt depending on stride
+ // direction:
+ // positive stride use <
+ // negative stride use >
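+ // For example, a counted loop written as "for (int i = 0; i != n; i++)" has a
+ // positive stride, so its pre-loop, main-loop and guard tests are all
+ // rewritten here to use "i < n" (source-level view, for illustration only).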
+
+ if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
+
+ BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
+ // Modify pre loop end condition
+ Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
+ BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test);
+ register_new_node( new_bol0, pre_head->in(0) );
+ _igvn.hash_delete(pre_end);
+ pre_end->set_req(CountedLoopEndNode::TestValue, new_bol0);
+ // Modify main loop guard condition
+ assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
+ BoolNode* new_bol1 = new (C, 2) BoolNode(min_bol->in(1), new_test);
+ register_new_node( new_bol1, new_pre_exit );
+ _igvn.hash_delete(min_iff);
+ min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
+ // Modify main loop end condition
+ BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
+ BoolNode* new_bol2 = new (C, 2) BoolNode(main_bol->in(1), new_test);
+ register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
+ _igvn.hash_delete(main_end);
+ main_end->set_req(CountedLoopEndNode::TestValue, new_bol2);
+ }
+
+ // Flag main loop
+ main_head->set_main_loop();
+ if( peel_only ) main_head->set_main_no_pre_loop();
+
+ // It's difficult to be precise about the trip-counts
+ // for the pre/post loops. They are usually very short,
+ // so guess that 4 trips is a reasonable value.
+ post_head->set_profile_trip_cnt(4.0);
+ pre_head->set_profile_trip_cnt(4.0);
+
+ // Now force out all loop-invariant dominating tests. The optimizer
+ // finds some, but we _know_ they are all useless.
+ peeled_dom_test_elim(loop,old_new);
+}
+
+//------------------------------is_invariant-----------------------------
+// Return true if n is invariant
+bool IdealLoopTree::is_invariant(Node* n) const {
+ Node *n_c = _phase->get_ctrl(n);
+ if (n_c->is_top()) return false;
+ return !is_member(_phase->get_loop(n_c));
+}
+
+
+//------------------------------do_unroll--------------------------------------
+// Unroll the loop body one step - make each trip do 2 iterations.
+void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
+ assert( LoopUnrollLimit, "" );
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Unrolling ");
+ loop->dump_head();
+ }
+#endif
+ CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
+ CountedLoopEndNode *loop_end = loop_head->loopexit();
+ assert( loop_end, "" );
+
+ // Remember loop node count before unrolling to detect
+ // if rounds of unroll,optimize are making progress
+ loop_head->set_node_count_before_unroll(loop->_body.size());
+
+ Node *ctrl = loop_head->in(LoopNode::EntryControl);
+ Node *limit = loop_head->limit();
+ Node *init = loop_head->init_trip();
+ Node *strid = loop_head->stride();
+
+ Node *opaq = NULL;
+ if( adjust_min_trip ) { // If not maximally unrolling, need adjustment
+ assert( loop_head->is_main_loop(), "" );
+ assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
+ Node *iff = ctrl->in(0);
+ assert( iff->Opcode() == Op_If, "" );
+ Node *bol = iff->in(1);
+ assert( bol->Opcode() == Op_Bool, "" );
+ Node *cmp = bol->in(1);
+ assert( cmp->Opcode() == Op_CmpI, "" );
+ opaq = cmp->in(2);
+ // Occasionally it's possible for a pre-loop Opaque1 node to be
+ // optimized away and then another round of loop opts attempted.
+ // We cannot optimize this particular loop in that case.
+ if( opaq->Opcode() != Op_Opaque1 )
+ return; // Cannot find pre-loop! Bail out!
+ }
+
+ C->set_major_progress();
+
+ // Adjust max trip count. The trip count is intentionally rounded
+ // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
+ // the main, unrolled, part of the loop will never execute as it is protected
+ // by the min-trip test. See bug 4834191 for a case where we over-unrolled
+ // and later determined that part of the unrolled loop was dead.
+ loop_head->set_trip_count(loop_head->trip_count() / 2);
+
+ // Double the count of original iterations in the unrolled loop body.
+ loop_head->double_unrolled_count();
+
+ // -----------
+ // Step 2: Cut back the trip counter for an unroll amount of 2.
+ // Loop will normally trip (limit - init)/stride_con. Since it's a
+ // CountedLoop this is exact (stride divides limit-init exactly).
+ // We are going to double the loop body, so we want to knock off any
+ // odd iteration: (trip_cnt & ~1). Then back compute a new limit.
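+ // Worked example (assumed values): init=0, limit=15, stride=1 gives trip=15;
+ // 15 & ~1 == 14, so the new limit below becomes 0 + 14*1 == 14 and the leftover
+ // odd iteration runs outside the unrolled body (in the post-loop for a
+ // pre/main/post loop nest).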
+ Node *span = new (C, 3) SubINode( limit, init );
+ register_new_node( span, ctrl );
+ Node *trip = new (C, 3) DivINode( 0, span, strid );
+ register_new_node( trip, ctrl );
+ Node *mtwo = _igvn.intcon(-2);
+ set_ctrl(mtwo, C->root());
+ Node *rond = new (C, 3) AndINode( trip, mtwo );
+ register_new_node( rond, ctrl );
+ Node *spn2 = new (C, 3) MulINode( rond, strid );
+ register_new_node( spn2, ctrl );
+ Node *lim2 = new (C, 3) AddINode( spn2, init );
+ register_new_node( lim2, ctrl );
+
+ // Hammer in the new limit
+ Node *ctrl2 = loop_end->in(0);
+ Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 );
+ register_new_node( cmp2, ctrl2 );
+ Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() );
+ register_new_node( bol2, ctrl2 );
+ _igvn.hash_delete(loop_end);
+ loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
+
+ // Step 3: Find the min-trip test guaranteed before a 'main' loop.
+ // Make it a 1-trip test (means at least 2 trips).
+ if( adjust_min_trip ) {
+ // Guard test uses an 'opaque' node which is not shared. Hence I
+ // can edit its inputs directly. Hammer in the new limit for the
+ // minimum-trip guard.
+ assert( opaq->outcnt() == 1, "" );
+ _igvn.hash_delete(opaq);
+ opaq->set_req(1, lim2);
+ }
+
+ // ---------
+ // Step 4: Clone the loop body. Move it inside the loop. This loop body
+ // represents the odd iterations; since the loop trips an even number of
+ // times its backedge is never taken. Kill the backedge.
+ uint dd = dom_depth(loop_head);
+ clone_loop( loop, old_new, dd );
+
+ // Make backedges of the clone equal to backedges of the original.
+ // Make the fall-in from the original come from the fall-out of the clone.
+ for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
+ Node* phi = loop_head->fast_out(j);
+ if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) {
+ Node *newphi = old_new[phi->_idx];
+ _igvn.hash_delete( phi );
+ _igvn.hash_delete( newphi );
+
+ phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl));
+ newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl));
+ phi ->set_req(LoopNode::LoopBackControl, C->top());
+ }
+ }
+ Node *clone_head = old_new[loop_head->_idx];
+ _igvn.hash_delete( clone_head );
+ loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl));
+ clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl));
+ loop_head ->set_req(LoopNode::LoopBackControl, C->top());
+ loop->_head = clone_head; // New loop header
+
+ set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd);
+ set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd);
+
+ // Kill the clone's backedge
+ Node *newcle = old_new[loop_end->_idx];
+ _igvn.hash_delete( newcle );
+ Node *one = _igvn.intcon(1);
+ set_ctrl(one, C->root());
+ newcle->set_req(1, one);
+ // Force clone into same loop body
+ uint max = loop->_body.size();
+ for( uint k = 0; k < max; k++ ) {
+ Node *old = loop->_body.at(k);
+ Node *nnn = old_new[old->_idx];
+ loop->_body.push(nnn);
+ if (!has_ctrl(old))
+ set_loop(nnn, loop);
+ }
+}
+
+//------------------------------do_maximally_unroll----------------------------
+
+void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) {
+ CountedLoopNode *cl = loop->_head->as_CountedLoop();
+ assert( cl->trip_count() > 0, "");
+
+ // If loop is tripping an odd number of times, peel odd iteration
+ if( (cl->trip_count() & 1) == 1 ) {
+ do_peeling( loop, old_new );
+ }
+
+ // The remaining trip count is now even. Double the loop body.
+ // Do not adjust pre-guards; they are not needed and do not exist.
+ if( cl->trip_count() > 0 ) {
+ do_unroll( loop, old_new, false );
+ }
+}
+
+//------------------------------dominates_backedge---------------------------------
+// Returns true if ctrl is executed on every complete iteration
+bool IdealLoopTree::dominates_backedge(Node* ctrl) {
+ assert(ctrl->is_CFG(), "must be control");
+ Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
+ return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
+}
+
+//------------------------------add_constraint---------------------------------
+// Constrain the main loop iterations so the condition:
+// scale_con * I + offset < limit
+// always holds true. That is, either increase the number of iterations in
+// the pre-loop or the post-loop until the condition holds true in the main
+// loop. Stride, scale, offset and limit are all loop invariant. Further,
+// stride and scale are constants (offset and limit often are).
+void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
+
+ // Compute "I :: (limit-offset)/scale_con"
+ Node *con = new (C, 3) SubINode( limit, offset );
+ register_new_node( con, pre_ctrl );
+ Node *scale = _igvn.intcon(scale_con);
+ set_ctrl(scale, C->root());
+ Node *X = new (C, 3) DivINode( 0, con, scale );
+ register_new_node( X, pre_ctrl );
+
+ // For positive stride, the pre-loop limit always uses a MAX function
+ // and the main loop a MIN function. For negative stride these are
+ // reversed.
+
+ // Also for positive stride*scale the affine function is increasing, so the
+ // pre-loop must check for underflow and the post-loop for overflow.
+ // Negative stride*scale reverses this; pre-loop checks for overflow and
+ // post-loop for underflow.
+ if( stride_con*scale_con > 0 ) {
+ // Compute I < (limit-offset)/scale_con
+ // Adjust main-loop last iteration to be MIN/MAX(main_loop,X)
+ *main_limit = (stride_con > 0)
+ ? (Node*)(new (C, 3) MinINode( *main_limit, X ))
+ : (Node*)(new (C, 3) MaxINode( *main_limit, X ));
+ register_new_node( *main_limit, pre_ctrl );
+
+ } else {
+ // Compute (limit-offset)/scale_con + SGN(-scale_con) <= I
+ // Add the negation of the main-loop constraint to the pre-loop.
+ // See footnote [++] below for a derivation of the limit expression.
+ Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1);
+ set_ctrl(incr, C->root());
+ Node *adj = new (C, 3) AddINode( X, incr );
+ register_new_node( adj, pre_ctrl );
+ *pre_limit = (scale_con > 0)
+ ? (Node*)new (C, 3) MinINode( *pre_limit, adj )
+ : (Node*)new (C, 3) MaxINode( *pre_limit, adj );
+ register_new_node( *pre_limit, pre_ctrl );
+
+// [++] Here's the algebra that justifies the pre-loop limit expression:
+//
+// NOT( scale_con * I + offset < limit )
+// ==
+// scale_con * I + offset >= limit
+// ==
+// SGN(scale_con) * I >= (limit-offset)/|scale_con|
+// ==
+// (limit-offset)/|scale_con| <= I * SGN(scale_con)
+// ==
+// (limit-offset)/|scale_con|-1 < I * SGN(scale_con)
+// ==
+// ( if (scale_con > 0) /*common case*/
+// (limit-offset)/scale_con - 1 < I
+// else
+// (limit-offset)/scale_con + 1 > I
+// )
+// ==
+// ( if (scale_con > 0) /*common case*/
+// (limit-offset)/scale_con + SGN(-scale_con) < I
+// else
+// (limit-offset)/scale_con + SGN(-scale_con) > I
+// )
+ }
+}
+
+
+//------------------------------is_scaled_iv---------------------------------
+// Return true if exp is a constant times an induction var
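+// (e.g. it matches "iv", "3*iv", "iv*3", or "iv << 2", the last with scale 4)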
+bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
+ if (exp == iv) {
+ if (p_scale != NULL) {
+ *p_scale = 1;
+ }
+ return true;
+ }
+ int opc = exp->Opcode();
+ if (opc == Op_MulI) {
+ if (exp->in(1) == iv && exp->in(2)->is_Con()) {
+ if (p_scale != NULL) {
+ *p_scale = exp->in(2)->get_int();
+ }
+ return true;
+ }
+ if (exp->in(2) == iv && exp->in(1)->is_Con()) {
+ if (p_scale != NULL) {
+ *p_scale = exp->in(1)->get_int();
+ }
+ return true;
+ }
+ } else if (opc == Op_LShiftI) {
+ if (exp->in(1) == iv && exp->in(2)->is_Con()) {
+ if (p_scale != NULL) {
+ *p_scale = 1 << exp->in(2)->get_int();
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+//-----------------------------is_scaled_iv_plus_offset------------------------------
+// Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
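+// (e.g. it matches "2*iv + invar" with scale 2, "2*iv - invar", and
+// "invar - 2*iv", the last reported with scale -2)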
+bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) {
+ if (is_scaled_iv(exp, iv, p_scale)) {
+ if (p_offset != NULL) {
+ Node *zero = _igvn.intcon(0);
+ set_ctrl(zero, C->root());
+ *p_offset = zero;
+ }
+ return true;
+ }
+ int opc = exp->Opcode();
+ if (opc == Op_AddI) {
+ if (is_scaled_iv(exp->in(1), iv, p_scale)) {
+ if (p_offset != NULL) {
+ *p_offset = exp->in(2);
+ }
+ return true;
+ }
+ if (exp->in(2)->is_Con()) {
+ Node* offset2 = NULL;
+ if (depth < 2 &&
+ is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
+ p_offset != NULL ? &offset2 : NULL, depth+1)) {
+ if (p_offset != NULL) {
+ Node *ctrl_off2 = get_ctrl(offset2);
+ Node* offset = new (C, 3) AddINode(offset2, exp->in(2));
+ register_new_node(offset, ctrl_off2);
+ *p_offset = offset;
+ }
+ return true;
+ }
+ }
+ } else if (opc == Op_SubI) {
+ if (is_scaled_iv(exp->in(1), iv, p_scale)) {
+ if (p_offset != NULL) {
+ Node *zero = _igvn.intcon(0);
+ set_ctrl(zero, C->root());
+ Node *ctrl_off = get_ctrl(exp->in(2));
+ Node* offset = new (C, 3) SubINode(zero, exp->in(2));
+ register_new_node(offset, ctrl_off);
+ *p_offset = offset;
+ }
+ return true;
+ }
+ if (is_scaled_iv(exp->in(2), iv, p_scale)) {
+ if (p_offset != NULL) {
+ *p_scale *= -1;
+ *p_offset = exp->in(1);
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------do_range_check---------------------------------
+// Eliminate range-checks and other trip-counter vs loop-invariant tests.
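+// Illustrative example: in "for (int i = 0; i < n; i++) a[i] = 0;" the bounds
+// check "0 <= i < a.length" is a trip-counter vs loop-invariant test. The
+// pre- and main-loop limits are constrained below so the main loop provably
+// stays in range and its check is removed; the pre- and post-loops keep the
+// full checks.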
+void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Range Check Elimination ");
+ loop->dump_head();
+ }
+#endif
+ assert( RangeCheckElimination, "" );
+ CountedLoopNode *cl = loop->_head->as_CountedLoop();
+ assert( cl->is_main_loop(), "" );
+
+ // Find the trip counter; we are iteration splitting based on it
+ Node *trip_counter = cl->phi();
+ // Find the main loop limit; we will trim its iterations
+ // so they never trip the end tests
+ Node *main_limit = cl->limit();
+ // Find the pre-loop limit; we will expand its iterations so
+ // they never trip the low tests.
+ Node *ctrl = cl->in(LoopNode::EntryControl);
+ assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
+ Node *iffm = ctrl->in(0);
+ assert( iffm->Opcode() == Op_If, "" );
+ Node *p_f = iffm->in(0);
+ assert( p_f->Opcode() == Op_IfFalse, "" );
+ CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
+ assert( pre_end->loopnode()->is_pre_loop(), "" );
+ Node *pre_opaq1 = pre_end->limit();
+ // Occasionally it's possible for a pre-loop Opaque1 node to be
+ // optimized away and then another round of loop opts attempted.
+ // We cannot optimize this particular loop in that case.
+ if( pre_opaq1->Opcode() != Op_Opaque1 )
+ return;
+ Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
+ Node *pre_limit = pre_opaq->in(1);
+
+ // Where do we put new limit calculations
+ Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+
+ // Ensure the original loop limit is available from the
+ // pre-loop Opaque1 node.
+ Node *orig_limit = pre_opaq->original_loop_limit();
+ if( orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP )
+ return;
+
+ // Need to find the main-loop zero-trip guard
+ Node *bolzm = iffm->in(1);
+ assert( bolzm->Opcode() == Op_Bool, "" );
+ Node *cmpzm = bolzm->in(1);
+ assert( cmpzm->is_Cmp(), "" );
+ Node *opqzm = cmpzm->in(2);
+ if( opqzm->Opcode() != Op_Opaque1 )
+ return;
+ assert( opqzm->in(1) == main_limit, "do not understand situation" );
+
+ // Must know if it's a count-up or count-down loop
+
+ // protect against stride not being a constant
+ if ( !cl->stride_is_con() ) {
+ return;
+ }
+ int stride_con = cl->stride_con();
+ Node *zero = _igvn.intcon(0);
+ Node *one = _igvn.intcon(1);
+ set_ctrl(zero, C->root());
+ set_ctrl(one, C->root());
+
+ // Range checks that do not dominate the loop backedge (i.e. are
+ // conditionally executed) can lengthen the pre-loop limit beyond
+ // the original loop limit. To prevent this, the pre-loop limit is
+ // MINed with the original loop limit for stride > 0 (MAXed for
+ // stride < 0) whenever some range check (rc) is conditionally
+ // executed.
+ bool conditional_rc = false;
+
+ // Check loop body for tests of trip-counter plus loop-invariant vs
+ // loop-invariant.
+ for( uint i = 0; i < loop->_body.size(); i++ ) {
+ Node *iff = loop->_body[i];
+ if( iff->Opcode() == Op_If ) { // Test?
+
+ // Test is an IfNode, has 2 projections. If BOTH are in the loop
+ // we need loop unswitching instead of iteration splitting.
+ Node *exit = loop->is_loop_exit(iff);
+ if( !exit ) continue;
+ int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
+
+ // Get boolean condition to test
+ Node *i1 = iff->in(1);
+ if( !i1->is_Bool() ) continue;
+ BoolNode *bol = i1->as_Bool();
+ BoolTest b_test = bol->_test;
+ // Flip sense of test if exit condition is flipped
+ if( flip )
+ b_test = b_test.negate();
+
+ // Get compare
+ Node *cmp = bol->in(1);
+
+ // Look for trip_counter + offset vs limit
+ Node *rc_exp = cmp->in(1);
+ Node *limit = cmp->in(2);
+ jint scale_con= 1; // Assume trip counter not scaled
+
+ Node *limit_c = get_ctrl(limit);
+ if( loop->is_member(get_loop(limit_c) ) ) {
+ // Compare might have operands swapped; commute them
+ b_test = b_test.commute();
+ rc_exp = cmp->in(2);
+ limit = cmp->in(1);
+ limit_c = get_ctrl(limit);
+ if( loop->is_member(get_loop(limit_c) ) )
+ continue; // Both inputs are loop varying; cannot RCE
+ }
+ // Here we know 'limit' is loop invariant
+
+ // 'limit' may be pinned below the zero-trip test (probably from a
+ // previous round of RCE), in which case it can't be used in the
+ // zero-trip test expression, which must occur before the zero test's if.
+ if( limit_c == ctrl ) {
+ continue; // Don't rce this check but continue looking for other candidates.
+ }
+
+ // Check for scaled induction variable plus an offset
+ Node *offset = NULL;
+
+ if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) {
+ continue;
+ }
+
+ Node *offset_c = get_ctrl(offset);
+ if( loop->is_member( get_loop(offset_c) ) )
+ continue; // Offset is not really loop invariant
+ // Here we know 'offset' is loop invariant.
+
+ // As above for the 'limit', the 'offset' may be pinned below the
+ // zero-trip test.
+ if( offset_c == ctrl ) {
+ continue; // Don't rce this check but continue looking for other candidates.
+ }
+
+ // At this point we have the expression as:
+ // scale_con * trip_counter + offset :: limit
+ // where scale_con, offset and limit are loop invariant. Trip_counter
+ // monotonically increases by stride_con, a constant. Both (or either)
+ // stride_con and scale_con can be negative, which will flip the
+ // sense of the test.
+
+ // Adjust pre and main loop limits to guard the correct iteration set
+ if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
+ if( b_test._test == BoolTest::lt ) { // Range checks always use lt
+ // The overflow limit: scale*I+offset < limit
+ add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
+ // The underflow limit: 0 <= scale*I+offset.
+ // Some math yields: -scale*I-(offset+1) < 0
+ Node *plus_one = new (C, 3) AddINode( offset, one );
+ register_new_node( plus_one, pre_ctrl );
+ Node *neg_offset = new (C, 3) SubINode( zero, plus_one );
+ register_new_node( neg_offset, pre_ctrl );
+ add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit );
+ if (!conditional_rc) {
+ conditional_rc = !loop->dominates_backedge(iff);
+ }
+ } else {
+#ifndef PRODUCT
+ if( PrintOpto )
+ tty->print_cr("missed RCE opportunity");
+#endif
+ continue; // In release mode, ignore it
+ }
+ } else { // Otherwise work on normal compares
+ switch( b_test._test ) {
+ case BoolTest::ge: // Convert X >= Y to -X <= -Y
+ scale_con = -scale_con;
+ offset = new (C, 3) SubINode( zero, offset );
+ register_new_node( offset, pre_ctrl );
+ limit = new (C, 3) SubINode( zero, limit );
+ register_new_node( limit, pre_ctrl );
+ // Fall into LE case
+ case BoolTest::le: // Convert X <= Y to X < Y+1
+ limit = new (C, 3) AddINode( limit, one );
+ register_new_node( limit, pre_ctrl );
+ // Fall into LT case
+ case BoolTest::lt:
+ add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
+ if (!conditional_rc) {
+ conditional_rc = !loop->dominates_backedge(iff);
+ }
+ break;
+ default:
+#ifndef PRODUCT
+ if( PrintOpto )
+ tty->print_cr("missed RCE opportunity");
+#endif
+ continue; // Unhandled case
+ }
+ }
+
+ // Kill the eliminated test
+ C->set_major_progress();
+ Node *kill_con = _igvn.intcon( 1-flip );
+ set_ctrl(kill_con, C->root());
+ _igvn.hash_delete(iff);
+ iff->set_req(1, kill_con);
+ _igvn._worklist.push(iff);
+ // Find surviving projection
+ assert(iff->is_If(), "");
+ ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
+ // Find loads off the surviving projection; remove their control edge
+ for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
+ Node* cd = dp->fast_out(i); // Control-dependent node
+ if( cd->is_Load() ) { // Loads can now float around in the loop
+ _igvn.hash_delete(cd);
+ // Allow the load to float around in the loop, or before it
+ // but NOT before the pre-loop.
+ cd->set_req(0, ctrl); // ctrl, not NULL
+ _igvn._worklist.push(cd);
+ --i;
+ --imax;
+ }
+ }
+
+ } // End of is IF
+
+ }
+
+ // Update loop limits
+ if (conditional_rc) {
+ pre_limit = (stride_con > 0) ? (Node*)new (C,3) MinINode(pre_limit, orig_limit)
+ : (Node*)new (C,3) MaxINode(pre_limit, orig_limit);
+ register_new_node(pre_limit, pre_ctrl);
+ }
+ _igvn.hash_delete(pre_opaq);
+ pre_opaq->set_req(1, pre_limit);
+
+ // Note: we are making the main loop limit no longer precise;
+ // need to round up based on stride.
+ if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case
+ // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init
+ // Hopefully, compiler will optimize for powers of 2.
+ Node *ctrl = get_ctrl(main_limit);
+ Node *stride = cl->stride();
+ Node *init = cl->init_trip();
+ Node *span = new (C, 3) SubINode(main_limit,init);
+ register_new_node(span,ctrl);
+ Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
+ Node *add = new (C, 3) AddINode(span,rndup);
+ register_new_node(add,ctrl);
+ Node *div = new (C, 3) DivINode(0,add,stride);
+ register_new_node(div,ctrl);
+ Node *mul = new (C, 3) MulINode(div,stride);
+ register_new_node(mul,ctrl);
+ Node *newlim = new (C, 3) AddINode(mul,init);
+ register_new_node(newlim,ctrl);
+ main_limit = newlim;
+ }
+
+ Node *main_cle = cl->loopexit();
+ Node *main_bol = main_cle->in(1);
+ // Hacking loop bounds; need private copies of exit test
+ if( main_bol->outcnt() > 1 ) {// BoolNode shared?
+ _igvn.hash_delete(main_cle);
+ main_bol = main_bol->clone();// Clone a private BoolNode
+ register_new_node( main_bol, main_cle->in(0) );
+ main_cle->set_req(1,main_bol);
+ }
+ Node *main_cmp = main_bol->in(1);
+ if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
+ _igvn.hash_delete(main_bol);
+ main_cmp = main_cmp->clone();// Clone a private CmpNode
+ register_new_node( main_cmp, main_cle->in(0) );
+ main_bol->set_req(1,main_cmp);
+ }
+ // Hack the now-private loop bounds
+ _igvn.hash_delete(main_cmp);
+ main_cmp->set_req(2, main_limit);
+ _igvn._worklist.push(main_cmp);
+ // The OpaqueNode is unshared by design
+ _igvn.hash_delete(opqzm);
+ assert( opqzm->outcnt() == 1, "cannot hack shared node" );
+ opqzm->set_req(1,main_limit);
+ _igvn._worklist.push(opqzm);
+}
+
+//------------------------------DCE_loop_body----------------------------------
+// Remove simplistic dead code from loop body
+void IdealLoopTree::DCE_loop_body() {
+ for( uint i = 0; i < _body.size(); i++ )
+ if( _body.at(i)->outcnt() == 0 )
+ _body.map( i--, _body.pop() );
+}
+
+
+//------------------------------adjust_loop_exit_prob--------------------------
+// Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
+// Replace with a 1-in-10 exit guess.
+void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
+ Node *test = tail();
+ while( test != _head ) {
+ uint top = test->Opcode();
+ if( top == Op_IfTrue || top == Op_IfFalse ) {
+ int test_con = ((ProjNode*)test)->_con;
+ assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
+ IfNode *iff = test->in(0)->as_If();
+ if( iff->outcnt() == 2 ) { // Ignore dead tests
+ Node *bol = iff->in(1);
+ if( bol && bol->req() > 1 && bol->in(1) &&
+ ((bol->in(1)->Opcode() == Op_StorePConditional ) ||
+ (bol->in(1)->Opcode() == Op_StoreLConditional ) ||
+ (bol->in(1)->Opcode() == Op_CompareAndSwapI ) ||
+ (bol->in(1)->Opcode() == Op_CompareAndSwapL ) ||
+ (bol->in(1)->Opcode() == Op_CompareAndSwapP )))
+ return; // Allocation loops RARELY take backedge
+ // Find the OTHER exit path from the IF
+ Node* ex = iff->proj_out(1-test_con);
+ float p = iff->_prob;
+ if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) {
+ if( top == Op_IfTrue ) {
+ if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) {
+ iff->_prob = PROB_STATIC_FREQUENT;
+ }
+ } else {
+ if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) {
+ iff->_prob = PROB_STATIC_INFREQUENT;
+ }
+ }
+ }
+ }
+ }
+ test = phase->idom(test);
+ }
+}
+
+
+//------------------------------policy_do_remove_empty_loop--------------------
+// Micro-benchmark spamming. Policy is to always remove empty loops.
+// The 'DO' part is to replace the trip counter with the value it will
+// have on the last iteration. This will break the loop.
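+// Illustrative example: "for (int i = 0; i < n; i++) { }" has an empty body;
+// the phi for i is replaced by its last-iteration value (limit - stride),
+// the backedge test folds away, and the loop collapses.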
+bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
+ // Body must be no larger than that of an empty loop
+ if( _body.size() > 7/*number of nodes in an empty loop*/ ) return false;
+
+ if( !_head->is_CountedLoop() ) return false; // Dead loop
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ if( !cl->loopexit() ) return false; // Malformed loop
+ if( !phase->is_member(this,phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)) ) )
+ return false; // Infinite loop
+#ifndef PRODUCT
+ if( PrintOpto )
+ tty->print_cr("Removing empty loop");
+#endif
+#ifdef ASSERT
+ // Ensure only one phi which is the iv.
+ Node* iv = NULL;
+ for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) {
+ Node* n = cl->fast_out(i);
+ if (n->Opcode() == Op_Phi) {
+ assert(iv == NULL, "Too many phis" );
+ iv = n;
+ }
+ }
+ assert(iv == cl->phi(), "Wrong phi" );
+#endif
+ // Replace the phi at loop head with the final value of the last
+ // iteration. Then the CountedLoopEnd will collapse (backedge never
+ // taken) and all loop-invariant uses of the exit values will be correct.
+ Node *phi = cl->phi();
+ Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() );
+ phase->register_new_node(final,cl->in(LoopNode::EntryControl));
+ phase->_igvn.hash_delete(phi);
+ phase->_igvn.subsume_node(phi,final);
+ phase->C->set_major_progress();
+ return true;
+}
+
+
+//=============================================================================
+//------------------------------iteration_split_impl---------------------------
+void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
+ // Check and remove empty loops (spam micro-benchmarks)
+ if( policy_do_remove_empty_loop(phase) )
+ return; // Here we removed an empty loop
+
+ bool should_peel = policy_peeling(phase); // Should we peel?
+
+ bool should_unswitch = policy_unswitching(phase);
+
+ // Non-counted loops may be peeled; exactly 1 iteration is peeled.
+ // This removes loop-invariant tests (usually null checks).
+ if( !_head->is_CountedLoop() ) { // Non-counted loop
+ if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
+ return;
+ }
+ if( should_peel ) { // Should we peel?
+#ifndef PRODUCT
+ if (PrintOpto) tty->print_cr("should_peel");
+#endif
+ phase->do_peeling(this,old_new);
+ } else if( should_unswitch ) {
+ phase->do_unswitching(this, old_new);
+ }
+ return;
+ }
+ CountedLoopNode *cl = _head->as_CountedLoop();
+
+ if( !cl->loopexit() ) return; // Ignore various kinds of broken loops
+
+ // Do nothing special to pre- and post- loops
+ if( cl->is_pre_loop() || cl->is_post_loop() ) return;
+
+ // Compute loop trip count from profile data
+ compute_profile_trip_cnt(phase);
+
+ // Before attempting fancy unrolling, RCE or alignment, see if we want
+ // to completely unroll this loop or do loop unswitching.
+ if( cl->is_normal_loop() ) {
+ bool should_maximally_unroll = policy_maximally_unroll(phase);
+ if( should_maximally_unroll ) {
+ // Here we did some unrolling and peeling. Eventually we will
+ // completely unroll this loop and it will no longer be a loop.
+ phase->do_maximally_unroll(this,old_new);
+ return;
+ }
+ if (should_unswitch) {
+ phase->do_unswitching(this, old_new);
+ return;
+ }
+ }
+
+
+ // Counted loops may be peeled, may need some iterations run up
+ // front for RCE, and may want to align loop refs to a cache
+ // line. Thus we clone a full loop up front whose trip count is
+ // at least 1 (if peeling), but may be several more.
+
+ // The main loop will start cache-line aligned with at least 1
+ // iteration of the unrolled body (zero-trip test required) and
+ // will have some range checks removed.
+
+ // A post-loop will finish any odd iterations (leftover after
+ // unrolling), plus any needed for RCE purposes.
+
+ bool should_unroll = policy_unroll(phase);
+
+ bool should_rce = policy_range_check(phase);
+
+ bool should_align = policy_align(phase);
+
+ // If not RCE'ing (iteration splitting) or Aligning, then we do not
+ // need a pre-loop. We may still need to peel an initial iteration but
+ // we will not be needing an unknown number of pre-iterations.
+ //
+ // Basically, if may_rce_align reports FALSE first time through,
+ // we will not be able to later do RCE or Aligning on this loop.
+ bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
+
+ // If we have any of these conditions (RCE, alignment, unrolling) met, then
+ // we switch to the pre-/main-/post-loop model. This model also covers
+ // peeling.
+ if( should_rce || should_align || should_unroll ) {
+ if( cl->is_normal_loop() ) // Convert to 'pre/main/post' loops
+ phase->insert_pre_post_loops(this,old_new, !may_rce_align);
+
+ // Adjust the pre- and main-loop limits to let the pre and post loops run
+ // with full checks, but the main-loop with no checks. Remove said
+ // checks from the main body.
+ if( should_rce )
+ phase->do_range_check(this,old_new);
+
+ // Double loop body for unrolling. Adjust the minimum-trip test (will do
+ // twice as many iterations as before) and the main body limit (only do
+ // an even number of trips). If we are peeling, we might enable some RCE
+ // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
+ // peeling.
+ if( should_unroll && !should_peel )
+ phase->do_unroll(this,old_new, true);
+
+ // Adjust the pre-loop limits to align the main body
+ // iterations.
+ if( should_align )
+ Unimplemented();
+
+ } else { // Else we have an unchanged counted loop
+ if( should_peel ) // Might want to peel but do nothing else
+ phase->do_peeling(this,old_new);
+ }
+}
+
+
+//=============================================================================
+//------------------------------iteration_split--------------------------------
+void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
+ // Recursively iteration split nested loops
+ if( _child ) _child->iteration_split( phase, old_new );
+
+ // Clean out prior deadwood
+ DCE_loop_body();
+
+
+ // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
+ // Replace with a 1-in-10 exit guess.
+ if( _parent /*not the root loop*/ &&
+ !_irreducible &&
+ // Also ignore the occasional dead backedge
+ !tail()->is_top() ) {
+ adjust_loop_exit_prob(phase);
+ }
+
+
+ // Gate unrolling, RCE and peeling efforts.
+ if( !_child && // If not an inner loop, do not split
+ !_irreducible &&
+ !tail()->is_top() ) { // Also ignore the occasional dead backedge
+ if (!_has_call) {
+ iteration_split_impl( phase, old_new );
+ } else if (policy_unswitching(phase)) {
+ phase->do_unswitching(this, old_new);
+ }
+ }
+
+ // Minor offset re-organization to remove loop-fallout uses of
+ // trip counter.
+ if( _head->is_CountedLoop() ) phase->reorg_offsets( this );
+ if( _next ) _next->iteration_split( phase, old_new );
+}
diff --git a/src/share/vm/opto/loopUnswitch.cpp b/src/share/vm/opto/loopUnswitch.cpp
new file mode 100644
index 000000000..fcba517e8
--- /dev/null
+++ b/src/share/vm/opto/loopUnswitch.cpp
@@ -0,0 +1,237 @@
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_loopUnswitch.cpp.incl"
+
+//================= Loop Unswitching =====================
+//
+// orig: transformed:
+// if (invariant-test) then
+// loop loop
+// stmt1 stmt1
+// if (invariant-test) then stmt2
+// stmt2 stmt4
+// else endloop
+// stmt3 else
+// endif loop [clone]
+// stmt4 stmt1 [clone]
+// endloop stmt3
+// stmt4 [clone]
+// endloop
+// endif
+//
+// Note: the "else" clause may be empty
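+//
+// Roughly, at the source level (illustrative):
+// for (...) { if (cond) a(); else b(); } // cond loop-invariant
+// becomes
+// if (cond) { for (...) a(); } else { for (...) b(); }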
+
+//------------------------------policy_unswitching-----------------------------
+// Return TRUE or FALSE if the loop should be unswitched
+// (i.e. clone the loop with an invariant test that does not exit the loop)
+bool IdealLoopTree::policy_unswitching( PhaseIdealLoop *phase ) const {
+ if( !LoopUnswitching ) {
+ return false;
+ }
+ uint nodes_left = MaxNodeLimit - phase->C->unique();
+ if (2 * _body.size() > nodes_left) {
+ return false; // Too speculative if running low on nodes.
+ }
+ LoopNode* head = _head->as_Loop();
+ if (head->unswitch_count() + 1 > head->unswitch_max()) {
+ return false;
+ }
+ return phase->find_unswitching_candidate(this) != NULL;
+}
+
+//------------------------------find_unswitching_candidate-----------------------------
+// Find candidate "if" for unswitching
+IfNode* PhaseIdealLoop::find_unswitching_candidate(const IdealLoopTree *loop) const {
+
+ // Find first invariant test that doesn't exit the loop
+ LoopNode *head = loop->_head->as_Loop();
+ IfNode* unswitch_iff = NULL;
+ Node* n = head->in(LoopNode::LoopBackControl);
+ while (n != head) {
+ Node* n_dom = idom(n);
+ if (n->is_Region()) {
+ if (n_dom->is_If()) {
+ IfNode* iff = n_dom->as_If();
+ if (iff->in(1)->is_Bool()) {
+ BoolNode* bol = iff->in(1)->as_Bool();
+ if (bol->in(1)->is_Cmp()) {
+ // If condition is invariant and not a loop exit,
+ // then found reason to unswitch.
+ if (loop->is_invariant(bol) && !loop->is_loop_exit(iff)) {
+ unswitch_iff = iff;
+ }
+ }
+ }
+ }
+ }
+ n = n_dom;
+ }
+ return unswitch_iff;
+}
+
+//------------------------------do_unswitching-----------------------------
+// Clone loop with an invariant test (that does not exit) and
+// insert a clone of the test that selects which version to
+// execute.
+void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
+
+ // Find first invariant test that doesn't exit the loop
+ LoopNode *head = loop->_head->as_Loop();
+
+ IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
+ assert(unswitch_iff != NULL, "should be at least one");
+
+ // Need to revert back to normal loop
+ if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
+ head->as_CountedLoop()->set_normal_loop();
+ }
+
+ ProjNode* proj_true = create_slow_version_of_loop(loop, old_new);
+
+ assert(proj_true->is_IfTrue() && proj_true->unique_ctrl_out() == head, "by construction");
+
+ // Increment unswitch count
+ LoopNode* head_clone = old_new[head->_idx]->as_Loop();
+ int nct = head->unswitch_count() + 1;
+ head->set_unswitch_count(nct);
+ head_clone->set_unswitch_count(nct);
+
+ // Add test to new "if" outside of loop
+ IfNode* invar_iff = proj_true->in(0)->as_If();
+ Node* invar_iff_c = invar_iff->in(0);
+ BoolNode* bol = unswitch_iff->in(1)->as_Bool();
+ invar_iff->set_req(1, bol);
+ invar_iff->_prob = unswitch_iff->_prob;
+
+ ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj();
+
+ // Hoist invariant casts out of each loop to the appropriate
+ // control projection.
+
+ Node_List worklist;
+
+ for (DUIterator_Fast imax, i = unswitch_iff->fast_outs(imax); i < imax; i++) {
+ ProjNode* proj= unswitch_iff->fast_out(i)->as_Proj();
+ // Copy to a worklist for easier manipulation
+ for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
+ Node* use = proj->fast_out(j);
+ if (use->Opcode() == Op_CheckCastPP && loop->is_invariant(use->in(1))) {
+ worklist.push(use);
+ }
+ }
+ ProjNode* invar_proj = invar_iff->proj_out(proj->_con)->as_Proj();
+ while (worklist.size() > 0) {
+ Node* use = worklist.pop();
+ Node* nuse = use->clone();
+ nuse->set_req(0, invar_proj);
+ _igvn.hash_delete(use);
+ use->set_req(1, nuse);
+ _igvn._worklist.push(use);
+ register_new_node(nuse, invar_proj);
+ // Same for the clone
+ Node* use_clone = old_new[use->_idx];
+ _igvn.hash_delete(use_clone);
+ use_clone->set_req(1, nuse);
+ _igvn._worklist.push(use_clone);
+ }
+ }
+
+ // Hardwire the control paths in the loops into if(true) and if(false)
+ _igvn.hash_delete(unswitch_iff);
+ short_circuit_if(unswitch_iff, proj_true);
+ _igvn._worklist.push(unswitch_iff);
+
+ IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If();
+ _igvn.hash_delete(unswitch_iff_clone);
+ short_circuit_if(unswitch_iff_clone, proj_false);
+ _igvn._worklist.push(unswitch_iff_clone);
+
+ // Reoptimize loops
+ loop->record_for_igvn();
+ for(int i = loop->_body.size() - 1; i >= 0 ; i--) {
+ Node *n = loop->_body[i];
+ Node *n_clone = old_new[n->_idx];
+ _igvn._worklist.push(n_clone);
+ }
+
+#ifndef PRODUCT
+ if (TraceLoopUnswitching) {
+ tty->print_cr("Loop unswitching orig: %d @ %d new: %d @ %d",
+ head->_idx, unswitch_iff->_idx,
+ old_new[head->_idx]->_idx, unswitch_iff_clone->_idx);
+ }
+#endif
+
+ C->set_major_progress();
+}
+
+//-------------------------create_slow_version_of_loop------------------------
+// Create a slow version of the loop by cloning the loop
+// and inserting an if to select fast-slow versions.
+// Return control projection of the entry to the fast version.
+ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
+ Node_List &old_new) {
+ LoopNode* head = loop->_head->as_Loop();
+ Node* entry = head->in(LoopNode::EntryControl);
+ _igvn.hash_delete(entry);
+ _igvn._worklist.push(entry);
+ IdealLoopTree* outer_loop = loop->_parent;
+
+ Node *cont = _igvn.intcon(1);
+ set_ctrl(cont, C->root());
+ Node* opq = new (C, 2) Opaque1Node(cont);
+ register_node(opq, outer_loop, entry, dom_depth(entry));
+ Node *bol = new (C, 2) Conv2BNode(opq);
+ register_node(bol, outer_loop, entry, dom_depth(entry));
+ IfNode* iff = new (C, 2) IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
+ register_node(iff, outer_loop, entry, dom_depth(entry));
+ ProjNode* iffast = new (C, 1) IfTrueNode(iff);
+ register_node(iffast, outer_loop, iff, dom_depth(iff));
+ ProjNode* ifslow = new (C, 1) IfFalseNode(iff);
+ register_node(ifslow, outer_loop, iff, dom_depth(iff));
+
+ // Clone the loop body. The clone becomes the fast loop. The
+ // original pre-header will (illegally) have 2 control users (old & new loops).
+ clone_loop(loop, old_new, dom_depth(head), iff);
+ assert(old_new[head->_idx]->is_Loop(), "" );
+
+ // Fast (true) control
+ _igvn.hash_delete(head);
+ head->set_req(LoopNode::EntryControl, iffast);
+ set_idom(head, iffast, dom_depth(head));
+ _igvn._worklist.push(head);
+
+ // Slow (false) control
+ LoopNode* slow_head = old_new[head->_idx]->as_Loop();
+ _igvn.hash_delete(slow_head);
+ slow_head->set_req(LoopNode::EntryControl, ifslow);
+ set_idom(slow_head, ifslow, dom_depth(slow_head));
+ _igvn._worklist.push(slow_head);
+
+ recompute_dom_depth();
+
+ return iffast;
+}
diff --git a/src/share/vm/opto/loopnode.cpp b/src/share/vm/opto/loopnode.cpp
new file mode 100644
index 000000000..938862f0f
--- /dev/null
+++ b/src/share/vm/opto/loopnode.cpp
@@ -0,0 +1,2886 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_loopnode.cpp.incl"
+
+//=============================================================================
+//------------------------------is_loop_iv-------------------------------------
+// Determine if a node is a counted-loop induction variable.
+// The method is declared in node.hpp.
+const Node* Node::is_loop_iv() const {
+ if (this->is_Phi() && !this->as_Phi()->is_copy() &&
+ this->as_Phi()->region()->is_CountedLoop() &&
+ this->as_Phi()->region()->as_CountedLoop()->phi() == this) {
+ return this;
+ } else {
+ return NULL;
+ }
+}
+
+//=============================================================================
+//------------------------------dump_spec--------------------------------------
+// Dump special per-node info
+#ifndef PRODUCT
+void LoopNode::dump_spec(outputStream *st) const {
+ if( is_inner_loop () ) st->print( "inner " );
+ if( is_partial_peel_loop () ) st->print( "partial_peel " );
+ if( partial_peel_has_failed () ) st->print( "partial_peel_failed " );
+}
+#endif
+
+//------------------------------get_early_ctrl---------------------------------
+// Compute earliest legal control
+Node *PhaseIdealLoop::get_early_ctrl( Node *n ) {
+ assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" );
+ uint i;
+ Node *early;
+ if( n->in(0) ) {
+ early = n->in(0);
+ if( !early->is_CFG() ) // Might be a non-CFG multi-def
+ early = get_ctrl(early); // So treat input as a straight data input
+ i = 1;
+ } else {
+ early = get_ctrl(n->in(1));
+ i = 2;
+ }
+ uint e_d = dom_depth(early);
+ assert( early, "" );
+ for( ; i < n->req(); i++ ) {
+ Node *cin = get_ctrl(n->in(i));
+ assert( cin, "" );
+ // Keep deepest dominator depth
+ uint c_d = dom_depth(cin);
+ if( c_d > e_d ) { // Deeper guy?
+ early = cin; // Keep deepest found so far
+ e_d = c_d;
+ } else if( c_d == e_d && // Same depth?
+ early != cin ) { // If not equal, must use slower algorithm
+ // If same depth but not equal, one _must_ dominate the other
+ // and we want the deeper (i.e., dominated) guy.
+ Node *n1 = early;
+ Node *n2 = cin;
+ while( 1 ) {
+ n1 = idom(n1); // Walk up until break cycle
+ n2 = idom(n2);
+ if( n1 == cin || // Walked early up to cin
+ dom_depth(n2) < c_d )
+ break; // early is deeper; keep him
+ if( n2 == early || // Walked cin up to early
+ dom_depth(n1) < c_d ) {
+ early = cin; // cin is deeper; keep him
+ break;
+ }
+ }
+ e_d = dom_depth(early); // Reset depth register cache
+ }
+ }
+
+ // Return earliest legal location
+ assert(early == find_non_split_ctrl(early), "unexpected early control");
+
+ return early;
+}
+
+//------------------------------set_early_ctrl---------------------------------
+// Set earliest legal control
+void PhaseIdealLoop::set_early_ctrl( Node *n ) {
+ Node *early = get_early_ctrl(n);
+
+ // Record earliest legal location
+ set_ctrl(n, early);
+}
+
+//------------------------------set_subtree_ctrl-------------------------------
+// set missing _ctrl entries on new nodes
+void PhaseIdealLoop::set_subtree_ctrl( Node *n ) {
+ // Already set? Get out.
+ if( _nodes[n->_idx] ) return;
+ // Recursively set _nodes array to indicate where the Node goes
+ uint i;
+ for( i = 0; i < n->req(); ++i ) {
+ Node *m = n->in(i);
+ if( m && m != C->root() )
+ set_subtree_ctrl( m );
+ }
+
+ // Fixup self
+ set_early_ctrl( n );
+}
+
+//------------------------------is_counted_loop--------------------------------
+Node *PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
+ PhaseGVN *gvn = &_igvn;
+
+ // Counted loop head must be a good RegionNode with only 3 non-NULL
+ // control input edges: Self, Entry, LoopBack.
+ if ( x->in(LoopNode::Self) == NULL || x->req() != 3 )
+ return NULL;
+
+ Node *init_control = x->in(LoopNode::EntryControl);
+ Node *back_control = x->in(LoopNode::LoopBackControl);
+ if( init_control == NULL || back_control == NULL ) // Partially dead
+ return NULL;
+ // Must also check for TOP when looking for a dead loop
+ if( init_control->is_top() || back_control->is_top() )
+ return NULL;
+
+ // Allow funny placement of Safepoint
+ if( back_control->Opcode() == Op_SafePoint )
+ back_control = back_control->in(TypeFunc::Control);
+
+ // Controlling test for loop
+ Node *iftrue = back_control;
+ uint iftrue_op = iftrue->Opcode();
+ if( iftrue_op != Op_IfTrue &&
+ iftrue_op != Op_IfFalse )
+ // I have a weird back-control. Probably the loop-exit test is in
+ // the middle of the loop and I am looking at some trailing control-flow
+ // merge point. To fix this I would have to partially peel the loop.
+ return NULL; // Obscure back-control
+
+ // Get boolean guarding loop-back test
+ Node *iff = iftrue->in(0);
+ if( get_loop(iff) != loop || !iff->in(1)->is_Bool() ) return NULL;
+ BoolNode *test = iff->in(1)->as_Bool();
+ BoolTest::mask bt = test->_test._test;
+ float cl_prob = iff->as_If()->_prob;
+ if( iftrue_op == Op_IfFalse ) {
+ bt = BoolTest(bt).negate();
+ cl_prob = 1.0 - cl_prob;
+ }
+ // Get backedge compare
+ Node *cmp = test->in(1);
+ int cmp_op = cmp->Opcode();
+ if( cmp_op != Op_CmpI )
+ return NULL; // Avoid pointer & float compares
+
+ // Find the trip-counter increment & limit. Limit must be loop invariant.
+ Node *incr = cmp->in(1);
+ Node *limit = cmp->in(2);
+
+ // ---------
+ // need 'loop()' test to tell if limit is loop invariant
+ // ---------
+
+ if( !is_member( loop, get_ctrl(incr) ) ) { // Swapped trip counter and limit?
+ Node *tmp = incr; // Then reverse order into the CmpI
+ incr = limit;
+ limit = tmp;
+ bt = BoolTest(bt).commute(); // And commute the exit test
+ }
+ if( is_member( loop, get_ctrl(limit) ) ) // Limit must be loop-invariant
+ return NULL;
+
+ // Trip-counter increment must be commutative & associative.
+ uint incr_op = incr->Opcode();
+ if( incr_op == Op_Phi && incr->req() == 3 ) {
+ incr = incr->in(2); // Assume incr is on backedge of Phi
+ incr_op = incr->Opcode();
+ }
+ Node* trunc1 = NULL;
+ Node* trunc2 = NULL;
+ const TypeInt* iv_trunc_t = NULL;
+ if (!(incr = CountedLoopNode::match_incr_with_optional_truncation(incr, &trunc1, &trunc2, &iv_trunc_t))) {
+ return NULL; // Funny increment opcode
+ }
+
+ // Get merge point
+ Node *xphi = incr->in(1);
+ Node *stride = incr->in(2);
+ if( !stride->is_Con() ) { // Oops, swap these
+ if( !xphi->is_Con() ) // Is the other guy a constant?
+ return NULL; // Nope, unknown stride, bail out
+ Node *tmp = xphi; // 'incr' is commutative, so ok to swap
+ xphi = stride;
+ stride = tmp;
+ }
+ //if( loop(xphi) != l) return NULL;// Merge point is in inner loop??
+ if( !xphi->is_Phi() ) return NULL; // Too much math on the trip counter
+ PhiNode *phi = xphi->as_Phi();
+
+ // Stride must be constant
+ const Type *stride_t = stride->bottom_type();
+ int stride_con = stride_t->is_int()->get_con();
+ assert( stride_con, "missed some peephole opt" );
+
+ // Phi must be of loop header; backedge must wrap to increment
+ if( phi->region() != x ) return NULL;
+ if( (trunc1 == NULL && phi->in(LoopNode::LoopBackControl) != incr) ||
+ (trunc1 != NULL && phi->in(LoopNode::LoopBackControl) != trunc1) ) {
+ return NULL;
+ }
+ Node *init_trip = phi->in(LoopNode::EntryControl);
+ //if (!init_trip->is_Con()) return NULL; // avoid rolling over MAXINT/MININT
+
+ // If iv trunc type is smaller than int, check for possible wrap.
+ if (!TypeInt::INT->higher_equal(iv_trunc_t)) {
+ assert(trunc1 != NULL, "must have found some truncation");
+
+ // Get a better type for the phi (filtered thru if's)
+ const TypeInt* phi_ft = filtered_type(phi);
+
+ // Can iv take on a value that will wrap?
+ //
+ // Ensure iv's limit is not within "stride" of the wrap value.
+ //
+ // Example for "short" type
+ // Truncation ensures value is in the range -32768..32767 (iv_trunc_t)
+ // If the stride is +10, then the last value of the induction
+ // variable before the increment (phi_ft->_hi) must be
+ // <= 32767 - 10 and (phi_ft->_lo) must be >= -32768 to
+ // ensure no truncation occurs after the increment.
+
+ if (stride_con > 0) {
+ if (iv_trunc_t->_hi - phi_ft->_hi < stride_con ||
+ iv_trunc_t->_lo > phi_ft->_lo) {
+ return NULL; // truncation may occur
+ }
+ } else if (stride_con < 0) {
+ if (iv_trunc_t->_lo - phi_ft->_lo > stride_con ||
+ iv_trunc_t->_hi < phi_ft->_hi) {
+ return NULL; // truncation may occur
+ }
+ }
+ // No possibility of wrap so truncation can be discarded
+ // Promote iv type to Int
+ } else {
+ assert(trunc1 == NULL && trunc2 == NULL, "no truncation for int");
+ }
+
+ // =================================================
+ // ---- SUCCESS! Found A Trip-Counted Loop! -----
+ //
+ // Canonicalize the condition on the test. If we can exactly determine
+ // the trip-counter exit value, then set limit to that value and use
+ // a '!=' test. Otherwise use condition '<' for count-up loops and
+ // '>' for count-down loops. If the condition is inverted and we will
+ // be rolling through MININT to MAXINT, then bail out.
+
+ C->print_method("Before CountedLoop", 3);
+
+ // Check for SafePoint on backedge and remove
+ Node *sfpt = x->in(LoopNode::LoopBackControl);
+ if( sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
+ lazy_replace( sfpt, iftrue );
+ loop->_tail = iftrue;
+ }
+
+
+ // If compare points to incr, we are ok. Otherwise the compare
+ // can directly point to the phi; in this case adjust the compare so that
+ // it points to the incr by adjusting the limit.
+ if( cmp->in(1) == phi || cmp->in(2) == phi )
+ limit = gvn->transform(new (C, 3) AddINode(limit,stride));
+
+ // The trip count for a positive or negative stride should be: (limit - init_trip + stride - 1)/stride.
+ // The final value of the iterator should be: trip_count * stride + init_trip.
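+ // (Illustrative numbers: init_trip = 0, limit = 10, stride = 3 gives
+ // trip_count = (10 - 0 + 3 - 1)/3 = 4 and a final value of 4*3 + 0 = 12.)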
+ const Type *limit_t = limit->bottom_type();
+ const Type *init_t = init_trip->bottom_type();
+ Node *one_p = gvn->intcon( 1);
+ Node *one_m = gvn->intcon(-1);
+
+ Node *trip_count = NULL;
+ Node *hook = new (C, 6) Node(6);
+ switch( bt ) {
+ case BoolTest::eq:
+ return NULL; // Bail out, but this loop trips at most twice!
+ case BoolTest::ne: // Ahh, the case we desire
+ if( stride_con == 1 )
+ trip_count = gvn->transform(new (C, 3) SubINode(limit,init_trip));
+ else if( stride_con == -1 )
+ trip_count = gvn->transform(new (C, 3) SubINode(init_trip,limit));
+ else
+ return NULL; // Odd stride; must prove we hit limit exactly
+ set_subtree_ctrl( trip_count );
+ //_loop.map(trip_count->_idx,loop(limit));
+ break;
+ case BoolTest::le: // Maybe convert to '<' case
+ limit = gvn->transform(new (C, 3) AddINode(limit,one_p));
+ set_subtree_ctrl( limit );
+ hook->init_req(4, limit);
+
+ bt = BoolTest::lt;
+ // Make the new limit be in the same loop nest as the old limit
+ //_loop.map(limit->_idx,limit_loop);
+ // Fall into next case
+ case BoolTest::lt: { // Maybe convert to '!=' case
+ if( stride_con < 0 ) return NULL; // Count down loop rolls through MAXINT
+ Node *range = gvn->transform(new (C, 3) SubINode(limit,init_trip));
+ set_subtree_ctrl( range );
+ hook->init_req(0, range);
+
+ Node *bias = gvn->transform(new (C, 3) AddINode(range,stride));
+ set_subtree_ctrl( bias );
+ hook->init_req(1, bias);
+
+ Node *bias1 = gvn->transform(new (C, 3) AddINode(bias,one_m));
+ set_subtree_ctrl( bias1 );
+ hook->init_req(2, bias1);
+
+ trip_count = gvn->transform(new (C, 3) DivINode(0,bias1,stride));
+ set_subtree_ctrl( trip_count );
+ hook->init_req(3, trip_count);
+ break;
+ }
+
+ case BoolTest::ge: // Maybe convert to '>' case
+ limit = gvn->transform(new (C, 3) AddINode(limit,one_m));
+ set_subtree_ctrl( limit );
+ hook->init_req(4 ,limit);
+
+ bt = BoolTest::gt;
+ // Make the new limit be in the same loop nest as the old limit
+ //_loop.map(limit->_idx,limit_loop);
+ // Fall into next case
+ case BoolTest::gt: { // Maybe convert to '!=' case
+ if( stride_con > 0 ) return NULL; // count up loop rolls through MININT
+ Node *range = gvn->transform(new (C, 3) SubINode(limit,init_trip));
+ set_subtree_ctrl( range );
+ hook->init_req(0, range);
+
+ Node *bias = gvn->transform(new (C, 3) AddINode(range,stride));
+ set_subtree_ctrl( bias );
+ hook->init_req(1, bias);
+
+ Node *bias1 = gvn->transform(new (C, 3) AddINode(bias,one_p));
+ set_subtree_ctrl( bias1 );
+ hook->init_req(2, bias1);
+
+ trip_count = gvn->transform(new (C, 3) DivINode(0,bias1,stride));
+ set_subtree_ctrl( trip_count );
+ hook->init_req(3, trip_count);
+ break;
+ }
+ }
+
+ Node *span = gvn->transform(new (C, 3) MulINode(trip_count,stride));
+ set_subtree_ctrl( span );
+ hook->init_req(5, span);
+
+ limit = gvn->transform(new (C, 3) AddINode(span,init_trip));
+ set_subtree_ctrl( limit );
+
+ // Build a canonical trip test.
+ // Clone code, as old values may be in use.
+ incr = incr->clone();
+ incr->set_req(1,phi);
+ incr->set_req(2,stride);
+ incr = _igvn.register_new_node_with_optimizer(incr);
+ set_early_ctrl( incr );
+ _igvn.hash_delete(phi);
+ phi->set_req_X( LoopNode::LoopBackControl, incr, &_igvn );
+
+ // If phi type is more restrictive than Int, raise to
+ // Int to prevent (almost) infinite recursion in igvn
+ // which can only handle integer types for constants or minint..maxint.
+ if (!TypeInt::INT->higher_equal(phi->bottom_type())) {
+ Node* nphi = PhiNode::make(phi->in(0), phi->in(LoopNode::EntryControl), TypeInt::INT);
+ nphi->set_req(LoopNode::LoopBackControl, phi->in(LoopNode::LoopBackControl));
+ nphi = _igvn.register_new_node_with_optimizer(nphi);
+ set_ctrl(nphi, get_ctrl(phi));
+ _igvn.subsume_node(phi, nphi);
+ phi = nphi->as_Phi();
+ }
+ cmp = cmp->clone();
+ cmp->set_req(1,incr);
+ cmp->set_req(2,limit);
+ cmp = _igvn.register_new_node_with_optimizer(cmp);
+ set_ctrl(cmp, iff->in(0));
+
+ Node *tmp = test->clone();
+ assert( tmp->is_Bool(), "" );
+ test = (BoolNode*)tmp;
+ (*(BoolTest*)&test->_test)._test = bt; //BoolTest::ne;
+ test->set_req(1,cmp);
+ _igvn.register_new_node_with_optimizer(test);
+ set_ctrl(test, iff->in(0));
+ // If the exit test is dead, STOP!
+ if( test == NULL ) return NULL;
+ _igvn.hash_delete(iff);
+ iff->set_req_X( 1, test, &_igvn );
+
+ // Replace the old IfNode with a new LoopEndNode
+ Node *lex = _igvn.register_new_node_with_optimizer(new (C, 2) CountedLoopEndNode( iff->in(0), iff->in(1), cl_prob, iff->as_If()->_fcnt ));
+ IfNode *le = lex->as_If();
+ uint dd = dom_depth(iff);
+ set_idom(le, le->in(0), dd); // Update dominance for loop exit
+ set_loop(le, loop);
+
+ // Get the loop-exit control
+ Node *if_f = iff->as_If()->proj_out(!(iftrue_op == Op_IfTrue));
+
+ // Need to swap loop-exit and loop-back control?
+ if( iftrue_op == Op_IfFalse ) {
+ Node *ift2=_igvn.register_new_node_with_optimizer(new (C, 1) IfTrueNode (le));
+ Node *iff2=_igvn.register_new_node_with_optimizer(new (C, 1) IfFalseNode(le));
+
+ loop->_tail = back_control = ift2;
+ set_loop(ift2, loop);
+ set_loop(iff2, get_loop(if_f));
+
+ // Lazy update of 'get_ctrl' mechanism.
+ lazy_replace_proj( if_f , iff2 );
+ lazy_replace_proj( iftrue, ift2 );
+
+ // Swap names
+ if_f = iff2;
+ iftrue = ift2;
+ } else {
+ _igvn.hash_delete(if_f );
+ _igvn.hash_delete(iftrue);
+ if_f ->set_req_X( 0, le, &_igvn );
+ iftrue->set_req_X( 0, le, &_igvn );
+ }
+
+ set_idom(iftrue, le, dd+1);
+ set_idom(if_f, le, dd+1);
+
+ // Now setup a new CountedLoopNode to replace the existing LoopNode
+ CountedLoopNode *l = new (C, 3) CountedLoopNode(init_control, back_control);
+ // The following assert is approximately true, and defines the intention
+ // of can_be_counted_loop. It fails, however, because phase->type
+ // is not yet initialized for this loop and its parts.
+ //assert(l->can_be_counted_loop(this), "sanity");
+ _igvn.register_new_node_with_optimizer(l);
+ set_loop(l, loop);
+ loop->_head = l;
+ // Fix all data nodes placed at the old loop head.
+ // Uses the lazy-update mechanism of 'get_ctrl'.
+ lazy_replace( x, l );
+ set_idom(l, init_control, dom_depth(x));
+
+ // Check for immediately preceding SafePoint and remove
+ Node *sfpt2 = le->in(0);
+ if( sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2))
+ lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
+
+ // Free up intermediate goo
+ _igvn.remove_dead_node(hook);
+
+ C->print_method("After CountedLoop", 3);
+
+ // Return trip counter
+ return trip_count;
+}
+
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Attempt to convert into a counted-loop.
+Node *LoopNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (!can_be_counted_loop(phase)) {
+ phase->C->set_major_progress();
+ }
+ return RegionNode::Ideal(phase, can_reshape);
+}
+
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Attempt to convert into a counted-loop.
+Node *CountedLoopNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return RegionNode::Ideal(phase, can_reshape);
+}
+
+//------------------------------dump_spec--------------------------------------
+// Dump special per-node info
+#ifndef PRODUCT
+void CountedLoopNode::dump_spec(outputStream *st) const {
+ LoopNode::dump_spec(st);
+ if( stride_is_con() ) {
+ st->print("stride: %d ",stride_con());
+ } else {
+ st->print("stride: not constant ");
+ }
+ if( is_pre_loop () ) st->print("pre of N%d" , _main_idx );
+ if( is_main_loop() ) st->print("main of N%d", _idx );
+ if( is_post_loop() ) st->print("post of N%d", _main_idx );
+}
+#endif
+
+//=============================================================================
+int CountedLoopEndNode::stride_con() const {
+ return stride()->bottom_type()->is_int()->get_con();
+}
+
+
+//----------------------match_incr_with_optional_truncation--------------------
+// Match increment with optional truncation:
+// CHAR: (i+1)&0x7fff, BYTE: ((i+1)<<8)>>8, or SHORT: ((i+1)<<16)>>16
+// Return NULL for failure. Success returns the increment node.
+Node* CountedLoopNode::match_incr_with_optional_truncation(
+ Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type) {
+ // Quick cutouts:
+ if (expr == NULL || expr->req() != 3) return NULL;
+
+ Node *t1 = NULL;
+ Node *t2 = NULL;
+ const TypeInt* trunc_t = TypeInt::INT;
+ Node* n1 = expr;
+ int n1op = n1->Opcode();
+
+ // Try to strip (n1 & M) or (n1 << N >> N) from n1.
+ if (n1op == Op_AndI &&
+ n1->in(2)->is_Con() &&
+ n1->in(2)->bottom_type()->is_int()->get_con() == 0x7fff) {
+ // %%% This check should match any mask of 2**K-1.
+ t1 = n1;
+ n1 = t1->in(1);
+ n1op = n1->Opcode();
+ trunc_t = TypeInt::CHAR;
+ } else if (n1op == Op_RShiftI &&
+ n1->in(1) != NULL &&
+ n1->in(1)->Opcode() == Op_LShiftI &&
+ n1->in(2) == n1->in(1)->in(2) &&
+ n1->in(2)->is_Con()) {
+ jint shift = n1->in(2)->bottom_type()->is_int()->get_con();
+ // %%% This check should match any shift in [1..31].
+ if (shift == 16 || shift == 8) {
+ t1 = n1;
+ t2 = t1->in(1);
+ n1 = t2->in(1);
+ n1op = n1->Opcode();
+ if (shift == 16) {
+ trunc_t = TypeInt::SHORT;
+ } else if (shift == 8) {
+ trunc_t = TypeInt::BYTE;
+ }
+ }
+ }
+
+ // If (maybe after stripping) it is an AddI, we won:
+ if (n1op == Op_AddI) {
+ *trunc1 = t1;
+ *trunc2 = t2;
+ *trunc_type = trunc_t;
+ return n1;
+ }
+
+ // failed
+ return NULL;
+}
+
+
+//------------------------------filtered_type--------------------------------
+// Return a type based on condition control flow
+// A successful return will be a type that is restricted due
+// to a series of dominating if-tests, such as:
+// if (i < 10) {
+// if (i > 0) {
+// here: "i" type is [1..10)
+// }
+// }
+// or a control flow merge
+// if (i < 10) {
+// do {
+// phi( , ) -- at top of loop type is [min_int..10)
+// i = ?
+// } while ( i < 10)
+//
+const TypeInt* PhaseIdealLoop::filtered_type( Node *n, Node* n_ctrl) {
+ assert(n && n->bottom_type()->is_int(), "must be int");
+ const TypeInt* filtered_t = NULL;
+ if (!n->is_Phi()) {
+ assert(n_ctrl != NULL || n_ctrl == C->top(), "valid control");
+ filtered_t = filtered_type_from_dominators(n, n_ctrl);
+
+ } else {
+ Node* phi = n->as_Phi();
+ Node* region = phi->in(0);
+ assert(n_ctrl == NULL || n_ctrl == region, "ctrl parameter must be region");
+ if (region && region != C->top()) {
+ for (uint i = 1; i < phi->req(); i++) {
+ Node* val = phi->in(i);
+ Node* use_c = region->in(i);
+ const TypeInt* val_t = filtered_type_from_dominators(val, use_c);
+ if (val_t != NULL) {
+ if (filtered_t == NULL) {
+ filtered_t = val_t;
+ } else {
+ filtered_t = filtered_t->meet(val_t)->is_int();
+ }
+ }
+ }
+ }
+ }
+ const TypeInt* n_t = _igvn.type(n)->is_int();
+ if (filtered_t != NULL) {
+ n_t = n_t->join(filtered_t)->is_int();
+ }
+ return n_t;
+}
+
+
+//------------------------------filtered_type_from_dominators--------------------------------
+// Return a possibly more restrictive type for val based on condition control flow of dominators
+const TypeInt* PhaseIdealLoop::filtered_type_from_dominators( Node* val, Node *use_ctrl) {
+ if (val->is_Con()) {
+ return val->bottom_type()->is_int();
+ }
+ uint if_limit = 10; // Max number of dominating if's visited
+ const TypeInt* rtn_t = NULL;
+
+ if (use_ctrl && use_ctrl != C->top()) {
+ Node* val_ctrl = get_ctrl(val);
+ uint val_dom_depth = dom_depth(val_ctrl);
+ Node* pred = use_ctrl;
+ uint if_cnt = 0;
+ while (if_cnt < if_limit) {
+ if ((pred->Opcode() == Op_IfTrue || pred->Opcode() == Op_IfFalse)) {
+ if_cnt++;
+ const TypeInt* if_t = filtered_type_at_if(val, pred);
+ if (if_t != NULL) {
+ if (rtn_t == NULL) {
+ rtn_t = if_t;
+ } else {
+ rtn_t = rtn_t->join(if_t)->is_int();
+ }
+ }
+ }
+ pred = idom(pred);
+ if (pred == NULL || pred == C->top()) {
+ break;
+ }
+ // Stop if going beyond definition block of val
+ if (dom_depth(pred) < val_dom_depth) {
+ break;
+ }
+ }
+ }
+ return rtn_t;
+}
+
+
+//------------------------------filtered_type_at_if--------------------------------
+// Return a possibly more restrictive type for val based on condition control flow for an if
+const TypeInt* PhaseIdealLoop::filtered_type_at_if( Node* val, Node *if_proj) {
+ assert(if_proj &&
+ (if_proj->Opcode() == Op_IfTrue || if_proj->Opcode() == Op_IfFalse), "expecting an if projection");
+ if (if_proj->in(0) && if_proj->in(0)->is_If()) {
+ IfNode* iff = if_proj->in(0)->as_If();
+ if (iff->in(1) && iff->in(1)->is_Bool()) {
+ BoolNode* bol = iff->in(1)->as_Bool();
+ if (bol->in(1) && bol->in(1)->is_Cmp()) {
+ const CmpNode* cmp = bol->in(1)->as_Cmp();
+ if (cmp->in(1) == val) {
+ const TypeInt* cmp2_t = _igvn.type(cmp->in(2))->isa_int();
+ if (cmp2_t != NULL) {
+ jint lo = cmp2_t->_lo;
+ jint hi = cmp2_t->_hi;
+ BoolTest::mask msk = if_proj->Opcode() == Op_IfTrue ? bol->_test._test : bol->_test.negate();
+ switch (msk) {
+ case BoolTest::ne:
+ // Can't refine type
+ return NULL;
+ case BoolTest::eq:
+ return cmp2_t;
+ case BoolTest::lt:
+ lo = TypeInt::INT->_lo;
+ if (hi - 1 < hi) {
+ hi = hi - 1;
+ }
+ break;
+ case BoolTest::le:
+ lo = TypeInt::INT->_lo;
+ break;
+ case BoolTest::gt:
+ if (lo + 1 > lo) {
+ lo = lo + 1;
+ }
+ hi = TypeInt::INT->_hi;
+ break;
+ case BoolTest::ge:
+ // lo unchanged
+ hi = TypeInt::INT->_hi;
+ break;
+ }
+ const TypeInt* rtn_t = TypeInt::make(lo, hi, cmp2_t->_widen);
+ return rtn_t;
+ }
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+//------------------------------dump_spec--------------------------------------
+// Dump special per-node info
+#ifndef PRODUCT
+void CountedLoopEndNode::dump_spec(outputStream *st) const {
+ if( in(TestValue)->is_Bool() ) {
+ BoolTest bt( test_trip()); // Added this for g++.
+
+ st->print("[");
+ bt.dump_on(st);
+ st->print("]");
+ }
+ st->print(" ");
+ IfNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+//------------------------------is_member--------------------------------------
+// Is 'l' a member of 'this'?
+int IdealLoopTree::is_member( const IdealLoopTree *l ) const {
+ while( l->_nest > _nest ) l = l->_parent;
+ return l == this;
+}
+
+//------------------------------set_nest---------------------------------------
+// Set loop tree nesting depth. Accumulate _has_call bits.
+int IdealLoopTree::set_nest( uint depth ) {
+ _nest = depth;
+ int bits = _has_call;
+ if( _child ) bits |= _child->set_nest(depth+1);
+ if( bits ) _has_call = 1;
+ if( _next ) bits |= _next ->set_nest(depth );
+ return bits;
+}
+
+//------------------------------split_fall_in----------------------------------
+// Split out multiple fall-in edges from the loop header. Move them to a
+// private RegionNode before the loop. This becomes the loop landing pad.
+void IdealLoopTree::split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt ) {
+ PhaseIterGVN &igvn = phase->_igvn;
+ uint i;
+
+ // Make a new RegionNode to be the landing pad.
+ Node *landing_pad = new (phase->C, fall_in_cnt+1) RegionNode( fall_in_cnt+1 );
+ phase->set_loop(landing_pad,_parent);
+ // Gather all the fall-in control paths into the landing pad
+ uint icnt = fall_in_cnt;
+ uint oreq = _head->req();
+ for( i = oreq-1; i>0; i-- )
+ if( !phase->is_member( this, _head->in(i) ) )
+ landing_pad->set_req(icnt--,_head->in(i));
+
+ // Peel off PhiNode edges as well
+ for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
+ Node *oj = _head->fast_out(j);
+ if( oj->is_Phi() ) {
+ PhiNode* old_phi = oj->as_Phi();
+ assert( old_phi->region() == _head, "" );
+ igvn.hash_delete(old_phi); // Yank from hash before hacking edges
+ Node *p = PhiNode::make_blank(landing_pad, old_phi);
+ uint icnt = fall_in_cnt;
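+ // Fill the landing-pad phi inputs from the highest slot downward, mirroring the order used for the landing pad region above.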
+ for( i = oreq-1; i>0; i-- ) {
+ if( !phase->is_member( this, _head->in(i) ) ) {
+ p->init_req(icnt--, old_phi->in(i));
+ // Go ahead and clean out old edges from old phi
+ old_phi->del_req(i);
+ }
+ }
+ // Search for CSE's here, because ZKM.jar does a lot of
+ // loop hackery and we need to be a little incremental
+ // with the CSE to avoid O(N^2) node blow-up.
+ Node *p2 = igvn.hash_find_insert(p); // Look for a CSE
+ if( p2 ) { // Found CSE
+ p->destruct(); // Recover useless new node
+ p = p2; // Use old node
+ } else {
+ igvn.register_new_node_with_optimizer(p, old_phi);
+ }
+ // Make old Phi refer to new Phi.
+ old_phi->add_req(p);
+ // Check for the special case where the old phi becomes useless and
+ // can simply disappear. In JavaGrande I have a case where this useless
+ // Phi is the loop limit and prevents recognizing a CountedLoop
+ // which in turn prevents removing an empty loop.
+ Node *id_old_phi = old_phi->Identity( &igvn );
+ if( id_old_phi != old_phi ) { // Found a simple identity?
+ // Note that I cannot call 'subsume_node' here, because
+ // that will yank the edge from old_phi to the Region and
+ // I'm mid-iteration over the Region's uses.
+ for (DUIterator_Last imin, i = old_phi->last_outs(imin); i >= imin; ) {
+ Node* use = old_phi->last_out(i);
+ igvn.hash_delete(use);
+ igvn._worklist.push(use);
+ uint uses_found = 0;
+ for (uint j = 0; j < use->len(); j++) {
+ if (use->in(j) == old_phi) {
+ if (j < use->req()) use->set_req (j, id_old_phi);
+ else use->set_prec(j, id_old_phi);
+ uses_found++;
+ }
+ }
+ i -= uses_found; // we deleted 1 or more copies of this edge
+ }
+ }
+ igvn._worklist.push(old_phi);
+ }
+ }
+ // Finally clean out the fall-in edges from the RegionNode
+ for( i = oreq-1; i>0; i-- ) {
+ if( !phase->is_member( this, _head->in(i) ) ) {
+ _head->del_req(i);
+ }
+ }
+ // Transform landing pad
+ igvn.register_new_node_with_optimizer(landing_pad, _head);
+ // Insert landing pad into the header
+ _head->add_req(landing_pad);
+}
+
+//------------------------------split_outer_loop-------------------------------
+// Split out the outermost loop from this shared header.
+void IdealLoopTree::split_outer_loop( PhaseIdealLoop *phase ) {
+ PhaseIterGVN &igvn = phase->_igvn;
+
+ // Find index of outermost loop; it should also be my tail.
+ uint outer_idx = 1;
+ while( _head->in(outer_idx) != _tail ) outer_idx++;
+
+ // Make a LoopNode for the outermost loop.
+ Node *ctl = _head->in(LoopNode::EntryControl);
+ Node *outer = new (phase->C, 3) LoopNode( ctl, _head->in(outer_idx) );
+ outer = igvn.register_new_node_with_optimizer(outer, _head);
+ phase->set_created_loop_node();
+ // Outermost loop falls into '_head' loop
+ _head->set_req(LoopNode::EntryControl, outer);
+ _head->del_req(outer_idx);
+ // Split all the Phis up between '_head' loop and 'outer' loop.
+ for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
+ Node *out = _head->fast_out(j);
+ if( out->is_Phi() ) {
+ PhiNode *old_phi = out->as_Phi();
+ assert( old_phi->region() == _head, "" );
+ Node *phi = PhiNode::make_blank(outer, old_phi);
+ phi->init_req(LoopNode::EntryControl, old_phi->in(LoopNode::EntryControl));
+ phi->init_req(LoopNode::LoopBackControl, old_phi->in(outer_idx));
+ phi = igvn.register_new_node_with_optimizer(phi, old_phi);
+ // Make old Phi point to new Phi on the fall-in path
+ igvn.hash_delete(old_phi);
+ old_phi->set_req(LoopNode::EntryControl, phi);
+ old_phi->del_req(outer_idx);
+ igvn._worklist.push(old_phi);
+ }
+ }
+
+ // Use the new loop head instead of the old shared one
+ _head = outer;
+ phase->set_loop(_head, this);
+}
+
+//------------------------------fix_parent-------------------------------------
+static void fix_parent( IdealLoopTree *loop, IdealLoopTree *parent ) {
+ loop->_parent = parent;
+ if( loop->_child ) fix_parent( loop->_child, loop );
+ if( loop->_next ) fix_parent( loop->_next , parent );
+}
+
+//------------------------------estimate_path_freq-----------------------------
+static float estimate_path_freq( Node *n ) {
+ // Try to extract some path frequency info
+ IfNode *iff;
+ for( int i = 0; i < 50; i++ ) { // Skip through a bunch of uncommon tests
+ uint nop = n->Opcode();
+ if( nop == Op_SafePoint ) { // Skip any safepoint
+ n = n->in(0);
+ continue;
+ }
+ if( nop == Op_CatchProj ) { // Get count from a prior call
+ // Assume the call does not always throw exceptions: this means the call-site
+ // count is also the frequency of the fall-through path.
+ assert( n->is_CatchProj(), "" );
+ if( ((CatchProjNode*)n)->_con != CatchProjNode::fall_through_index )
+ return 0.0f; // Assume call exception path is rare
+ Node *call = n->in(0)->in(0)->in(0);
+ assert( call->is_Call(), "expect a call here" );
+ const JVMState *jvms = ((CallNode*)call)->jvms();
+ ciMethodData* methodData = jvms->method()->method_data();
+ if (!methodData->is_mature()) return 0.0f; // No call-site data
+ ciProfileData* data = methodData->bci_to_data(jvms->bci());
+ if ((data == NULL) || !data->is_CounterData()) {
+ // no call profile available, try call's control input
+ n = n->in(0);
+ continue;
+ }
+ return data->as_CounterData()->count()/FreqCountInvocations;
+ }
+ // See if there's a gating IF test
+ Node *n_c = n->in(0);
+ if( !n_c->is_If() ) break; // No estimate available
+ iff = n_c->as_If();
+ if( iff->_fcnt != COUNT_UNKNOWN ) // Have a valid count?
+ // Compute how much count comes on this path
+ return ((nop == Op_IfTrue) ? iff->_prob : 1.0f - iff->_prob) * iff->_fcnt;
+ // Have no count info. Skip dull uncommon-trap like branches.
+ if( (nop == Op_IfTrue && iff->_prob < PROB_LIKELY_MAG(5)) ||
+ (nop == Op_IfFalse && iff->_prob > PROB_UNLIKELY_MAG(5)) )
+ break;
+ // Skip through never-taken branch; look for a real loop exit.
+ n = iff->in(0);
+ }
+ return 0.0f; // No estimate available
+}
+
+//------------------------------merge_many_backedges---------------------------
+// Merge all the backedges from the shared header into a private Region.
+// Feed that region as the one backedge to this loop.
+void IdealLoopTree::merge_many_backedges( PhaseIdealLoop *phase ) {
+ uint i;
+
+ // Scan for the top 2 hottest backedges
+ float hotcnt = 0.0f;
+ float warmcnt = 0.0f;
+ uint hot_idx = 0;
+ // Loop starts at 2 because slot 1 is the fall-in path
+ for( i = 2; i < _head->req(); i++ ) {
+ float cnt = estimate_path_freq(_head->in(i));
+ if( cnt > hotcnt ) { // Grab hottest path
+ warmcnt = hotcnt;
+ hotcnt = cnt;
+ hot_idx = i;
+ } else if( cnt > warmcnt ) { // And 2nd hottest path
+ warmcnt = cnt;
+ }
+ }
+
+ // See if the hottest backedge is worthy of being an inner loop
+ // by being much hotter than the next hottest backedge.
+ if( hotcnt <= 0.0001 ||
+ hotcnt < 2.0*warmcnt ) hot_idx = 0;// No hot backedge
+
+ // Peel out the backedges into a private merge point; peel
+ // them all except optionally hot_idx.
+ PhaseIterGVN &igvn = phase->_igvn;
+
+ Node *hot_tail = NULL;
+ // Make a Region for the merge point
+ Node *r = new (phase->C, 1) RegionNode(1);
+ for( i = 2; i < _head->req(); i++ ) {
+ if( i != hot_idx )
+ r->add_req( _head->in(i) );
+ else hot_tail = _head->in(i);
+ }
+ igvn.register_new_node_with_optimizer(r, _head);
+ // Plug region into end of loop _head, followed by hot_tail
+ while( _head->req() > 3 ) _head->del_req( _head->req()-1 );
+ _head->set_req(2, r);
+ if( hot_idx ) _head->add_req(hot_tail);
+
+ // Split all the Phis up between '_head' loop and the Region 'r'
+ for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
+ Node *out = _head->fast_out(j);
+ if( out->is_Phi() ) {
+ PhiNode* n = out->as_Phi();
+ igvn.hash_delete(n); // Delete from hash before hacking edges
+ Node *hot_phi = NULL;
+ Node *phi = new (phase->C, r->req()) PhiNode(r, n->type(), n->adr_type());
+ // Check all inputs for the ones to peel out
+ uint j = 1;
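+ // 'j' is the next free input slot of the new merge phi; slot 0 is the region.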
+ for( uint i = 2; i < n->req(); i++ ) {
+ if( i != hot_idx )
+ phi->set_req( j++, n->in(i) );
+ else hot_phi = n->in(i);
+ }
+ // Register the phi but do not transform until whole place transforms
+ igvn.register_new_node_with_optimizer(phi, n);
+ // Add the merge phi to the old Phi
+ while( n->req() > 3 ) n->del_req( n->req()-1 );
+ n->set_req(2, phi);
+ if( hot_idx ) n->add_req(hot_phi);
+ }
+ }
+
+
+ // Insert a new IdealLoopTree below me and turn it into a clone
+ // of my loop tree. Turn self into a loop headed by _head and with
+ // tail being the new merge point.
+ IdealLoopTree *ilt = new IdealLoopTree( phase, _head, _tail );
+ phase->set_loop(_tail,ilt); // Adjust tail
+ _tail = r; // Self's tail is new merge point
+ phase->set_loop(r,this);
+ ilt->_child = _child; // New guy has my children
+ _child = ilt; // Self has new guy as only child
+ ilt->_parent = this; // new guy has self for parent
+ ilt->_nest = _nest; // Same nesting depth (for now)
+
+ // Starting with 'ilt', look for child loop trees using the same shared
+ // header. Flatten these out; they will no longer be loops in the end.
+ IdealLoopTree **pilt = &_child;
+ while( ilt ) {
+ if( ilt->_head == _head ) {
+ uint i;
+ for( i = 2; i < _head->req(); i++ )
+ if( _head->in(i) == ilt->_tail )
+ break; // Still a loop
+ if( i == _head->req() ) { // No longer a loop
+ // Flatten ilt. Hang ilt's "_next" list from the end of
+ // ilt's '_child' list. Move the ilt's _child up to replace ilt.
+ IdealLoopTree **cp = &ilt->_child;
+ while( *cp ) cp = &(*cp)->_next; // Find end of child list
+ *cp = ilt->_next; // Hang next list at end of child list
+ *pilt = ilt->_child; // Move child up to replace ilt
+ ilt->_head = NULL; // Flag as a loop UNIONED into parent
+ ilt = ilt->_child; // Repeat using new ilt
+ continue; // do not advance over ilt->_child
+ }
+ assert( ilt->_tail == hot_tail, "expected to only find the hot inner loop here" );
+ phase->set_loop(_head,ilt);
+ }
+ pilt = &ilt->_child; // Advance to next
+ ilt = *pilt;
+ }
+
+ if( _child ) fix_parent( _child, this );
+}
+
+//------------------------------beautify_loops---------------------------------
+// Split shared headers and insert loop landing pads.
+// Insert a LoopNode to replace the RegionNode.
+// Return TRUE if loop tree is structurally changed.
+bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
+ bool result = false;
+ // Cache parts in locals for easy access
+ PhaseIterGVN &igvn = phase->_igvn;
+
+ phase->C->print_method("Before beautify loops", 3);
+
+ igvn.hash_delete(_head); // Yank from hash before hacking edges
+
+ // Check for multiple fall-in paths. Peel off a landing pad if need be.
+ int fall_in_cnt = 0;
+ for( uint i = 1; i < _head->req(); i++ )
+ if( !phase->is_member( this, _head->in(i) ) )
+ fall_in_cnt++;
+ assert( fall_in_cnt, "at least 1 fall-in path" );
+ if( fall_in_cnt > 1 ) // Need a loop landing pad to merge fall-ins
+ split_fall_in( phase, fall_in_cnt );
+
+ // Swap inputs to the _head and all Phis to move the fall-in edge to
+ // the left.
+ fall_in_cnt = 1;
+ while( phase->is_member( this, _head->in(fall_in_cnt) ) )
+ fall_in_cnt++;
+ if( fall_in_cnt > 1 ) {
+ // Since I am just swapping inputs I do not need to update def-use info
+ Node *tmp = _head->in(1);
+ _head->set_req( 1, _head->in(fall_in_cnt) );
+ _head->set_req( fall_in_cnt, tmp );
+ // Swap also all Phis
+ for (DUIterator_Fast imax, i = _head->fast_outs(imax); i < imax; i++) {
+ Node* phi = _head->fast_out(i);
+ if( phi->is_Phi() ) {
+ igvn.hash_delete(phi); // Yank from hash before hacking edges
+ tmp = phi->in(1);
+ phi->set_req( 1, phi->in(fall_in_cnt) );
+ phi->set_req( fall_in_cnt, tmp );
+ }
+ }
+ }
+ assert( !phase->is_member( this, _head->in(1) ), "left edge is fall-in" );
+ assert( phase->is_member( this, _head->in(2) ), "right edge is loop" );
+
+ // If I am a shared header (multiple backedges), peel off the many
+ // backedges into a private merge point and use the merge point as
+ // the one true backedge.
+ if( _head->req() > 3 ) {
+ // Merge the many backedges into a single backedge.
+ merge_many_backedges( phase );
+ result = true;
+ }
+
+ // If I am still a shared header (multiple backedges), peel off my own loop.
+ // I had better be the outermost loop.
+ if( _head->req() > 3 ) {
+ split_outer_loop( phase );
+ result = true;
+
+ } else if( !_head->is_Loop() && !_irreducible ) {
+ // Make a new LoopNode to replace the old loop head
+ Node *l = new (phase->C, 3) LoopNode( _head->in(1), _head->in(2) );
+ l = igvn.register_new_node_with_optimizer(l, _head);
+ phase->set_created_loop_node();
+ // Go ahead and replace _head
+ phase->_igvn.subsume_node( _head, l );
+ _head = l;
+ phase->set_loop(_head, this);
+ for (DUIterator_Fast imax, i = l->fast_outs(imax); i < imax; i++)
+ phase->_igvn.add_users_to_worklist(l->fast_out(i));
+ }
+
+ phase->C->print_method("After beautify loops", 3);
+
+ // Now recursively beautify nested loops
+ if( _child ) result |= _child->beautify_loops( phase );
+ if( _next ) result |= _next ->beautify_loops( phase );
+ return result;
+}
+
+//------------------------------allpaths_check_safepts----------------------------
+// Allpaths backwards scan from loop tail, terminating each path at first safepoint
+// encountered. Helper for check_safepts.
+void IdealLoopTree::allpaths_check_safepts(VectorSet &visited, Node_List &stack) {
+ assert(stack.size() == 0, "empty stack");
+ stack.push(_tail);
+ visited.Clear();
+ visited.set(_tail->_idx);
+ while (stack.size() > 0) {
+ Node* n = stack.pop();
+ if (n->is_Call() && n->as_Call()->guaranteed_safepoint()) {
+ // Terminate this path
+ } else if (n->Opcode() == Op_SafePoint) {
+ if (_phase->get_loop(n) != this) {
+ if (_required_safept == NULL) _required_safept = new Node_List();
+ _required_safept->push(n); // save the one closest to the tail
+ }
+ // Terminate this path
+ } else {
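+ // For a Region walk all control inputs, except do not follow a Loop's backedge; for other nodes just walk in(0).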
+ uint start = n->is_Region() ? 1 : 0;
+ uint end = n->is_Region() && !n->is_Loop() ? n->req() : start + 1;
+ for (uint i = start; i < end; i++) {
+ Node* in = n->in(i);
+ assert(in->is_CFG(), "must be");
+ if (!visited.test_set(in->_idx) && is_member(_phase->get_loop(in))) {
+ stack.push(in);
+ }
+ }
+ }
+ }
+}
+
+//------------------------------check_safepts----------------------------
+// Given dominators, try to find loops with calls that must always be
+// executed (call dominates loop tail). These loops do not need non-call
+// safepoints (ncsfpt).
+//
+// A complication is that a safepoint in an inner loop may be needed
+// by an outer loop. In the following, the inner loop sees it has a
+// call (block 3) on every path from the head (block 2) to the
+// backedge (arc 3->2). So it deletes the ncsfpt (non-call safepoint)
+// in block 2, _but_ this leaves the outer loop without a safepoint.
+//
+// entry 0
+// |
+// v
+// outer 1,2 +->1
+// | |
+// | v
+// | 2<---+ ncsfpt in 2
+// |_/|\ |
+// | v |
+// inner 2,3 / 3 | call in 3
+// / | |
+// v +--+
+// exit 4
+//
+//
+// This method creates, for each loop, a list (_required_safept) of ncsfpt
+// nodes that must be preserved. When a ncsfpt may be deleted, it is first
+// looked for in the lists of the outer loops of the current loop.
+//
+// The insights into the problem:
+// A) counted loops are okay
+// B) innermost loops are okay (only an inner loop can delete
+// a ncsfpt needed by an outer loop)
+// C) a loop is immune from an inner loop deleting a safepoint
+// if the loop has a call on the idom-path
+// D) a loop is also immune if it has a ncsfpt (non-call safepoint) on the
+// idom-path that is not in a nested loop
+// E) otherwise, an ncsfpt on the idom-path that is nested in an inner
+// loop needs to be prevented from deletion by an inner loop
+//
+// There are two analyses:
+// 1) The first, and cheaper one, scans the loop body from
+// tail to head following the idom (immediate dominator)
+// chain, looking for the cases (C,D,E) above.
+// Since inner loops are scanned before outer loops, there is summary
+// information about inner loops. Inner loops can be skipped over
+// when the tail of an inner loop is encountered.
+//
+// 2) The second, invoked if the first fails to find a call or ncsfpt on
+// the idom path (which is rare), scans all predecessor control paths
+// from the tail to the head, terminating a path when a call or sfpt
+// is encountered, to find the ncsfpt's that are closest to the tail.
+//
+void IdealLoopTree::check_safepts(VectorSet &visited, Node_List &stack) {
+ // Bottom up traversal
+ IdealLoopTree* ch = _child;
+ while (ch != NULL) {
+ ch->check_safepts(visited, stack);
+ ch = ch->_next;
+ }
+
+ if (!_head->is_CountedLoop() && !_has_sfpt && _parent != NULL && !_irreducible) {
+ bool has_call = false; // call on dom-path
+ bool has_local_ncsfpt = false; // ncsfpt on dom-path at this loop depth
+ Node* nonlocal_ncsfpt = NULL; // ncsfpt on dom-path at a deeper depth
+ // Scan the dom-path nodes from tail to head
+ for (Node* n = tail(); n != _head; n = _phase->idom(n)) {
+ if (n->is_Call() && n->as_Call()->guaranteed_safepoint()) {
+ has_call = true;
+ _has_sfpt = 1; // Then no need for a safept!
+ break;
+ } else if (n->Opcode() == Op_SafePoint) {
+ if (_phase->get_loop(n) == this) {
+ has_local_ncsfpt = true;
+ break;
+ }
+ if (nonlocal_ncsfpt == NULL) {
+ nonlocal_ncsfpt = n; // save the one closest to the tail
+ }
+ } else {
+ IdealLoopTree* nlpt = _phase->get_loop(n);
+ if (this != nlpt) {
+ // If at an inner loop tail, see if the inner loop has already
+ // recorded seeing a call on the dom-path (and stop.) If not,
+ // jump to the head of the inner loop.
+ assert(is_member(nlpt), "nested loop");
+ Node* tail = nlpt->_tail;
+ if (tail->in(0)->is_If()) tail = tail->in(0);
+ if (n == tail) {
+ // If inner loop has call on dom-path, so does outer loop
+ if (nlpt->_has_sfpt) {
+ has_call = true;
+ _has_sfpt = 1;
+ break;
+ }
+ // Skip to head of inner loop
+ assert(_phase->is_dominator(_head, nlpt->_head), "inner head dominated by outer head");
+ n = nlpt->_head;
+ }
+ }
+ }
+ }
+ // Record safepts that this loop needs preserved when an
+ // inner loop attempts to delete its safepoints.
+ if (_child != NULL && !has_call && !has_local_ncsfpt) {
+ if (nonlocal_ncsfpt != NULL) {
+ if (_required_safept == NULL) _required_safept = new Node_List();
+ _required_safept->push(nonlocal_ncsfpt);
+ } else {
+ // Failed to find a suitable safept on the dom-path. Now use
+ // an all paths walk from tail to head, looking for safepoints to preserve.
+ allpaths_check_safepts(visited, stack);
+ }
+ }
+ }
+}
+
+//---------------------------is_deleteable_safept----------------------------
+// Is safept not required by an outer loop?
+bool PhaseIdealLoop::is_deleteable_safept(Node* sfpt) {
+ assert(sfpt->Opcode() == Op_SafePoint, "");
+ IdealLoopTree* lp = get_loop(sfpt)->_parent;
+ while (lp != NULL) {
+ Node_List* sfpts = lp->_required_safept;
+ if (sfpts != NULL) {
+ for (uint i = 0; i < sfpts->size(); i++) {
+ if (sfpt == sfpts->at(i))
+ return false;
+ }
+ }
+ lp = lp->_parent;
+ }
+ return true;
+}
+
+//------------------------------counted_loop-----------------------------------
+// Convert to counted loops where possible
+void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
+
+ // For grins, set the inner-loop flag here
+ if( !_child ) {
+ if( _head->is_Loop() ) _head->as_Loop()->set_inner_loop();
+ }
+
+ if( _head->is_CountedLoop() ||
+ phase->is_counted_loop( _head, this ) ) {
+ _has_sfpt = 1; // Indicate we do not need a safepoint here
+
+ // Look for a safepoint to remove
+ for (Node* n = tail(); n != _head; n = phase->idom(n))
+ if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this &&
+ phase->is_deleteable_safept(n))
+ phase->lazy_replace(n,n->in(TypeFunc::Control));
+
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ Node *incr = cl->incr();
+ if( !incr ) return; // Dead loop?
+ Node *init = cl->init_trip();
+ Node *phi = cl->phi();
+ // protect against stride not being a constant
+ if( !cl->stride_is_con() ) return;
+ int stride_con = cl->stride_con();
+
+ // Look for induction variables
+
+ // Visit all children, looking for Phis
+ for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
+ Node *out = cl->out(i);
+ if (!out->is_Phi()) continue; // Looking for phis
+ PhiNode* phi2 = out->as_Phi();
+ Node *incr2 = phi2->in( LoopNode::LoopBackControl );
+ // Look for induction variables of the form: X += constant
+ if( phi2->region() != _head ||
+ incr2->req() != 3 ||
+ incr2->in(1) != phi2 ||
+ incr2 == incr ||
+ incr2->Opcode() != Op_AddI ||
+ !incr2->in(2)->is_Con() )
+ continue;
+
+ // Check for parallel induction variable (parallel to trip counter)
+ // via an affine function. In particular, count-down loops with
+ // count-up array indices are common. We only RCE references off
+ // the trip-counter, so we need to convert all these to trip-counter
+ // expressions.
+ Node *init2 = phi2->in( LoopNode::EntryControl );
+ int stride_con2 = incr2->in(2)->get_int();
+
+ // The general case here gets a little tricky. We want to find the
+ // GCD of all possible parallel IV's and make a new IV using this
+ // GCD for the loop. Then all possible IVs are simple multiples of
+ // the GCD. In practice, this will cover very few extra loops.
+ // Instead we require 'stride_con2' to be a multiple of 'stride_con',
+ // where +/-1 is the common case, but other integer multiples are
+ // also easy to handle.
+ int ratio_con = stride_con2/stride_con;
+
+ if( ratio_con * stride_con == stride_con2 ) { // Check for exact
+ // Convert to using the trip counter. The parallel induction
+ // variable differs from the trip counter by a loop-invariant
+ // amount, the difference between their respective initial values.
+ // It is scaled by the 'ratio_con'.
+ Compile* C = phase->C;
+ Node* ratio = phase->_igvn.intcon(ratio_con);
+ phase->set_ctrl(ratio, C->root());
+ Node* ratio_init = new (C, 3) MulINode(init, ratio);
+ phase->_igvn.register_new_node_with_optimizer(ratio_init, init);
+ phase->set_early_ctrl(ratio_init);
+ Node* diff = new (C, 3) SubINode(init2, ratio_init);
+ phase->_igvn.register_new_node_with_optimizer(diff, init2);
+ phase->set_early_ctrl(diff);
+ Node* ratio_idx = new (C, 3) MulINode(phi, ratio);
+ phase->_igvn.register_new_node_with_optimizer(ratio_idx, phi);
+ phase->set_ctrl(ratio_idx, cl);
+ Node* add = new (C, 3) AddINode(ratio_idx, diff);
+ phase->_igvn.register_new_node_with_optimizer(add);
+ phase->set_ctrl(add, cl);
+ phase->_igvn.hash_delete( phi2 );
+ phase->_igvn.subsume_node( phi2, add );
+ // Sometimes an induction variable is unused
+ if (add->outcnt() == 0) {
+ phase->_igvn.remove_dead_node(add);
+ }
+ --i; // deleted this phi; rescan starting with next position
+ continue;
+ }
+ }
+ } else if (_parent != NULL && !_irreducible) {
+ // Not a counted loop.
+ // Look for a safepoint on the idom-path to remove, preserving the first one
+ bool found = false;
+ Node* n = tail();
+ for (; n != _head && !found; n = phase->idom(n)) {
+ if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this)
+ found = true; // Found one
+ }
+ // Skip past it and delete the others
+ for (; n != _head; n = phase->idom(n)) {
+ if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this &&
+ phase->is_deleteable_safept(n))
+ phase->lazy_replace(n,n->in(TypeFunc::Control));
+ }
+ }
+
+ // Recursively
+ if( _child ) _child->counted_loop( phase );
+ if( _next ) _next ->counted_loop( phase );
+}
+
+#ifndef PRODUCT
+//------------------------------dump_head--------------------------------------
+// Dump 1 liner for loop header info
+void IdealLoopTree::dump_head( ) const {
+ for( uint i=0; i<_nest; i++ )
+ tty->print(" ");
+ tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
+ if( _irreducible ) tty->print(" IRREDUCIBLE");
+ if( _head->is_CountedLoop() ) {
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ tty->print(" counted");
+ if( cl->is_pre_loop () ) tty->print(" pre" );
+ if( cl->is_main_loop() ) tty->print(" main");
+ if( cl->is_post_loop() ) tty->print(" post");
+ }
+ tty->cr();
+}
+
+//------------------------------dump-------------------------------------------
+// Dump loops by loop tree
+void IdealLoopTree::dump( ) const {
+ dump_head();
+ if( _child ) _child->dump();
+ if( _next ) _next ->dump();
+}
+
+#endif
+
+//=============================================================================
+//------------------------------PhaseIdealLoop---------------------------------
+// Create a PhaseIdealLoop. Build the ideal Loop tree. Map each Ideal Node to
+// its corresponding LoopNode. If 'optimize' is true, do some loop cleanups.
+PhaseIdealLoop::PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs )
+ : PhaseTransform(Ideal_Loop),
+ _igvn(igvn),
+ _dom_lca_tags(C->comp_arena()) {
+ // Reset major-progress flag for the driver's heuristics
+ C->clear_major_progress();
+
+#ifndef PRODUCT
+ // Capture for later assert
+ uint unique = C->unique();
+ _loop_invokes++;
+ _loop_work += unique;
+#endif
+
+ // True if the method has at least 1 irreducible loop
+ _has_irreducible_loops = false;
+
+ _created_loop_node = false;
+
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+ // Pre-grow the mapping from Nodes to IdealLoopTrees.
+ _nodes.map(C->unique(), NULL);
+ memset(_nodes.adr(), 0, wordSize * C->unique());
+
+ // Pre-build the top-level outermost loop tree entry
+ _ltree_root = new IdealLoopTree( this, C->root(), C->root() );
+ // Do not need a safepoint at the top level
+ _ltree_root->_has_sfpt = 1;
+
+ // Empty pre-order array
+ allocate_preorders();
+
+ // Build a loop tree on the fly. Build a mapping from CFG nodes to
+ // IdealLoopTree entries. Data nodes are NOT walked.
+ build_loop_tree();
+ // Check for bailout, and return
+ if (C->failing()) {
+ return;
+ }
+
+ // No loops after all
+ if( !_ltree_root->_child ) C->set_has_loops(false);
+
+ // There should always be an outer loop containing the Root and Return nodes.
+ // If not, we have a degenerate empty program. Bail out in this case.
+ if (!has_node(C->root())) {
+ C->clear_major_progress();
+ C->record_method_not_compilable("empty program detected during loop optimization");
+ return;
+ }
+
+ // Nothing to do, so get out
+ if( !C->has_loops() && !do_split_ifs && !verify_me) {
+ _igvn.optimize(); // Cleanup NeverBranches
+ return;
+ }
+
+ // Set loop nesting depth
+ _ltree_root->set_nest( 0 );
+
+ // Split shared headers and insert loop landing pads.
+ // Do not bother doing this on the Root loop of course.
+ if( !verify_me && _ltree_root->_child ) {
+ if( _ltree_root->_child->beautify_loops( this ) ) {
+ // Re-build loop tree!
+ _ltree_root->_child = NULL;
+ _nodes.clear();
+ reallocate_preorders();
+ build_loop_tree();
+ // Check for bailout, and return
+ if (C->failing()) {
+ return;
+ }
+ // Reset loop nesting depth
+ _ltree_root->set_nest( 0 );
+ }
+ }
+
+ // Build Dominators for elision of NULL checks & loop finding.
+ // Since nodes do not have a slot for immediate dominator, make
+ // a persistent side array for that info indexed on node->_idx.
+ _idom_size = C->unique();
+ _idom = NEW_RESOURCE_ARRAY( Node*, _idom_size );
+ _dom_depth = NEW_RESOURCE_ARRAY( uint, _idom_size );
+ _dom_stk = NULL; // Allocated on demand in recompute_dom_depth
+ memset( _dom_depth, 0, _idom_size * sizeof(uint) );
+
+ Dominators();
+
+ // As a side effect, Dominators removed any unreachable CFG paths
+ // into RegionNodes. It doesn't do this test against Root, so
+ // we do it here.
+ for( uint i = 1; i < C->root()->req(); i++ ) {
+ if( !_nodes[C->root()->in(i)->_idx] ) { // Dead path into Root?
+ _igvn.hash_delete(C->root());
+ C->root()->del_req(i);
+ _igvn._worklist.push(C->root());
+ i--; // Rerun same iteration on compressed edges
+ }
+ }
+
+ // Given dominators, try to find inner loops with calls that must
+ // always be executed (call dominates loop tail). These loops do
+ // not need a separate safepoint.
+ Node_List cisstack(a);
+ _ltree_root->check_safepts(visited, cisstack);
+
+ // Walk the DATA nodes and place into loops. Find earliest control
+ // node. For CFG nodes, the _nodes array starts out and remains
+ // holding the associated IdealLoopTree pointer. For DATA nodes, the
+ // _nodes array holds the earliest legal controlling CFG node.
+
+ // Allocate stack with enough space to avoid frequent realloc
+ int stack_size = (C->unique() >> 1) + 16; // (unique>>1)+16 from Java2D stats
+ Node_Stack nstack( a, stack_size );
+
+ visited.Clear();
+ Node_List worklist(a);
+ // Don't need C->root() on worklist since
+ // it will be processed among C->top() inputs
+ worklist.push( C->top() );
+ visited.set( C->top()->_idx ); // Set C->top() as visited now
+ build_loop_early( visited, worklist, nstack, verify_me );
+
+ // Given early legal placement, try finding counted loops. This placement
+ // is good enough to discover most loop invariants.
+ if( !verify_me )
+ _ltree_root->counted_loop( this );
+
+ // Find latest loop placement. Find ideal loop placement.
+ visited.Clear();
+ init_dom_lca_tags();
+ // Need C->root() on worklist when processing outs
+ worklist.push( C->root() );
+ NOT_PRODUCT( C->verify_graph_edges(); )
+ worklist.push( C->top() );
+ build_loop_late( visited, worklist, nstack, verify_me );
+
+ // clear out the dead code
+ while(_deadlist.size()) {
+ igvn.remove_globally_dead_node(_deadlist.pop());
+ }
+
+#ifndef PRODUCT
+ C->verify_graph_edges();
+ if( verify_me ) { // Nested verify pass?
+ // Check to see if the verify mode is broken
+ assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?");
+ return;
+ }
+ if( VerifyLoopOptimizations ) verify();
+#endif
+
+ if (ReassociateInvariants) {
+ // Reassociate invariants and prep for split_thru_phi
+ for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
+ IdealLoopTree* lpt = iter.current();
+ if (!lpt->is_counted() || !lpt->is_inner()) continue;
+
+ lpt->reassociate_invariants(this);
+
+ // Because RCE opportunities can be masked by split_thru_phi,
+ // look for RCE candidates and inhibit split_thru_phi
+ // on just their loop-phi's for this pass of loop opts
+ if( SplitIfBlocks && do_split_ifs ) {
+ if (lpt->policy_range_check(this)) {
+ lpt->_rce_candidate = true;
+ }
+ }
+ }
+ }
+
+ // Check for aggressive application of split-if and other transforms
+ // that require basic-block info (like cloning through Phi's)
+ if( SplitIfBlocks && do_split_ifs ) {
+ visited.Clear();
+ split_if_with_blocks( visited, nstack );
+ NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
+ }
+
+ // Perform iteration-splitting on inner loops. Split iterations to avoid
+ // range checks or one-shot null checks.
+
+ // If split-if's didn't hack the graph too bad (no CFG changes)
+ // then do loop opts.
+ if( C->has_loops() && !C->major_progress() ) {
+ memset( worklist.adr(), 0, worklist.Size()*sizeof(Node*) );
+ _ltree_root->_child->iteration_split( this, worklist );
+ // No verify after peeling! GCM has hoisted code out of the loop.
+ // After peeling, the hoisted code could sink inside the peeled area.
+ // The peeling code does not try to recompute the best location for
+ // all the code before the peeled area, so the verify pass will always
+ // complain about it.
+ }
+ // Do verify graph edges in any case
+ NOT_PRODUCT( C->verify_graph_edges(); );
+
+ if( !do_split_ifs ) {
+ // We saw major progress in Split-If to get here. We forced a
+ // pass with unrolling and not split-if, however more split-if's
+ // might make progress. If the unrolling didn't make progress
+ // then the major-progress flag got cleared and we won't try
+ // another round of Split-If. In particular the ever-common
+ // instance-of/check-cast pattern requires at least 2 rounds of
+ // Split-If to clear out.
+ C->set_major_progress();
+ }
+
+ // Repeat loop optimizations if new loops were seen
+ if (created_loop_node()) {
+ C->set_major_progress();
+ }
+
+ // Convert scalar to superword operations
+
+ if (UseSuperWord && C->has_loops() && !C->major_progress()) {
+ // SuperWord transform
+ SuperWord sw(this);
+ for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
+ IdealLoopTree* lpt = iter.current();
+ if (lpt->is_counted()) {
+ sw.transform_loop(lpt);
+ }
+ }
+ }
+
+ // Cleanup any modified bits
+ _igvn.optimize();
+
+ // Do not repeat loop optimizations if irreducible loops are present
+ // by claiming no-progress.
+ if( _has_irreducible_loops )
+ C->clear_major_progress();
+}
+
+#ifndef PRODUCT
+//------------------------------print_statistics-------------------------------
+int PhaseIdealLoop::_loop_invokes=0;// Count of PhaseIdealLoop invokes
+int PhaseIdealLoop::_loop_work=0; // Sum of PhaseIdealLoop x unique
+void PhaseIdealLoop::print_statistics() {
+ tty->print_cr("PhaseIdealLoop=%d, sum _unique=%d", _loop_invokes, _loop_work);
+}
+
+//------------------------------verify-----------------------------------------
+// Build a verify-only PhaseIdealLoop, and see that it agrees with me.
+static int fail; // debug only, so multi-thread races don't matter
+void PhaseIdealLoop::verify() const {
+ int old_progress = C->major_progress();
+ ResourceMark rm;
+ PhaseIdealLoop loop_verify( _igvn, this, false );
+ VectorSet visited(Thread::current()->resource_area());
+
+ fail = 0;
+ verify_compare( C->root(), &loop_verify, visited );
+ assert( fail == 0, "verify loops failed" );
+ // Verify loop structure is the same
+ _ltree_root->verify_tree(loop_verify._ltree_root, NULL);
+ // Reset major-progress. It was cleared by creating a verify version of
+ // PhaseIdealLoop.
+ for( int i=0; i<old_progress; i++ )
+ C->set_major_progress();
+}
+
+//------------------------------verify_compare---------------------------------
+// Make sure me and the given PhaseIdealLoop agree on key data structures
+void PhaseIdealLoop::verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const {
+ if( !n ) return;
+ if( visited.test_set( n->_idx ) ) return;
+ if( !_nodes[n->_idx] ) { // Unreachable
+ assert( !loop_verify->_nodes[n->_idx], "both should be unreachable" );
+ return;
+ }
+
+ uint i;
+ for( i = 0; i < n->req(); i++ )
+ verify_compare( n->in(i), loop_verify, visited );
+
+ // Check the '_nodes' block/loop structure
+ i = n->_idx;
+ if( has_ctrl(n) ) { // We have control; verify has loop or ctrl
+ if( _nodes[i] != loop_verify->_nodes[i] &&
+ get_ctrl_no_update(n) != loop_verify->get_ctrl_no_update(n) ) {
+ tty->print("Mismatched control setting for: ");
+ n->dump();
+ if( fail++ > 10 ) return;
+ Node *c = get_ctrl_no_update(n);
+ tty->print("We have it as: ");
+ if( c->in(0) ) c->dump();
+ else tty->print_cr("N%d",c->_idx);
+ tty->print("Verify thinks: ");
+ if( loop_verify->has_ctrl(n) )
+ loop_verify->get_ctrl_no_update(n)->dump();
+ else
+ loop_verify->get_loop_idx(n)->dump();
+ tty->cr();
+ }
+ } else { // We have a loop
+ IdealLoopTree *us = get_loop_idx(n);
+ if( loop_verify->has_ctrl(n) ) {
+ tty->print("Mismatched loop setting for: ");
+ n->dump();
+ if( fail++ > 10 ) return;
+ tty->print("We have it as: ");
+ us->dump();
+ tty->print("Verify thinks: ");
+ loop_verify->get_ctrl_no_update(n)->dump();
+ tty->cr();
+ } else if (!C->major_progress()) {
+ // Loop selection can be messed up if we did a major progress
+ // operation, like split-if. Do not verify in that case.
+ IdealLoopTree *them = loop_verify->get_loop_idx(n);
+ if( us->_head != them->_head || us->_tail != them->_tail ) {
+ tty->print("Unequals loops for: ");
+ n->dump();
+ if( fail++ > 10 ) return;
+ tty->print("We have it as: ");
+ us->dump();
+ tty->print("Verify thinks: ");
+ them->dump();
+ tty->cr();
+ }
+ }
+ }
+
+ // Check for immediate dominators being equal
+ if( i >= _idom_size ) {
+ if( !n->is_CFG() ) return;
+ tty->print("CFG Node with no idom: ");
+ n->dump();
+ return;
+ }
+ if( !n->is_CFG() ) return;
+ if( n == C->root() ) return; // No IDOM here
+
+ assert(n->_idx == i, "sanity");
+ Node *id = idom_no_update(n);
+ if( id != loop_verify->idom_no_update(n) ) {
+ tty->print("Unequals idoms for: ");
+ n->dump();
+ if( fail++ > 10 ) return;
+ tty->print("We have it as: ");
+ id->dump();
+ tty->print("Verify thinks: ");
+ loop_verify->idom_no_update(n)->dump();
+ tty->cr();
+ }
+
+}
+
+//------------------------------verify_tree------------------------------------
+// Verify that tree structures match. Because the CFG can change, siblings
+// within the loop tree can be reordered. We attempt to deal with that by
+// reordering the verify's loop tree if possible.
+void IdealLoopTree::verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const {
+ assert( _parent == parent, "Badly formed loop tree" );
+
+ // Siblings not in same order? Attempt to re-order.
+ if( _head != loop->_head ) {
+ // Find _next pointer to update
+ IdealLoopTree **pp = &loop->_parent->_child;
+ while( *pp != loop )
+ pp = &((*pp)->_next);
+ // Find proper sibling to be next
+ IdealLoopTree **nn = &loop->_next;
+ while( (*nn) && (*nn)->_head != _head )
+ nn = &((*nn)->_next);
+
+ // Check for no match.
+ if( !(*nn) ) {
+ // Annoyingly, irreducible loops can pick different headers
+ // after a major_progress operation, so the rest of the loop
+ // tree cannot be matched.
+ if (_irreducible && Compile::current()->major_progress()) return;
+ assert( 0, "failed to match loop tree" );
+ }
+
+ // Move (*nn) to (*pp)
+ IdealLoopTree *hit = *nn;
+ *nn = hit->_next;
+ hit->_next = loop;
+ *pp = loop;
+ loop = hit;
+ // Now try again to verify
+ }
+
+ assert( _head == loop->_head , "mismatched loop head" );
+ Node *tail = _tail; // Inline a non-updating version of
+ while( !tail->in(0) ) // the 'tail()' call.
+ tail = tail->in(1);
+ assert( tail == loop->_tail, "mismatched loop tail" );
+
+ // Counted loops that are guarded should be able to find their guards
+ if( _head->is_CountedLoop() && _head->as_CountedLoop()->is_main_loop() ) {
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ Node *init = cl->init_trip();
+ Node *ctrl = cl->in(LoopNode::EntryControl);
+ assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
+ Node *iff = ctrl->in(0);
+ assert( iff->Opcode() == Op_If, "" );
+ Node *bol = iff->in(1);
+ assert( bol->Opcode() == Op_Bool, "" );
+ Node *cmp = bol->in(1);
+ assert( cmp->Opcode() == Op_CmpI, "" );
+ Node *add = cmp->in(1);
+ Node *opaq;
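+ // The guard's Opaque1 limit is either cmp->in(1) itself or, when cmp->in(1) is the init value, cmp->in(2).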
+ if( add->Opcode() == Op_Opaque1 ) {
+ opaq = add;
+ } else {
+ assert( add->Opcode() == Op_AddI || add->Opcode() == Op_ConI , "" );
+ assert( add == init, "" );
+ opaq = cmp->in(2);
+ }
+ assert( opaq->Opcode() == Op_Opaque1, "" );
+
+ }
+
+ if (_child != NULL) _child->verify_tree(loop->_child, this);
+ if (_next != NULL) _next ->verify_tree(loop->_next, parent);
+ // Innermost loops need to verify loop bodies,
+ // but only if no 'major_progress'
+ int fail = 0;
+ if (!Compile::current()->major_progress() && _child == NULL) {
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *n = _body.at(i);
+ if (n->outcnt() == 0) continue; // Ignore dead
+ uint j;
+ for( j = 0; j < loop->_body.size(); j++ )
+ if( loop->_body.at(j) == n )
+ break;
+ if( j == loop->_body.size() ) { // Not found in loop body
+ // Last-ditch effort to avoid the assertion: it's possible that we
+ // have some users (so outcnt is not zero) but are still dead.
+ // Try to find from root.
+ if (Compile::current()->root()->find(n->_idx)) {
+ fail++;
+ tty->print("We have that verify does not: ");
+ n->dump();
+ }
+ }
+ }
+ for( uint i2 = 0; i2 < loop->_body.size(); i2++ ) {
+ Node *n = loop->_body.at(i2);
+ if (n->outcnt() == 0) continue; // Ignore dead
+ uint j;
+ for( j = 0; j < _body.size(); j++ )
+ if( _body.at(j) == n )
+ break;
+ if( j == _body.size() ) { // Not found in loop body
+ // Last-ditch effort to avoid the assertion: it's possible that we
+ // have some users (so outcnt is not zero) but are still dead.
+ // Try to find from root.
+ if (Compile::current()->root()->find(n->_idx)) {
+ fail++;
+ tty->print("Verify has that we do not: ");
+ n->dump();
+ }
+ }
+ }
+ assert( !fail, "loop body mismatch" );
+ }
+}
+
+#endif
+
+//------------------------------set_idom---------------------------------------
+void PhaseIdealLoop::set_idom(Node* d, Node* n, uint dom_depth) {
+ uint idx = d->_idx;
+ if (idx >= _idom_size) {
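+ // Grow the idom/depth side arrays by doubling until idx fits; new depth entries start at 0.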
+ uint newsize = _idom_size<<1;
+ while( idx >= newsize ) {
+ newsize <<= 1;
+ }
+ _idom = REALLOC_RESOURCE_ARRAY( Node*, _idom,_idom_size,newsize);
+ _dom_depth = REALLOC_RESOURCE_ARRAY( uint, _dom_depth,_idom_size,newsize);
+ memset( _dom_depth + _idom_size, 0, (newsize - _idom_size) * sizeof(uint) );
+ _idom_size = newsize;
+ }
+ _idom[idx] = n;
+ _dom_depth[idx] = dom_depth;
+}
+
+//------------------------------recompute_dom_depth---------------------------------------
+// The dominator tree is constructed with only parent pointers.
+// This recomputes the depth in the tree by first tagging all
+// nodes as "no depth yet" marker. The next pass then runs up
+// the dom tree from each node marked "no depth yet", and computes
+// the depth on the way back down.
+void PhaseIdealLoop::recompute_dom_depth() {
+ uint no_depth_marker = C->unique();
+ uint i;
+ // Initialize depth to "no depth yet"
+ for (i = 0; i < _idom_size; i++) {
+ if (_dom_depth[i] > 0 && _idom[i] != NULL) {
+ _dom_depth[i] = no_depth_marker;
+ }
+ }
+ if (_dom_stk == NULL) {
+ uint init_size = C->unique() / 100; // Guess that 1/100 is a reasonable initial size.
+ if (init_size < 10) init_size = 10;
+ _dom_stk = new (C->node_arena()) GrowableArray<uint>(C->node_arena(), init_size, 0, 0);
+ }
+ // Compute new depth for each node.
+ for (i = 0; i < _idom_size; i++) {
+ uint j = i;
+ // Run up the dom tree to find a node with a depth
+ while (_dom_depth[j] == no_depth_marker) {
+ _dom_stk->push(j);
+ j = _idom[j]->_idx;
+ }
+ // Compute the depth on the way back down this tree branch
+ uint dd = _dom_depth[j] + 1;
+ while (_dom_stk->length() > 0) {
+ uint j = _dom_stk->pop();
+ _dom_depth[j] = dd;
+ dd++;
+ }
+ }
+}
+
+//------------------------------sort-------------------------------------------
+// Insert 'loop' into the existing loop tree. 'innermost' is a leaf of the
+// loop tree, not the root.
+IdealLoopTree *PhaseIdealLoop::sort( IdealLoopTree *loop, IdealLoopTree *innermost ) {
+ if( !innermost ) return loop; // New innermost loop
+
+ int loop_preorder = get_preorder(loop->_head); // Cache pre-order number
+ assert( loop_preorder, "not yet post-walked loop" );
+ IdealLoopTree **pp = &innermost; // Pointer to previous next-pointer
+ IdealLoopTree *l = *pp; // Do I go before or after 'l'?
+
+ // Insert at start of list
+ while( l ) { // Insertion sort based on pre-order
+ if( l == loop ) return innermost; // Already on list!
+ int l_preorder = get_preorder(l->_head); // Cache pre-order number
+ assert( l_preorder, "not yet post-walked l" );
+ // Check header pre-order number to figure proper nesting
+ if( loop_preorder > l_preorder )
+ break; // End of insertion
+ // If headers tie (e.g., shared headers) check tail pre-order numbers.
+ // Since I split shared headers, you'd think this could not happen.
+ // BUT: I must first do the preorder numbering before I can discover I
+ // have shared headers, so the split headers all get the same preorder
+ // number as the RegionNode they split from.
+ if( loop_preorder == l_preorder &&
+ get_preorder(loop->_tail) < get_preorder(l->_tail) )
+ break; // Also check for shared headers (same pre#)
+ pp = &l->_parent; // Chain up list
+ l = *pp;
+ }
+ // Link into list
+ // Point predecessor to me
+ *pp = loop;
+ // Point me to successor
+ IdealLoopTree *p = loop->_parent;
+ loop->_parent = l; // Point me to successor
+ if( p ) sort( p, innermost ); // Insert my parents into list as well
+ return innermost;
+}
+
+//------------------------------build_loop_tree--------------------------------
+// I use a modified Vick/Tarjan algorithm. I need pre- and post-visit
+// bits. The _nodes[] array is mapped by Node index and holds a NULL for
+// not-yet-pre-walked, pre-order # for pre-but-not-post-walked and holds the
+// tightest enclosing IdealLoopTree for post-walked.
+//
+// During my forward walk I do a short 1-layer lookahead to see if I can find
+// a loop backedge that doesn't have any work on the backedge. This
+// helps me construct nested loops with shared headers better.
+//
+// Once I've done the forward recursion, I do the post-work. For each child
+// I check to see if there is a backedge. Backedges define a loop! I
+// insert an IdealLoopTree at the target of the backedge.
+//
+// During the post-work I also check to see if I have several children
+// belonging to different loops. If so, then this Node is a decision point
+// where control flow can choose to change loop nests. It is at this
+// decision point where I can figure out how loops are nested. At this
+// time I can properly order the different loop nests from my children.
+// Note that there may not be any backedges at the decision point!
+//
+// Since the decision point can be far removed from the backedges, I can't
+// order my loops at the time I discover them. Thus at the decision point
+// I need to inspect loop header pre-order numbers to properly nest my
+// loops. This means I need to sort my children's loops by pre-order.
+// The sort is of size number-of-control-children, which generally limits
+// it to size 2 (i.e., I just choose between my 2 target loops).
+void PhaseIdealLoop::build_loop_tree() {
+ // Allocate stack of size C->unique()/2 to avoid frequent realloc
+ GrowableArray <Node *> bltstack(C->unique() >> 1);
+ Node *n = C->root();
+ bltstack.push(n);
+ int pre_order = 1;
+ int stack_size;
+
+ while ( ( stack_size = bltstack.length() ) != 0 ) {
+ n = bltstack.top(); // Leave node on stack
+ if ( !is_visited(n) ) {
+ // ---- Pre-pass Work ----
+ // Pre-walked but not post-walked nodes need a pre_order number.
+
+ set_preorder_visited( n, pre_order ); // set as visited
+
+ // ---- Scan over children ----
+ // Scan first over control projections that lead to loop headers.
+ // This helps us find inner-to-outer loops with shared headers better.
+
+ // Scan children's children for loop headers.
+ for ( int i = n->outcnt() - 1; i >= 0; --i ) {
+ Node* m = n->raw_out(i); // Child
+ if( m->is_CFG() && !is_visited(m) ) { // Only for CFG children
+ // Scan over children's children to find loop
+ for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
+ Node* l = m->fast_out(j);
+ if( is_visited(l) && // Been visited?
+ !is_postvisited(l) && // But not post-visited
+ get_preorder(l) < pre_order ) { // And smaller pre-order
+ // Found! Scan the DFS down this path before doing other paths
+ bltstack.push(m);
+ break;
+ }
+ }
+ }
+ }
+ pre_order++;
+ }
+ else if ( !is_postvisited(n) ) {
+ // Note: build_loop_tree_impl() adds out edges on rare occasions,
+ // such as com.sun.rsasign.am::a.
+ // For non-recursive version, first, process current children.
+ // On next iteration, check if additional children were added.
+ for ( int k = n->outcnt() - 1; k >= 0; --k ) {
+ Node* u = n->raw_out(k);
+ if ( u->is_CFG() && !is_visited(u) ) {
+ bltstack.push(u);
+ }
+ }
+ if ( bltstack.length() == stack_size ) {
+ // There were no additional children, post visit node now
+ (void)bltstack.pop(); // Remove node from stack
+ pre_order = build_loop_tree_impl( n, pre_order );
+ // Check for bailout
+ if (C->failing()) {
+ return;
+ }
+ // Check to grow _preorders[] array for the case when
+ // build_loop_tree_impl() adds new nodes.
+ check_grow_preorders();
+ }
+ }
+ else {
+ (void)bltstack.pop(); // Remove post-visited node from stack
+ }
+ }
+}
+
+//------------------------------build_loop_tree_impl---------------------------
+int PhaseIdealLoop::build_loop_tree_impl( Node *n, int pre_order ) {
+ // ---- Post-pass Work ----
+ // Pre-walked but not post-walked nodes need a pre_order number.
+
+ // Tightest enclosing loop for this Node
+ IdealLoopTree *innermost = NULL;
+
+ // For all children, see if any edge is a backedge. If so, make a loop
+ // for it. Then find the tightest enclosing loop for the self Node.
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* m = n->fast_out(i); // Child
+ if( n == m ) continue; // Ignore control self-cycles
+ if( !m->is_CFG() ) continue;// Ignore non-CFG edges
+
+ IdealLoopTree *l; // Child's loop
+ if( !is_postvisited(m) ) { // Child visited but not post-visited?
+ // Found a backedge
+ assert( get_preorder(m) < pre_order, "should be backedge" );
+ // Check for the RootNode, which is already a LoopNode and is allowed
+ // to have multiple "backedges".
+ if( m == C->root()) { // Found the root?
+ l = _ltree_root; // Root is the outermost LoopNode
+ } else { // Else found a nested loop
+ // Insert a LoopNode to mark this loop.
+ l = new IdealLoopTree(this, m, n);
+ } // End of Else found a nested loop
+ if( !has_loop(m) ) // If 'm' does not already have a loop set
+ set_loop(m, l); // Set loop header to loop now
+
+ } else { // Else not a nested loop
+ if( !_nodes[m->_idx] ) continue; // Dead code has no loop
+ l = get_loop(m); // Get previously determined loop
+ // If successor is header of a loop (nest), move up-loop till it
+ // is a member of some outer enclosing loop. Since there are no
+ // shared headers (I've split them already) I only need to go up
+ // at most 1 level.
+ while( l && l->_head == m ) // Successor heads loop?
+ l = l->_parent; // Move up 1 for me
+ // If this loop is not properly parented, then this loop
+ // has no exit path out, i.e., it's an infinite loop.
+ if( !l ) {
+ // Make loop "reachable" from root so the CFG is reachable. Basically
+ // insert a bogus loop exit that is never taken. 'm', the loop head,
+ // points to 'n', one (of possibly many) fall-in paths. There may be
+ // many backedges as well.
+
+ // Here I set the loop to be the root loop. I could have, after
+ // inserting a bogus loop exit, restarted the recursion and found my
+ // new loop exit. This would make the infinite loop a first-class
+ // loop and it would then get properly optimized. What's the use of
+ // optimizing an infinite loop?
+ l = _ltree_root; // Oops, found infinite loop
+
+ // Insert the NeverBranch between 'm' and its control user.
+ NeverBranchNode *iff = new (C, 1) NeverBranchNode( m );
+ _igvn.register_new_node_with_optimizer(iff);
+ set_loop(iff, l);
+ Node *if_t = new (C, 1) CProjNode( iff, 0 );
+ _igvn.register_new_node_with_optimizer(if_t);
+ set_loop(if_t, l);
+
+ Node* cfg = NULL; // Find the One True Control User of m
+ for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
+ Node* x = m->fast_out(j);
+ if (x->is_CFG() && x != m && x != iff)
+ { cfg = x; break; }
+ }
+ assert(cfg != NULL, "must find the control user of m");
+ uint k = 0; // Probably cfg->in(0)
+ while( cfg->in(k) != m ) k++; // But check in case cfg is a Region
+ cfg->set_req( k, if_t ); // Now point to NeverBranch
+
+ // Now create the never-taken loop exit
+ Node *if_f = new (C, 1) CProjNode( iff, 1 );
+ _igvn.register_new_node_with_optimizer(if_f);
+ set_loop(if_f, l);
+ // Find frame ptr for Halt. Relies on the optimizer
+ // V-N'ing. Easier and quicker than searching through
+ // the program structure.
+ Node *frame = new (C, 1) ParmNode( C->start(), TypeFunc::FramePtr );
+ _igvn.register_new_node_with_optimizer(frame);
+ // Halt & Catch Fire
+ Node *halt = new (C, TypeFunc::Parms) HaltNode( if_f, frame );
+ _igvn.register_new_node_with_optimizer(halt);
+ set_loop(halt, l);
+ C->root()->add_req(halt);
+ set_loop(C->root(), _ltree_root);
+ }
+ }
+ // Weeny check for irreducible. This child was already visited (this
+ // IS the post-work phase). Is this child's loop header post-visited
+ // as well? If so, then I found another entry into the loop.
+ while( is_postvisited(l->_head) ) {
+ // found irreducible
+ l->_irreducible = true;
+ l = l->_parent;
+ _has_irreducible_loops = true;
+ // Check for bad CFG here to prevent crash, and bailout of compile
+ if (l == NULL) {
+ C->record_method_not_compilable("unhandled CFG detected during loop optimization");
+ return pre_order;
+ }
+ }
+
+ // This Node might be a decision point for loops. It is only if
+ // its children belong to several different loops. The sort call
+ // does a trivial amount of work if there is only 1 child or all
+ // children belong to the same loop. If however, the children
+ // belong to different loops, the sort call will properly set the
+ // _parent pointers to show how the loops nest.
+ //
+ // In any case, it returns the tightest enclosing loop.
+ innermost = sort( l, innermost );
+ }
+
+ // Def-use info will have some dead stuff; dead stuff will have no
+ // loop decided on.
+
+ // Am I a loop header? If so fix up my parent's child and next ptrs.
+ if( innermost && innermost->_head == n ) {
+ assert( get_loop(n) == innermost, "" );
+ IdealLoopTree *p = innermost->_parent;
+ IdealLoopTree *l = innermost;
+ while( p && l->_head == n ) {
+ l->_next = p->_child; // Put self on parent's child list
+ p->_child = l; // Make self as first child of parent
+ l = p; // Now walk up the parent chain
+ p = l->_parent;
+ }
+ } else {
+ // Note that it is possible for a LoopNode to reach here, if the
+ // backedge has been made unreachable (hence the LoopNode no longer
+ // denotes a Loop, and will eventually be removed).
+
+ // Record tightest enclosing loop for self. Mark as post-visited.
+ set_loop(n, innermost);
+ // Also record has_call flag early on
+ if( innermost ) {
+ if( n->is_Call() && !n->is_CallLeaf() && !n->is_macro() ) {
+ // Do not count uncommon calls
+ if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) {
+ Node *iff = n->in(0)->in(0);
+ if( !iff->is_If() ||
+ (n->in(0)->Opcode() == Op_IfFalse &&
+ (1.0 - iff->as_If()->_prob) >= 0.01) ||
+ (iff->as_If()->_prob >= 0.01) )
+ innermost->_has_call = 1;
+ }
+ }
+ }
+ }
+
+ // Flag as post-visited now
+ set_postvisited(n);
+ return pre_order;
+}
+
+
+//------------------------------build_loop_early-------------------------------
+// Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
+// First pass computes the earliest controlling node possible. This is the
+// controlling input with the deepest dominating depth.
+void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) {
+ while (worklist.size() != 0) {
+ // Use local variables nstack_top_n & nstack_top_i to cache values
+ // on nstack's top.
+ Node *nstack_top_n = worklist.pop();
+ uint nstack_top_i = 0;
+//while_nstack_nonempty:
+ while (true) {
+ // Get parent node and next input's index from stack's top.
+ Node *n = nstack_top_n;
+ uint i = nstack_top_i;
+ uint cnt = n->req(); // Count of inputs
+ if (i == 0) { // Pre-process the node.
+ if( has_node(n) && // Have either loop or control already?
+ !has_ctrl(n) ) { // Have loop picked out already?
+ // During "merge_many_backedges" we fold up several nested loops
+ // into a single loop. This makes the members of the original
+ // loop bodies point to dead loops; they need to move up
+ // to the new UNION'd larger loop. I set the _head field of these
+ // dead loops to NULL and the _parent field points to the owning
+ // loop. Shades of UNION-FIND algorithm.
+ IdealLoopTree *ilt;
+ while( !(ilt = get_loop(n))->_head ) {
+ // Normally I would use a set_loop here. But in this one special
+ // case, it is legal (and expected) to change what loop a Node
+ // belongs to.
+ _nodes.map(n->_idx, (Node*)(ilt->_parent) );
+ }
+ // Remove safepoints ONLY if I've already seen I don't need one.
+ // (the old code here would yank a 2nd safepoint after seeing a
+ // first one, even though the 1st did not dominate in the loop body
+ // and thus could be avoided indefinitely)
+ if( !verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint &&
+ is_deleteable_safept(n)) {
+ Node *in = n->in(TypeFunc::Control);
+ lazy_replace(n,in); // Pull safepoint now
+ // Carry on with the recursion "as if" we are walking
+ // only the control input
+ if( !visited.test_set( in->_idx ) ) {
+ worklist.push(in); // Visit this guy later, using worklist
+ }
+ // Get next node from nstack:
+ // - skip n's inputs processing by setting i > cnt;
+ // - we also will not call set_early_ctrl(n) since
+ // has_node(n) == true (see the condition above).
+ i = cnt + 1;
+ }
+ }
+ } // if (i == 0)
+
+ // Visit all inputs
+ bool done = true; // Assume all n's inputs will be processed
+ while (i < cnt) {
+ Node *in = n->in(i);
+ ++i;
+ if (in == NULL) continue;
+ if (in->pinned() && !in->is_CFG())
+ set_ctrl(in, in->in(0));
+ int is_visited = visited.test_set( in->_idx );
+ if (!has_node(in)) { // No controlling input yet?
+ assert( !in->is_CFG(), "CFG Node with no controlling input?" );
+ assert( !is_visited, "visit only once" );
+ nstack.push(n, i); // Save parent node and next input's index.
+ nstack_top_n = in; // Process current input now.
+ nstack_top_i = 0;
+ done = false; // Not all n's inputs processed.
+ break; // continue while_nstack_nonempty;
+ } else if (!is_visited) {
+ // This guy has a location picked out for him, but has not yet
+ // been visited. Happens to all CFG nodes, for instance.
+ // Visit him using the worklist instead of recursion, to break
+ // cycles. Since he has a location already we do not need to
+ // find his location before proceeding with the current Node.
+ worklist.push(in); // Visit this guy later, using worklist
+ }
+ }
+ if (done) {
+ // All of n's inputs have been processed, complete post-processing.
+
+ // Compute the earliest point this Node can go.
+ // CFG, Phi, pinned nodes already know their controlling input.
+ if (!has_node(n)) {
+ // Record earliest legal location
+ set_early_ctrl( n );
+ }
+ if (nstack.is_empty()) {
+ // Finished all nodes on stack.
+ // Process next node on the worklist.
+ break;
+ }
+ // Get saved parent node and next input's index.
+ nstack_top_n = nstack.node();
+ nstack_top_i = nstack.index();
+ nstack.pop();
+ }
+ } // while (true)
+ }
+}
+
+//------------------------------dom_lca_internal--------------------------------
+// Pair-wise LCA
+Node *PhaseIdealLoop::dom_lca_internal( Node *n1, Node *n2 ) const {
+ if( !n1 ) return n2; // Handle NULL original LCA
+ assert( n1->is_CFG(), "" );
+ assert( n2->is_CFG(), "" );
+ // find LCA of all uses
+ uint d1 = dom_depth(n1);
+ uint d2 = dom_depth(n2);
+ while (n1 != n2) {
+ if (d1 > d2) {
+ n1 = idom(n1);
+ d1 = dom_depth(n1);
+ } else if (d1 < d2) {
+ n2 = idom(n2);
+ d2 = dom_depth(n2);
+ } else {
+ // Here d1 == d2. Due to edits of the dominator-tree, sections
+ // of the tree might have the same depth. These sections have
+ // to be searched more carefully.
+
+ // Scan up all the n1's with equal depth, looking for n2.
+ Node *t1 = idom(n1);
+ while (dom_depth(t1) == d1) {
+ if (t1 == n2) return n2;
+ t1 = idom(t1);
+ }
+ // Scan up all the n2's with equal depth, looking for n1.
+ Node *t2 = idom(n2);
+ while (dom_depth(t2) == d2) {
+ if (t2 == n1) return n1;
+ t2 = idom(t2);
+ }
+ // Move up to a new dominator-depth value as well as up the dom-tree.
+ n1 = t1;
+ n2 = t2;
+ d1 = dom_depth(n1);
+ d2 = dom_depth(n2);
+ }
+ }
+ return n1;
+}
+
+//------------------------------compute_idom-----------------------------------
+// Locally compute IDOM using dom_lca call. Correct only if the incoming
+// IDOMs are correct.
+Node *PhaseIdealLoop::compute_idom( Node *region ) const {
+ assert( region->is_Region(), "" );
+ Node *LCA = NULL;
+ for( uint i = 1; i < region->req(); i++ ) {
+ if( region->in(i) != C->top() )
+ LCA = dom_lca( LCA, region->in(i) );
+ }
+ return LCA;
+}
+
+//------------------------------get_late_ctrl----------------------------------
+// Compute latest legal control.
+Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
+ assert(early != NULL, "early control should not be NULL");
+
+ // Compute LCA over list of uses
+ Node *LCA = NULL;
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && LCA != early; i++) {
+ Node* c = n->fast_out(i);
+ if (_nodes[c->_idx] == NULL)
+ continue; // Skip the occasional dead node
+ if( c->is_Phi() ) { // For Phis, we must land above on the path
+ for( uint j=1; j<c->req(); j++ ) {// For all inputs
+ if( c->in(j) == n ) { // Found matching input?
+ Node *use = c->in(0)->in(j);
+ LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
+ }
+ }
+ } else {
+ // For CFG data-users, use is in the block just prior
+ Node *use = has_ctrl(c) ? get_ctrl(c) : c->in(0);
+ LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
+ }
+ }
+
+ // if this is a load, check for anti-dependent stores
+ // We use a conservative algorithm to identify potential interfering
+ // instructions and for rescheduling the load. The users of the memory
+ // input of this load are examined. Any use which is not a load and is
+ // dominated by early is considered a potentially interfering store.
+ // This can produce false positives.
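+ // For example (illustrative): if n loads a[i] and another user of the same
+ // memory state stores to a[j] at a control dominated by 'early', that
+ // store's control is folded into the LCA, so the chosen late control
+ // dominates the store and the load is never scheduled below it.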
+ if (n->is_Load() && LCA != early) {
+ Node_List worklist;
+
+ Node *mem = n->in(MemNode::Memory);
+ for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+ Node* s = mem->fast_out(i);
+ worklist.push(s);
+ }
+ while(worklist.size() != 0 && LCA != early) {
+ Node* s = worklist.pop();
+ if (s->is_Load()) {
+ continue;
+ } else if (s->is_MergeMem()) {
+ for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
+ Node* s1 = s->fast_out(i);
+ worklist.push(s1);
+ }
+ } else {
+ Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
+ assert(sctrl != NULL || s->outcnt() == 0, "must have control");
+ if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
+ LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
+ }
+ }
+ }
+ }
+
+ assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
+ return LCA;
+}
+
+// true if CFG node d dominates CFG node n
+bool PhaseIdealLoop::is_dominator(Node *d, Node *n) {
+ if (d == n)
+ return true;
+ assert(d->is_CFG() && n->is_CFG(), "must have CFG nodes");
+ uint dd = dom_depth(d);
+ while (dom_depth(n) >= dd) {
+ if (n == d)
+ return true;
+ n = idom(n);
+ }
+ return false;
+}
+
+//------------------------------dom_lca_for_get_late_ctrl_internal-------------
+// Pair-wise LCA with tags.
+// Tag each index with the node 'tag' currently being processed
+// before advancing up the dominator chain using idom().
+// Later calls that find a match to 'tag' know that this path has already
+// been considered in the current LCA (which is input 'n1' by convention).
+// Since get_late_ctrl() is only called once for each node, the tag array
+// does not need to be cleared between calls to get_late_ctrl().
+// Algorithm trades a larger constant factor for better asymptotic behavior
+//
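+// For example, a sketch of the calling pattern used by get_late_ctrl()
+// above (ctrl_of_use_1/2 are placeholders for the uses' controls):
+//
+//   LCA = dom_lca_for_get_late_ctrl( NULL, ctrl_of_use_1, n ); // tags its path
+//   LCA = dom_lca_for_get_late_ctrl( LCA,  ctrl_of_use_2, n ); // reuses the tags
+//
+// The second call returns the running LCA as soon as it climbs onto a node
+// already tagged with n, so the part of the dominator chain shared with the
+// first use is not walked again.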
+Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, Node *tag ) {
+ uint d1 = dom_depth(n1);
+ uint d2 = dom_depth(n2);
+
+ do {
+ if (d1 > d2) {
+ // current lca is deeper than n2
+ _dom_lca_tags.map(n1->_idx, tag);
+ n1 = idom(n1);
+ d1 = dom_depth(n1);
+ } else if (d1 < d2) {
+ // n2 is deeper than current lca
+ Node *memo = _dom_lca_tags[n2->_idx];
+ if( memo == tag ) {
+ return n1; // Return the current LCA
+ }
+ _dom_lca_tags.map(n2->_idx, tag);
+ n2 = idom(n2);
+ d2 = dom_depth(n2);
+ } else {
+ // Here d1 == d2. Due to edits of the dominator-tree, sections
+ // of the tree might have the same depth. These sections have
+ // to be searched more carefully.
+
+ // Scan up all the n1's with equal depth, looking for n2.
+ _dom_lca_tags.map(n1->_idx, tag);
+ Node *t1 = idom(n1);
+ while (dom_depth(t1) == d1) {
+ if (t1 == n2) return n2;
+ _dom_lca_tags.map(t1->_idx, tag);
+ t1 = idom(t1);
+ }
+ // Scan up all the n2's with equal depth, looking for n1.
+ _dom_lca_tags.map(n2->_idx, tag);
+ Node *t2 = idom(n2);
+ while (dom_depth(t2) == d2) {
+ if (t2 == n1) return n1;
+ _dom_lca_tags.map(t2->_idx, tag);
+ t2 = idom(t2);
+ }
+ // Move up to a new dominator-depth value as well as up the dom-tree.
+ n1 = t1;
+ n2 = t2;
+ d1 = dom_depth(n1);
+ d2 = dom_depth(n2);
+ }
+ } while (n1 != n2);
+ return n1;
+}
+
+//------------------------------init_dom_lca_tags------------------------------
+// Tag could be a node's integer index, 32 bits instead of 64 bits in some cases.
+// Intended use does not involve any growth for the array, so it could
+// be of fixed size.
+void PhaseIdealLoop::init_dom_lca_tags() {
+ uint limit = C->unique() + 1;
+ _dom_lca_tags.map( limit, NULL );
+#ifdef ASSERT
+ for( uint i = 0; i < limit; ++i ) {
+ assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
+ }
+#endif // ASSERT
+}
+
+//------------------------------clear_dom_lca_tags------------------------------
+// Tag could be a node's integer index, 32 bits instead of 64 bits in some cases.
+// Intended use does not involve any growth for the array, so it could
+// be of fixed size.
+void PhaseIdealLoop::clear_dom_lca_tags() {
+ uint limit = C->unique() + 1;
+ _dom_lca_tags.map( limit, NULL );
+ _dom_lca_tags.clear();
+#ifdef ASSERT
+ for( uint i = 0; i < limit; ++i ) {
+ assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
+ }
+#endif // ASSERT
+}
+
+//------------------------------build_loop_late--------------------------------
+// Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
+// Second pass finds latest legal placement, and ideal loop placement.
+void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) {
+ while (worklist.size() != 0) {
+ Node *n = worklist.pop();
+ // Only visit once
+ if (visited.test_set(n->_idx)) continue;
+ uint cnt = n->outcnt();
+ uint i = 0;
+ while (true) {
+ assert( _nodes[n->_idx], "no dead nodes" );
+ // Visit all children
+ if (i < cnt) {
+ Node* use = n->raw_out(i);
+ ++i;
+ // Check for dead uses. Aggressively prune such junk. It might be
+ // dead in the global sense, but still have local uses so I cannot
+ // easily call 'remove_dead_node'.
+ if( _nodes[use->_idx] != NULL || use->is_top() ) { // Not dead?
+ // Due to cycles, we might not hit the same fixed point in the verify
+ // pass as we do in the regular pass. Instead, visit such phis as
+ // simple uses of the loop head.
+ if( use->in(0) && (use->is_CFG() || use->is_Phi()) ) {
+ if( !visited.test(use->_idx) )
+ worklist.push(use);
+ } else if( !visited.test_set(use->_idx) ) {
+ nstack.push(n, i); // Save parent and next use's index.
+ n = use; // Process all children of current use.
+ cnt = use->outcnt();
+ i = 0;
+ }
+ } else {
+ // Do not visit around the backedge of loops via data edges.
+ // push dead code onto a worklist
+ _deadlist.push(use);
+ }
+ } else {
+ // All of n's children have been processed, complete post-processing.
+ build_loop_late_post(n, verify_me);
+ if (nstack.is_empty()) {
+ // Finished all nodes on stack.
+ // Process next node on the worklist.
+ break;
+ }
+ // Get saved parent node and next use's index. Visit the rest of uses.
+ n = nstack.node();
+ cnt = n->outcnt();
+ i = nstack.index();
+ nstack.pop();
+ }
+ }
+ }
+}
+
+//------------------------------build_loop_late_post---------------------------
+// Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
+// Second pass finds latest legal placement, and ideal loop placement.
+void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify_me ) {
+
+ if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress()) {
+ _igvn._worklist.push(n); // Maybe we'll normalize it, if no more loops.
+ }
+
+ // CFG and pinned nodes already handled
+ if( n->in(0) ) {
+ if( n->in(0)->is_top() ) return; // Dead?
+
+ // We'd like +VerifyLoopOptimizations to not believe that Mod's/Loads
+ // _must_ be pinned (they have to observe their control edge of course).
+ // Unlike Stores (which modify an unallocable resource, the memory
+ // state), Mods/Loads can float around. So free them up.
+ bool pinned = true;
+ switch( n->Opcode() ) {
+ case Op_DivI:
+ case Op_DivF:
+ case Op_DivD:
+ case Op_ModI:
+ case Op_ModF:
+ case Op_ModD:
+ case Op_LoadB: // Same with Loads; they can sink
+ case Op_LoadC: // during loop optimizations.
+ case Op_LoadD:
+ case Op_LoadF:
+ case Op_LoadI:
+ case Op_LoadKlass:
+ case Op_LoadL:
+ case Op_LoadS:
+ case Op_LoadP:
+ case Op_LoadRange:
+ case Op_LoadD_unaligned:
+ case Op_LoadL_unaligned:
+ case Op_StrComp: // Does a bunch of load-like effects
+ pinned = false;
+ }
+ if( pinned ) {
+ IdealLoopTree *choosen_loop = get_loop(n->is_CFG() ? n : get_ctrl(n));
+ if( !choosen_loop->_child ) // Inner loop?
+ choosen_loop->_body.push(n); // Collect inner loops
+ return;
+ }
+ } else { // No slot zero
+ if( n->is_CFG() ) { // CFG with no slot 0 is dead
+ _nodes.map(n->_idx,0); // No block setting, it's globally dead
+ return;
+ }
+ assert(!n->is_CFG() || n->outcnt() == 0, "");
+ }
+
+ // Do I have a "safe range" I can select over?
+ Node *early = get_ctrl(n);// Early location already computed
+
+ // Compute latest point this Node can go
+ Node *LCA = get_late_ctrl( n, early );
+ // LCA is NULL due to uses being dead
+ if( LCA == NULL ) {
+#ifdef ASSERT
+ for (DUIterator i1 = n->outs(); n->has_out(i1); i1++) {
+ assert( _nodes[n->out(i1)->_idx] == NULL, "all uses must also be dead");
+ }
+#endif
+ _nodes.map(n->_idx, 0); // This node is useless
+ _deadlist.push(n);
+ return;
+ }
+ assert(LCA != NULL && !LCA->is_top(), "no dead nodes");
+
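+ // Between the latest legal position (LCA) and the earliest one, prefer the
+ // dominator with the shallowest loop nest. For example (sketch): if the LCA
+ // sits inside a doubly nested loop but some dominator between it and 'early'
+ // is only singly nested, that dominator wins, hoisting n out of the inner
+ // loop.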
+ Node *legal = LCA; // Walk 'legal' up the IDOM chain
+ Node *least = legal; // Best legal position so far
+ while( early != legal ) { // While not at earliest legal
+ // Find least loop nesting depth
+ legal = idom(legal); // Bump up the IDOM tree
+ // Check for lower nesting depth
+ if( get_loop(legal)->_nest < get_loop(least)->_nest )
+ least = legal;
+ }
+
+ // Try not to place code on a loop entry projection
+ // which can inhibit range check elimination.
+ if (least != early) {
+ Node* ctrl_out = least->unique_ctrl_out();
+ if (ctrl_out && ctrl_out->is_CountedLoop() &&
+ least == ctrl_out->in(LoopNode::EntryControl)) {
+ Node* least_dom = idom(least);
+ if (get_loop(least_dom)->is_member(get_loop(least))) {
+ least = least_dom;
+ }
+ }
+ }
+
+#ifdef ASSERT
+ // If verifying, verify that 'verify_me' has a legal location
+ // and choose it as our location.
+ if( verify_me ) {
+ Node *v_ctrl = verify_me->get_ctrl_no_update(n);
+ Node *legal = LCA;
+ while( early != legal ) { // While not at earliest legal
+ if( legal == v_ctrl ) break; // Check for prior good location
+ legal = idom(legal); // Bump up the IDOM tree
+ }
+ // Check for prior good location
+ if( legal == v_ctrl ) least = legal; // Keep prior if found
+ }
+#endif
+
+ // Assign discovered "here or above" point
+ least = find_non_split_ctrl(least);
+ set_ctrl(n, least);
+
+ // Collect inner loop bodies
+ IdealLoopTree *choosen_loop = get_loop(least);
+ if( !choosen_loop->_child ) // Inner loop?
+ choosen_loop->_body.push(n);// Collect inner loops
+}
+
+#ifndef PRODUCT
+//------------------------------dump-------------------------------------------
+void PhaseIdealLoop::dump( ) const {
+ ResourceMark rm;
+ Arena* arena = Thread::current()->resource_area();
+ Node_Stack stack(arena, C->unique() >> 2);
+ Node_List rpo_list;
+ VectorSet visited(arena);
+ visited.set(C->top()->_idx);
+ rpo( C->root(), stack, visited, rpo_list );
+ // Dump root loop indexed by last element in PO order
+ dump( _ltree_root, rpo_list.size(), rpo_list );
+}
+
+void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
+
+ // Indent by loop nesting depth
+ for( uint x = 0; x < loop->_nest; x++ )
+ tty->print(" ");
+ tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx);
+
+ // Now scan for CFG nodes in the same loop
+ for( uint j=idx; j > 0; j-- ) {
+ Node *n = rpo_list[j-1];
+ if( !_nodes[n->_idx] ) // Skip dead nodes
+ continue;
+ if( get_loop(n) != loop ) { // Wrong loop nest
+ if( get_loop(n)->_head == n && // Found nested loop?
+ get_loop(n)->_parent == loop )
+ dump(get_loop(n),rpo_list.size(),rpo_list); // Print it nested-ly
+ continue;
+ }
+
+ // Dump controlling node
+ for( uint x = 0; x < loop->_nest; x++ )
+ tty->print(" ");
+ tty->print("C");
+ if( n == C->root() ) {
+ n->dump();
+ } else {
+ Node* cached_idom = idom_no_update(n);
+ Node *computed_idom = n->in(0);
+ if( n->is_Region() ) {
+ computed_idom = compute_idom(n);
+ // computed_idom() will return n->in(0) when idom(n) is an IfNode (or
+ // any MultiBranch ctrl node), so apply a similar transform to
+ // the cached idom returned from idom_no_update.
+ cached_idom = find_non_split_ctrl(cached_idom);
+ }
+ tty->print(" ID:%d",computed_idom->_idx);
+ n->dump();
+ if( cached_idom != computed_idom ) {
+ tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d",
+ computed_idom->_idx, cached_idom->_idx);
+ }
+ }
+ // Dump nodes it controls
+ for( uint k = 0; k < _nodes.Size(); k++ ) {
+ // (k < C->unique() && get_ctrl(find(k)) == n)
+ if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) {
+ Node *m = C->root()->find(k);
+ if( m && m->outcnt() > 0 ) {
+ if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
+ tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p",
+ _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
+ }
+ for( uint j = 0; j < loop->_nest; j++ )
+ tty->print(" ");
+ tty->print(" ");
+ m->dump();
+ }
+ }
+ }
+ }
+}
+
+// Collect an R-P-O for the whole CFG.
+// Result list is in post-order (scan backwards for RPO)
+void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const {
+ stk.push(start, 0);
+ visited.set(start->_idx);
+
+ while (stk.is_nonempty()) {
+ Node* m = stk.node();
+ uint idx = stk.index();
+ if (idx < m->outcnt()) {
+ stk.set_index(idx + 1);
+ Node* n = m->raw_out(idx);
+ if (n->is_CFG() && !visited.test_set(n->_idx)) {
+ stk.push(n, 0);
+ }
+ } else {
+ rpo_list.push(m);
+ stk.pop();
+ }
+ }
+}
+#endif
+
+
+//=============================================================================
+//------------------------------LoopTreeIterator-----------------------------------
+
+// Advance to next loop tree using a preorder, left-to-right traversal.
+void LoopTreeIterator::next() {
+ assert(!done(), "must not be done.");
+ if (_curnt->_child != NULL) {
+ _curnt = _curnt->_child;
+ } else if (_curnt->_next != NULL) {
+ _curnt = _curnt->_next;
+ } else {
+ while (_curnt != _root && _curnt->_next == NULL) {
+ _curnt = _curnt->_parent;
+ }
+ if (_curnt == _root) {
+ _curnt = NULL;
+ assert(done(), "must be done.");
+ } else {
+ assert(_curnt->_next != NULL, "must be more to do");
+ _curnt = _curnt->_next;
+ }
+ }
+}
diff --git a/src/share/vm/opto/loopnode.hpp b/src/share/vm/opto/loopnode.hpp
new file mode 100644
index 000000000..21ddf8015
--- /dev/null
+++ b/src/share/vm/opto/loopnode.hpp
@@ -0,0 +1,919 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class CmpNode;
+class CountedLoopEndNode;
+class CountedLoopNode;
+class IdealLoopTree;
+class LoopNode;
+class Node;
+class PhaseIdealLoop;
+class VectorSet;
+struct small_cache;
+
+//
+// I D E A L I Z E D L O O P S
+//
+// Idealized loops are the set of loops I perform more interesting
+// transformations on, beyond simple hoisting.
+
+//------------------------------LoopNode---------------------------------------
+// Simple loop header. Fall in path on left, loop-back path on right.
+class LoopNode : public RegionNode {
+ // Size is bigger to hold the flags. However, the flags do not change
+ // the semantics, so they do not appear in the hash & cmp functions.
+ virtual uint size_of() const { return sizeof(*this); }
+protected:
+ short _loop_flags;
+ // Names for flag bitfields
+ enum { pre_post_main=0, inner_loop=8, partial_peel_loop=16, partial_peel_failed=32 };
+ char _unswitch_count;
+ enum { _unswitch_max=3 };
+
+public:
+ // Names for edge indices
+ enum { Self=0, EntryControl, LoopBackControl };
+
+ int is_inner_loop() const { return _loop_flags & inner_loop; }
+ void set_inner_loop() { _loop_flags |= inner_loop; }
+
+ int is_partial_peel_loop() const { return _loop_flags & partial_peel_loop; }
+ void set_partial_peel_loop() { _loop_flags |= partial_peel_loop; }
+ int partial_peel_has_failed() const { return _loop_flags & partial_peel_failed; }
+ void mark_partial_peel_failed() { _loop_flags |= partial_peel_failed; }
+
+ int unswitch_max() { return _unswitch_max; }
+ int unswitch_count() { return _unswitch_count; }
+ void set_unswitch_count(int val) {
+ assert (val <= unswitch_max(), "too many unswitches");
+ _unswitch_count = val;
+ }
+
+ LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
+ init_class_id(Class_Loop);
+ init_req(EntryControl, entry);
+ init_req(LoopBackControl, backedge);
+ }
+
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int Opcode() const;
+ bool can_be_counted_loop(PhaseTransform* phase) const {
+ return req() == 3 && in(0) != NULL &&
+ in(1) != NULL && phase->type(in(1)) != Type::TOP &&
+ in(2) != NULL && phase->type(in(2)) != Type::TOP;
+ }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------Counted Loops----------------------------------
+// Counted loops are all trip-counted loops, with exactly 1 trip-counter exit
+// path (and maybe some other exit paths). The trip-counter exit is always
+// last in the loop. The trip-counter does not have to stride by a constant,
+// but it does have to stride by a loop-invariant amount; the exit value is
+// also loop invariant.
+
+// CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The
+// CountedLoopNode has the incoming loop control and the loop-back-control
+// which is always the IfTrue before the matching CountedLoopEndNode. The
+// CountedLoopEndNode has an incoming control (possibly not the
+// CountedLoopNode if there is control flow in the loop), the post-increment
+// trip-counter value, and the limit. The trip-counter value is always of
+// the form (Op old-trip-counter stride). The old-trip-counter is produced
+// by a Phi connected to the CountedLoopNode. The stride is loop invariant.
+// The Op is any commutative opcode, including Add, Mul, Xor. The
+// CountedLoopEndNode also takes in the loop-invariant limit value.
+
+// From a CountedLoopNode I can reach the matching CountedLoopEndNode via the
+// loop-back control. From CountedLoopEndNodes I can reach CountedLoopNodes
+// via the old-trip-counter from the Op node.
+
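+// For example (an illustrative source-level shape only):
+//
+//   for (int i = init; i < limit; i += stride) { ... }
+//
+// maps onto a CountedLoopNode whose trip-counter Phi merges 'init' with the
+// post-incremented value (e.g. an AddI of the Phi and the stride), and a
+// CountedLoopEndNode that tests that post-incremented value against the
+// loop-invariant 'limit'.
+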
+//------------------------------CountedLoopNode--------------------------------
+// CountedLoopNodes head simple counted loops. CountedLoopNodes have as
+// inputs the incoming loop-start control and the loop-back control, so they
+// act like RegionNodes. They also take in the initial trip counter, the
+// loop-invariant stride and the loop-invariant limit value. CountedLoopNodes
+// produce a loop-body control and the trip counter value. Since
+// CountedLoopNodes behave like RegionNodes I still have a standard CFG model.
+
+class CountedLoopNode : public LoopNode {
+ // Size is bigger to hold _main_idx. However, _main_idx does not change
+ // the semantics so it does not appear in the hash & cmp functions.
+ virtual uint size_of() const { return sizeof(*this); }
+
+ // For Pre- and Post-loops during debugging ONLY, this holds the index of
+ // the Main CountedLoop. Used to assert that we understand the graph shape.
+ node_idx_t _main_idx;
+
+ // Known trip count calculated by policy_maximally_unroll
+ int _trip_count;
+
+ // Expected trip count from profile data
+ float _profile_trip_cnt;
+
+ // Log2 of original loop bodies in unrolled loop
+ int _unrolled_count_log2;
+
+ // Node count prior to last unrolling - used to decide if
+ // unroll,optimize,unroll,optimize,... is making progress
+ int _node_count_before_unroll;
+
+public:
+ CountedLoopNode( Node *entry, Node *backedge )
+ : LoopNode(entry, backedge), _trip_count(max_jint),
+ _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
+ _node_count_before_unroll(0) {
+ init_class_id(Class_CountedLoop);
+ // Initialize _trip_count to the largest possible value.
+ // Will be reset (lower) if the loop's trip count is known.
+ }
+
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ Node *init_control() const { return in(EntryControl); }
+ Node *back_control() const { return in(LoopBackControl); }
+ CountedLoopEndNode *loopexit() const;
+ Node *init_trip() const;
+ Node *stride() const;
+ int stride_con() const;
+ bool stride_is_con() const;
+ Node *limit() const;
+ Node *incr() const;
+ Node *phi() const;
+
+ // Match increment with optional truncation
+ static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type);
+
+ // A 'main' loop has a pre-loop and a post-loop. The 'main' loop
+ // can run short a few iterations and may start a few iterations in.
+ // It will be RCE'd and unrolled and aligned.
+
+ // A following 'post' loop will run any remaining iterations. Used
+ // during Range Check Elimination, the 'post' loop will do any final
+ // iterations with full checks. Also used by Loop Unrolling, where
+ // the 'post' loop will do any epilog iterations needed. Basically,
+ // a 'post' loop can not profitably be further unrolled or RCE'd.
+
+ // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
+ // it may do under-flow checks for RCE and may do alignment iterations
+ // so the following main loop 'knows' that it is striding down cache
+ // lines.
+
+ // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
+ // Aligned, may be missing it's pre-loop.
+ enum { Normal=0, Pre=1, Main=2, Post=3, PrePostFlagsMask=3, Main_Has_No_Pre_Loop=4 };
+ int is_normal_loop() const { return (_loop_flags&PrePostFlagsMask) == Normal; }
+ int is_pre_loop () const { return (_loop_flags&PrePostFlagsMask) == Pre; }
+ int is_main_loop () const { return (_loop_flags&PrePostFlagsMask) == Main; }
+ int is_post_loop () const { return (_loop_flags&PrePostFlagsMask) == Post; }
+ int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
+ void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
+
+
+ void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
+ void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
+ void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
+ void set_normal_loop( ) { _loop_flags &= ~PrePostFlagsMask; }
+
+ void set_trip_count(int tc) { _trip_count = tc; }
+ int trip_count() { return _trip_count; }
+
+ void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
+ float profile_trip_cnt() { return _profile_trip_cnt; }
+
+ void double_unrolled_count() { _unrolled_count_log2++; }
+ int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
+
+ void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; }
+ int node_count_before_unroll() { return _node_count_before_unroll; }
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CountedLoopEndNode-----------------------------
+// CountedLoopEndNodes end simple trip counted loops. They act much like
+// IfNodes.
+class CountedLoopEndNode : public IfNode {
+public:
+ enum { TestControl, TestValue };
+
+ CountedLoopEndNode( Node *control, Node *test, float prob, float cnt )
+ : IfNode( control, test, prob, cnt) {
+ init_class_id(Class_CountedLoopEnd);
+ }
+ virtual int Opcode() const;
+
+ Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; }
+ Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
+ Node *limit() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; }
+ Node *stride() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; }
+ Node *phi() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
+ Node *init_trip() const { Node *tmp = phi (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
+ int stride_con() const;
+ bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); }
+ BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; }
+ CountedLoopNode *loopnode() const {
+ Node *ln = phi()->in(0);
+ assert( ln->Opcode() == Op_CountedLoop, "malformed loop" );
+ return (CountedLoopNode*)ln; }
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+
+inline CountedLoopEndNode *CountedLoopNode::loopexit() const {
+ Node *bc = back_control();
+ if( bc == NULL ) return NULL;
+ Node *le = bc->in(0);
+ if( le->Opcode() != Op_CountedLoopEnd )
+ return NULL;
+ return (CountedLoopEndNode*)le;
+}
+inline Node *CountedLoopNode::init_trip() const { return loopexit() ? loopexit()->init_trip() : NULL; }
+inline Node *CountedLoopNode::stride() const { return loopexit() ? loopexit()->stride() : NULL; }
+inline int CountedLoopNode::stride_con() const { return loopexit() ? loopexit()->stride_con() : 0; }
+inline bool CountedLoopNode::stride_is_con() const { return loopexit() && loopexit()->stride_is_con(); }
+inline Node *CountedLoopNode::limit() const { return loopexit() ? loopexit()->limit() : NULL; }
+inline Node *CountedLoopNode::incr() const { return loopexit() ? loopexit()->incr() : NULL; }
+inline Node *CountedLoopNode::phi() const { return loopexit() ? loopexit()->phi() : NULL; }
+
+
+// -----------------------------IdealLoopTree----------------------------------
+class IdealLoopTree : public ResourceObj {
+public:
+ IdealLoopTree *_parent; // Parent in loop tree
+ IdealLoopTree *_next; // Next sibling in loop tree
+ IdealLoopTree *_child; // First child in loop tree
+
+ // The head-tail backedge defines the loop.
+ // If tail is NULL then this loop has multiple backedges as part of the
+ // same loop. During cleanup I'll peel off the multiple backedges; merge
+ // them at the loop bottom and flow 1 real backedge into the loop.
+ Node *_head; // Head of loop
+ Node *_tail; // Tail of loop
+ inline Node *tail(); // Handle lazy update of _tail field
+ PhaseIdealLoop* _phase;
+
+ Node_List _body; // Loop body for inner loops
+
+ uint8 _nest; // Nesting depth
+ uint8 _irreducible:1, // True if irreducible
+ _has_call:1, // True if has call safepoint
+ _has_sfpt:1, // True if has non-call safepoint
+ _rce_candidate:1; // True if candidate for range check elimination
+
+ Node_List* _required_safept; // An inner loop cannot delete these safepts.
+
+ IdealLoopTree( PhaseIdealLoop* phase, Node *head, Node *tail )
+ : _parent(0), _next(0), _child(0),
+ _head(head), _tail(tail),
+ _phase(phase),
+ _required_safept(NULL),
+ _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0)
+ { }
+
+ // Is 'l' a member of 'this'?
+ int is_member( const IdealLoopTree *l ) const; // Test for nested membership
+
+ // Set loop nesting depth. Accumulate has_call bits.
+ int set_nest( uint depth );
+
+ // Split out multiple fall-in edges from the loop header. Move them to a
+ // private RegionNode before the loop. This becomes the loop landing pad.
+ void split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt );
+
+ // Split out the outermost loop from this shared header.
+ void split_outer_loop( PhaseIdealLoop *phase );
+
+ // Merge all the backedges from the shared header into a private Region.
+ // Feed that region as the one backedge to this loop.
+ void merge_many_backedges( PhaseIdealLoop *phase );
+
+ // Split shared headers and insert loop landing pads.
+ // Insert a LoopNode to replace the RegionNode.
+ // Returns TRUE if loop tree is structurally changed.
+ bool beautify_loops( PhaseIdealLoop *phase );
+
+ // Perform iteration-splitting on inner loops. Split iterations to avoid
+ // range checks or one-shot null checks.
+ void iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
+
+ // Driver for various flavors of iteration splitting
+ void iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
+
+ // Given dominators, try to find loops with calls that must always be
+ // executed (call dominates loop tail). These loops do not need non-call
+ // safepoints (ncsfpt).
+ void check_safepts(VectorSet &visited, Node_List &stack);
+
+ // Allpaths backwards scan from loop tail, terminating each path at first safepoint
+ // encountered.
+ void allpaths_check_safepts(VectorSet &visited, Node_List &stack);
+
+ // Convert to counted loops where possible
+ void counted_loop( PhaseIdealLoop *phase );
+
+ // Check for Node being a loop-breaking test
+ Node *is_loop_exit(Node *iff) const;
+
+ // Returns true if ctrl is executed on every complete iteration
+ bool dominates_backedge(Node* ctrl);
+
+ // Remove simplistic dead code from loop body
+ void DCE_loop_body();
+
+ // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
+ // Replace with a 1-in-10 exit guess.
+ void adjust_loop_exit_prob( PhaseIdealLoop *phase );
+
+ // Return TRUE or FALSE if the loop should never be RCE'd or aligned.
+ // Useful for unrolling loops with NO array accesses.
+ bool policy_peel_only( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be unswitched -- clone
+ // loop with an invariant test
+ bool policy_unswitching( PhaseIdealLoop *phase ) const;
+
+ // Micro-benchmark spamming. Remove empty loops.
+ bool policy_do_remove_empty_loop( PhaseIdealLoop *phase );
+
+ // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
+ // make some loop-invariant test (usually a null-check) happen before the
+ // loop.
+ bool policy_peeling( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be maximally unrolled. Stash any
+ // known trip count in the counted loop node.
+ bool policy_maximally_unroll( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
+ // the loop is a CountedLoop and the body is small enough.
+ bool policy_unroll( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be range-check-eliminated.
+ // Gather a list of IF tests that are dominated by iteration splitting;
+ // also gather the end of the first split and the start of the 2nd split.
+ bool policy_range_check( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be cache-line aligned.
+ // Gather the expression that does the alignment. Note that only
+ // one array base can be aligned in a loop (unless the VM guarantees
+ // mutual alignment). Note that if we vectorize short memory ops
+ // into longer memory ops, we may want to increase alignment.
+ bool policy_align( PhaseIdealLoop *phase ) const;
+
+ // Compute loop trip count from profile data
+ void compute_profile_trip_cnt( PhaseIdealLoop *phase );
+
+ // Reassociate invariant expressions.
+ void reassociate_invariants(PhaseIdealLoop *phase);
+ // Reassociate invariant add and subtract expressions.
+ Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase);
+ // Return nonzero index of invariant operand if invariant and variant
+ // are combined with an Add or Sub. Helper for reassociate_invariants.
+ int is_invariant_addition(Node* n, PhaseIdealLoop *phase);
+
+ // Return true if n is invariant
+ bool is_invariant(Node* n) const;
+
+ // Put loop body on igvn work list
+ void record_for_igvn();
+
+ bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); }
+ bool is_inner() { return is_loop() && _child == NULL; }
+ bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
+
+#ifndef PRODUCT
+ void dump_head( ) const; // Dump loop head only
+ void dump() const; // Dump this loop recursively
+ void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
+#endif
+
+};
+
+// -----------------------------PhaseIdealLoop---------------------------------
+// Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a
+// loop tree. Drives the loop-based transformations on the ideal graph.
+class PhaseIdealLoop : public PhaseTransform {
+ friend class IdealLoopTree;
+ friend class SuperWord;
+ // Pre-computed def-use info
+ PhaseIterGVN &_igvn;
+
+ // Head of loop tree
+ IdealLoopTree *_ltree_root;
+
+ // Array of pre-order numbers, plus post-visited bit.
+ // ZERO for not pre-visited. EVEN for pre-visited but not post-visited.
+ // ODD for post-visited. Other bits are the pre-order number.
+ uint *_preorders;
+ uint _max_preorder;
+
+ // Allocate _preorders[] array
+ void allocate_preorders() {
+ _max_preorder = C->unique()+8;
+ _preorders = NEW_RESOURCE_ARRAY(uint, _max_preorder);
+ memset(_preorders, 0, sizeof(uint) * _max_preorder);
+ }
+
+ // Reallocate the _preorders[] array to fit the current node count
+ void reallocate_preorders() {
+ if ( _max_preorder < C->unique() ) {
+ _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, C->unique());
+ _max_preorder = C->unique();
+ }
+ memset(_preorders, 0, sizeof(uint) * _max_preorder);
+ }
+
+ // Check to grow _preorders[] array for the case when build_loop_tree_impl()
+ // adds new nodes.
+ void check_grow_preorders( ) {
+ if ( _max_preorder < C->unique() ) {
+ uint newsize = _max_preorder<<1; // double size of array
+ _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, newsize);
+ memset(&_preorders[_max_preorder],0,sizeof(uint)*(newsize-_max_preorder));
+ _max_preorder = newsize;
+ }
+ }
+ // Check for pre-visited. Zero for NOT visited; non-zero for visited.
+ int is_visited( Node *n ) const { return _preorders[n->_idx]; }
+ // Pre-order numbers are written to the Nodes array as low-bit-set values.
+ void set_preorder_visited( Node *n, int pre_order ) {
+ assert( !is_visited( n ), "already set" );
+ _preorders[n->_idx] = (pre_order<<1);
+ };
+ // Return pre-order number.
+ int get_preorder( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]>>1; }
+
+ // Check for being post-visited.
+ // Should be previsited already (checked with assert(is_visited(n))).
+ int is_postvisited( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]&1; }
+
+ // Mark as post visited
+ void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; }
+
+ // Set/get control node out. Set lower bit to distinguish from IdealLoopTree
+ // Returns true if "n" is a data node, false if it's a control node.
+ bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; }
+
+ // clear out dead code after build_loop_late
+ Node_List _deadlist;
+
+ // Support for faster execution of get_late_ctrl()/dom_lca()
+ // when a node has many uses and dominator depth is deep.
+ Node_Array _dom_lca_tags;
+ void init_dom_lca_tags();
+ void clear_dom_lca_tags();
+ // Inline wrapper for frequent cases:
+ // 1) only one use
+ // 2) a use is the same as the current LCA passed as 'n1'
+ Node *dom_lca_for_get_late_ctrl( Node *lca, Node *n, Node *tag ) {
+ assert( n->is_CFG(), "" );
+ // Fast-path NULL lca
+ if( lca != NULL && lca != n ) {
+ assert( lca->is_CFG(), "" );
+ // find LCA of all uses
+ n = dom_lca_for_get_late_ctrl_internal( lca, n, tag );
+ }
+ return find_non_split_ctrl(n);
+ }
+ Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag );
+ // true if CFG node d dominates CFG node n
+ bool is_dominator(Node *d, Node *n);
+
+ // Helper function for directing control inputs away from CFG split
+ // points.
+ Node *find_non_split_ctrl( Node *ctrl ) const {
+ if (ctrl != NULL) {
+ if (ctrl->is_MultiBranch()) {
+ ctrl = ctrl->in(0);
+ }
+ assert(ctrl->is_CFG(), "CFG");
+ }
+ return ctrl;
+ }
+
+public:
+ bool has_node( Node* n ) const { return _nodes[n->_idx] != NULL; }
+ // check if transform created new nodes that need _ctrl recorded
+ Node *get_late_ctrl( Node *n, Node *early );
+ Node *get_early_ctrl( Node *n );
+ void set_early_ctrl( Node *n );
+ void set_subtree_ctrl( Node *root );
+ void set_ctrl( Node *n, Node *ctrl ) {
+ assert( !has_node(n) || has_ctrl(n), "" );
+ assert( ctrl->in(0), "cannot set dead control node" );
+ assert( ctrl == find_non_split_ctrl(ctrl), "must set legal ctrl" );
+ _nodes.map( n->_idx, (Node*)((intptr_t)ctrl + 1) );
+ }
+ // Set control and update loop membership
+ void set_ctrl_and_loop(Node* n, Node* ctrl) {
+ IdealLoopTree* old_loop = get_loop(get_ctrl(n));
+ IdealLoopTree* new_loop = get_loop(ctrl);
+ if (old_loop != new_loop) {
+ if (old_loop->_child == NULL) old_loop->_body.yank(n);
+ if (new_loop->_child == NULL) new_loop->_body.push(n);
+ }
+ set_ctrl(n, ctrl);
+ }
+ // Control nodes can be replaced or subsumed. During this pass they
+ // get their replacement Node in slot 1. Instead of updating the block
+ // location of all Nodes in the subsumed block, we lazily do it. As we
+ // pull such a subsumed block out of the array, we write back the final
+ // correct block.
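+ // For example (sketch): if control node B1 was subsumed by B2 and B2 in turn
+ // by B3, the side array holds the chain B1 -> B2 -> B3; get_ctrl() follows
+ // the chain once, writes B3 back for the queried node, and later lookups
+ // stop after a single hop.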
+ Node *get_ctrl( Node *i ) {
+ assert(has_node(i), "");
+ Node *n = get_ctrl_no_update(i);
+ _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) );
+ assert(has_node(i) && has_ctrl(i), "");
+ assert(n == find_non_split_ctrl(n), "must return legal ctrl" );
+ return n;
+ }
+
+private:
+ Node *get_ctrl_no_update( Node *i ) const {
+ assert( has_ctrl(i), "" );
+ Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1);
+ if (!n->in(0)) {
+ // Skip dead CFG nodes
+ do {
+ n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
+ } while (!n->in(0));
+ n = find_non_split_ctrl(n);
+ }
+ return n;
+ }
+
+ // Check for loop being set
+ // "n" must be a control node. Returns true if "n" is known to be in a loop.
+ bool has_loop( Node *n ) const {
+ assert(!has_node(n) || !has_ctrl(n), "");
+ return has_node(n);
+ }
+ // Set loop
+ void set_loop( Node *n, IdealLoopTree *loop ) {
+ _nodes.map(n->_idx, (Node*)loop);
+ }
+ // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace
+ // the 'old_node' with 'new_node'. Kill old-node. Add a reference
+ // from old_node to new_node to support the lazy update. Reference
+ // replaces the loop reference, since that is not needed for a dead node.
+public:
+ void lazy_update( Node *old_node, Node *new_node ) {
+ assert( old_node != new_node, "no cycles please" );
+ //old_node->set_req( 1, new_node /*NO DU INFO*/ );
+ // Nodes always have DU info now, so re-use the side array slot
+ // for this node to provide the forwarding pointer.
+ _nodes.map( old_node->_idx, (Node*)((intptr_t)new_node + 1) );
+ }
+ void lazy_replace( Node *old_node, Node *new_node ) {
+ _igvn.hash_delete(old_node);
+ _igvn.subsume_node( old_node, new_node );
+ lazy_update( old_node, new_node );
+ }
+ void lazy_replace_proj( Node *old_node, Node *new_node ) {
+ assert( old_node->req() == 1, "use this for Projs" );
+ _igvn.hash_delete(old_node); // Must hash-delete before hacking edges
+ old_node->add_req( NULL );
+ lazy_replace( old_node, new_node );
+ }
+
+private:
+
+ // Place 'n' in some loop nest, where 'n' is a CFG node
+ void build_loop_tree();
+ int build_loop_tree_impl( Node *n, int pre_order );
+ // Insert loop into the existing loop tree. 'innermost' is a leaf of the
+ // loop tree, not the root.
+ IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost );
+
+ // Place Data nodes in some loop nest
+ void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me );
+ void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me );
+ void build_loop_late_post ( Node* n, const PhaseIdealLoop *verify_me );
+
+ // Array of immediate dominance info for each CFG node indexed by node idx
+private:
+ uint _idom_size;
+ Node **_idom; // Array of immediate dominators
+ uint *_dom_depth; // Used for fast LCA test
+ GrowableArray<uint>* _dom_stk; // For recomputation of dom depth
+
+ Node* idom_no_update(Node* d) const {
+ assert(d->_idx < _idom_size, "oob");
+ Node* n = _idom[d->_idx];
+ assert(n != NULL,"Bad immediate dominator info.");
+ while (n->in(0) == NULL) { // Skip dead CFG nodes
+ //n = n->in(1);
+ n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
+ assert(n != NULL,"Bad immediate dominator info.");
+ }
+ return n;
+ }
+ Node *idom(Node* d) const {
+ uint didx = d->_idx;
+ Node *n = idom_no_update(d);
+ _idom[didx] = n; // Lazily remove dead CFG nodes from table.
+ return n;
+ }
+ uint dom_depth(Node* d) const {
+ assert(d->_idx < _idom_size, "");
+ return _dom_depth[d->_idx];
+ }
+ void set_idom(Node* d, Node* n, uint dom_depth);
+ // Locally compute IDOM using dom_lca call
+ Node *compute_idom( Node *region ) const;
+ // Recompute dom_depth
+ void recompute_dom_depth();
+
+ // Is safept not required by an outer loop?
+ bool is_deleteable_safept(Node* sfpt);
+
+public:
+ // Dominators for the sea of nodes
+ void Dominators();
+ Node *dom_lca( Node *n1, Node *n2 ) const {
+ return find_non_split_ctrl(dom_lca_internal(n1, n2));
+ }
+ Node *dom_lca_internal( Node *n1, Node *n2 ) const;
+
+ // Compute the Ideal Node to Loop mapping
+ PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs );
+
+ // True if the method has at least 1 irreducible loop
+ bool _has_irreducible_loops;
+
+ // Per-Node transform
+ virtual Node *transform( Node *a_node ) { return 0; }
+
+ Node *is_counted_loop( Node *x, IdealLoopTree *loop );
+
+ // Return a post-walked LoopNode
+ IdealLoopTree *get_loop( Node *n ) const {
+ // Dead nodes have no loop, so return the top level loop instead
+ if (!has_node(n)) return _ltree_root;
+ assert(!has_ctrl(n), "");
+ return (IdealLoopTree*)_nodes[n->_idx];
+ }
+
+ // Is 'n' a (nested) member of 'loop'?
+ int is_member( const IdealLoopTree *loop, Node *n ) const {
+ return loop->is_member(get_loop(n)); }
+
+ // This is the basic building block of the loop optimizations. It clones an
+ // entire loop body. It makes an old_new loop body mapping; with this
+ // mapping you can find the new-loop equivalent to an old-loop node. All
+ // new-loop nodes are exactly equal to their old-loop counterparts, all
+ // edges are the same. All exits from the old-loop now have a RegionNode
+ // that merges the equivalent new-loop path. This is true even for the
+ // normal "loop-exit" condition. All uses of loop-invariant old-loop values
+ // now come from (one or more) Phis that merge their new-loop equivalents.
+ // Parameter side_by_side_idom:
+ // When side_by_side_idom is NULL, the dominator tree is constructed for
+ // the clone loop to dominate the original. Used in construction of
+ // pre-main-post loop sequence.
+ // When nonnull, the clone and original are side-by-side, both are
+ // dominated by the passed in side_by_side_idom node. Used in
+ // construction of unswitched loops.
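+ // A sketch of the two calling modes ('dd' and 'new_iff' are placeholder
+ // arguments, not names used elsewhere here):
+ //   clone_loop( loop, old_new, dd ); // clone dominates the original
+ //   clone_loop( loop, old_new, dd, new_iff ); // clone and original side by
+ //                                             // side under new_iff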
+ void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
+ Node* side_by_side_idom = NULL);
+
+ // If we got the effect of peeling, either by actually peeling or by
+ // making a pre-loop which must execute at least once, we can remove
+ // all loop-invariant dominated tests in the main body.
+ void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
+
+ // Generate code to do a loop peel for the given loop (and body).
+ // old_new is a temp array.
+ void do_peeling( IdealLoopTree *loop, Node_List &old_new );
+
+ // Add pre and post loops around the given loop. These loops are used
+ // during RCE, unrolling and aligning loops.
+ void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
+ // If Node n lives in the back_ctrl block, we clone a private version of n
+ // in preheader_ctrl block and return that, otherwise return n.
+ Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n );
+
+ // Take steps to maximally unroll the loop. Peel any odd iterations, then
+ // unroll to do double iterations. The next round of major loop transforms
+ // will repeat till the doubled loop body does all remaining iterations in 1
+ // pass.
+ void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new );
+
+ // Unroll the loop body one step - make each trip do 2 iterations.
+ void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
+
+ // Return true if exp is a constant times an induction var
+ bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
+
+ // Return true if exp is a scaled induction var plus (or minus) constant
+ bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0);
+
+ // Eliminate range-checks and other trip-counter vs loop-invariant tests.
+ void do_range_check( IdealLoopTree *loop, Node_List &old_new );
+
+ // Create a slow version of the loop by cloning the loop
+ // and inserting an if to select fast-slow versions.
+ ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
+ Node_List &old_new);
+
+ // Clone loop with an invariant test (that does not exit) and
+ // insert a clone of the test that selects which version to
+ // execute.
+ void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
+
+ // Find candidate "if" for unswitching
+ IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
+
+ // Range Check Elimination uses this function!
+ // Constrain the main loop iterations so the affine function:
+ // scale_con * I + offset < limit
+ // always holds true. That is, either increase the number of iterations in
+ // the pre-loop or the post-loop until the condition holds true in the main
+ // loop. Scale_con, offset and limit are all loop invariant.
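+ // For example (an illustrative instance): a range check on a[2*i + 3]
+ // against a.length gives scale_con == 2, offset == 3 and limit == a.length;
+ // iterations are shifted into the pre- and/or post-loop until
+ // 2*I + 3 < a.length holds for every main-loop iteration I.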
+ void add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit );
+
+ // Partially peel loop up through last_peel node.
+ bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
+
+ // Create a scheduled list of nodes control dependent on ctrl set.
+ void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
+ // Has a use in the vector set
+ bool has_use_in_set( Node* n, VectorSet& vset );
+ // Has use internal to the vector set (ie. not in a phi at the loop head)
+ bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop );
+ // clone "n" for uses that are outside of loop
+ void clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist );
+ // clone "n" for special uses that are in the not_peeled region
+ void clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
+ VectorSet& not_peel, Node_List& sink_list, Node_List& worklist );
+ // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
+ void insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp );
+#ifdef ASSERT
+ // Validate the loop partition sets: peel and not_peel
+ bool is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list, VectorSet& not_peel );
+ // Ensure that uses outside of loop are of the right form
+ bool is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
+ uint orig_exit_idx, uint clone_exit_idx);
+ bool is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx);
+#endif
+
+ // Returns the nonzero constant stride if the if-node is a possible iv test (otherwise returns zero).
+ int stride_of_possible_iv( Node* iff );
+ bool is_possible_iv_test( Node* iff ) { return stride_of_possible_iv(iff) != 0; }
+ // Return the (unique) control output node that's in the loop (if it exists.)
+ Node* stay_in_loop( Node* n, IdealLoopTree *loop);
+ // Insert a signed compare loop exit cloned from an unsigned compare.
+ IfNode* insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop);
+ void remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop);
+ // Utility to register node "n" with PhaseIdealLoop
+ void register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth);
+ // Utility to create an if-projection
+ ProjNode* proj_clone(ProjNode* p, IfNode* iff);
+ // Force the iff control output to be the live_proj
+ Node* short_circuit_if(IfNode* iff, ProjNode* live_proj);
+ // Insert a region before an if projection
+ RegionNode* insert_region_before_proj(ProjNode* proj);
+ // Insert a new if before an if projection
+ ProjNode* insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj);
+
+ // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
+ // "Nearly" because all Nodes have been cloned from the original in the loop,
+ // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
+ // through the Phi recursively, and return a Bool.
+ BoolNode *clone_iff( PhiNode *phi, IdealLoopTree *loop );
+ CmpNode *clone_bool( PhiNode *phi, IdealLoopTree *loop );
+
+
+ // Rework addressing expressions to get the most loop-invariant stuff
+ // moved out. We'd like to do all associative operators, but it's especially
+ // important (common) to do address expressions.
+ Node *remix_address_expressions( Node *n );
+
+ // Attempt to use a conditional move instead of a phi/branch
+ Node *conditional_move( Node *n );
+
+ // Reorganize offset computations to lower register pressure.
+ // Mostly prevent loop-fallout uses of the pre-incremented trip counter
+ // (which are then alive with the post-incremented trip counter
+ // forcing an extra register move)
+ void reorg_offsets( IdealLoopTree *loop );
+
+ // Check for aggressive application of 'split-if' optimization,
+ // using basic block level info.
+ void split_if_with_blocks ( VectorSet &visited, Node_Stack &nstack );
+ Node *split_if_with_blocks_pre ( Node *n );
+ void split_if_with_blocks_post( Node *n );
+ Node *has_local_phi_input( Node *n );
+ // Mark an IfNode as being dominated by a prior test,
+ // without actually altering the CFG (and hence IDOM info).
+ void dominated_by( Node *prevdom, Node *iff );
+
+ // Split Node 'n' through merge point
+ Node *split_thru_region( Node *n, Node *region );
+ // Split Node 'n' through merge point if there is enough win.
+ Node *split_thru_phi( Node *n, Node *region, int policy );
+ // Found an If getting its condition-code input from a Phi in the
+ // same block. Split thru the Region.
+ void do_split_if( Node *iff );
+
+private:
+ // Return a type based on condition control flow
+ const TypeInt* filtered_type( Node *n, Node* n_ctrl);
+ const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); }
+ // Helpers for filtered type
+ const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
+ const TypeInt* filtered_type_at_if( Node* val, Node *if_proj);
+
+ // Helper functions
+ void register_new_node( Node *n, Node *blk );
+ Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
+ Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
+ void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
+ bool split_up( Node *n, Node *blk1, Node *blk2 );
+ void sink_use( Node *use, Node *post_loop );
+ Node *place_near_use( Node *useblock ) const;
+
+ bool _created_loop_node;
+public:
+ void set_created_loop_node() { _created_loop_node = true; }
+ bool created_loop_node() { return _created_loop_node; }
+
+#ifndef PRODUCT
+ void dump( ) const;
+ void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
+ void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;
+ void verify() const; // Major slow :-)
+ void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const;
+ IdealLoopTree *get_loop_idx(Node* n) const {
+ // Dead nodes have no loop, so return the top level loop instead
+ return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root;
+ }
+ // Print some stats
+ static void print_statistics();
+ static int _loop_invokes; // Count of PhaseIdealLoop invokes
+ static int _loop_work; // Sum of PhaseIdealLoop x _unique
+#endif
+};
+
+inline Node* IdealLoopTree::tail() {
+ // Handle lazy update of _tail field
+ Node *n = _tail;
+ //while( !n->in(0) ) // Skip dead CFG nodes
+ //n = n->in(1);
+ if (n->in(0) == NULL)
+ n = _phase->get_ctrl(n);
+ _tail = n;
+ return n;
+}
+
+
+// Iterate over the loop tree using a preorder, left-to-right traversal.
+//
+// Example that visits all counted loops from within PhaseIdealLoop
+//
+// for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
+// IdealLoopTree* lpt = iter.current();
+// if (!lpt->is_counted()) continue;
+// ...
+// }
+class LoopTreeIterator : public StackObj {
+private:
+ IdealLoopTree* _root;
+ IdealLoopTree* _curnt;
+
+public:
+ LoopTreeIterator(IdealLoopTree* root) : _root(root), _curnt(root) {}
+
+ bool done() { return _curnt == NULL; } // Finished iterating?
+
+ void next(); // Advance to next loop tree
+
+ IdealLoopTree* current() { return _curnt; } // Return current value of iterator.
+};
diff --git a/src/share/vm/opto/loopopts.cpp b/src/share/vm/opto/loopopts.cpp
new file mode 100644
index 000000000..0da6b1eee
--- /dev/null
+++ b/src/share/vm/opto/loopopts.cpp
@@ -0,0 +1,2677 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_loopopts.cpp.incl"
+
+//=============================================================================
+//------------------------------split_thru_phi---------------------------------
+// Split Node 'n' through merge point if there is enough win.
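+//
+// An illustrative sketch (hypothetical names): splitting an AddI through a
+// two-way Region/Phi clones the op onto each incoming path and replaces the
+// original with a Phi of the clones:
+//
+//   before:  x = Phi(R, a, b)        after:  t1 = AddI(a, c)   // path 1
+//            y = AddI(x, c)                  t2 = AddI(b, c)   // path 2
+//                                            y  = Phi(R, t1, t2)
+//
+// Each clone that constant-folds or commons up with an existing node counts
+// as a 'win'; the split is kept only if the wins exceed 'policy'.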
+Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
+ int wins = 0;
+ assert( !n->is_CFG(), "" );
+ assert( region->is_Region(), "" );
+ Node *phi = new (C, region->req()) PhiNode( region, n->bottom_type() );
+ uint old_unique = C->unique();
+ for( uint i = 1; i < region->req(); i++ ) {
+ Node *x;
+ Node* the_clone = NULL;
+ if( region->in(i) == C->top() ) {
+ x = C->top(); // Dead path? Use a dead data op
+ } else {
+ x = n->clone(); // Else clone up the data op
+ the_clone = x; // Remember for possible deletion.
+ // Alter data node to use pre-phi inputs
+ if( n->in(0) == region )
+ x->set_req( 0, region->in(i) );
+ for( uint j = 1; j < n->req(); j++ ) {
+ Node *in = n->in(j);
+ if( in->is_Phi() && in->in(0) == region )
+ x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
+ }
+ }
+ // Check for a 'win' on some paths
+ const Type *t = x->Value(&_igvn);
+
+ bool singleton = t->singleton();
+
+ // A TOP singleton indicates that there are no possible values incoming
+ // along a particular edge. In most cases, this is OK, and the Phi will
+ // be eliminated later in an Ideal call. However, we can't allow this to
+ // happen if the singleton occurs on loop entry, as the elimination of
+ // the PhiNode may cause the resulting node to migrate back to a previous
+ // loop iteration.
+ if( singleton && t == Type::TOP ) {
+ // is_Loop() == false does not confirm the absence of a loop (e.g., an
+ // irreducible loop may not be indicated by an affirmative is_Loop());
+ // therefore, the only top we can split thru a phi is on a backedge of
+ // a loop.
+ singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
+ }
+
+ if( singleton ) {
+ wins++;
+ x = ((PhaseGVN&)_igvn).makecon(t);
+ } else {
+ // We now call Identity to try to simplify the cloned node.
+ // Note that some Identity methods call phase->type(this).
+ // Make sure that the type array is big enough for
+ // our new node, even though we may throw the node away.
+ // (Note: This tweaking with igvn only works because x is a new node.)
+ _igvn.set_type(x, t);
+ Node *y = x->Identity(&_igvn);
+ if( y != x ) {
+ wins++;
+ x = y;
+ } else {
+ y = _igvn.hash_find(x);
+ if( y ) {
+ wins++;
+ x = y;
+ } else {
+ // Else x is a new node we are keeping
+ // We do not need register_new_node_with_optimizer
+ // because set_type has already been called.
+ _igvn._worklist.push(x);
+ }
+ }
+ }
+ if (x != the_clone && the_clone != NULL)
+ _igvn.remove_dead_node(the_clone);
+ phi->set_req( i, x );
+ }
+ // Too few wins?
+ if( wins <= policy ) {
+ _igvn.remove_dead_node(phi);
+ return NULL;
+ }
+
+ // Record Phi
+ register_new_node( phi, region );
+
+ for( uint i2 = 1; i2 < phi->req(); i2++ ) {
+ Node *x = phi->in(i2);
+ // If we commoned up the cloned 'x' with another existing Node,
+ // the existing Node picks up a new use. We need to make the
+ // existing Node occur higher up so it dominates its uses.
+ Node *old_ctrl;
+ IdealLoopTree *old_loop;
+
+ // The occasional new node
+ if( x->_idx >= old_unique ) { // Found a new, unplaced node?
+ old_ctrl = x->is_Con() ? C->root() : NULL;
+ old_loop = NULL; // Not in any prior loop
+ } else {
+ old_ctrl = x->is_Con() ? C->root() : get_ctrl(x);
+ old_loop = get_loop(old_ctrl); // Get prior loop
+ }
+ // New late point must dominate new use
+ Node *new_ctrl = dom_lca( old_ctrl, region->in(i2) );
+ // Set new location
+ set_ctrl(x, new_ctrl);
+ IdealLoopTree *new_loop = get_loop( new_ctrl );
+ // If changing loop bodies, see if we need to collect into new body
+ if( old_loop != new_loop ) {
+ if( old_loop && !old_loop->_child )
+ old_loop->_body.yank(x);
+ if( !new_loop->_child )
+ new_loop->_body.push(x); // Collect body info
+ }
+ }
+
+ return phi;
+}
+
+//------------------------------dominated_by------------------------------------
+// Replace the dominated test with an obvious true or false. Place it on the
+// IGVN worklist for later cleanup. Move control-dependent data Nodes on the
+// live path up to the dominating control.
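+//
+// Illustrative shape (hypothetical names):
+//   prevdom = IfTrue(If(bol))  dominates  iff = If(bol)
+// The dominated iff gets its condition replaced by the constant 1 (or 0 for
+// the IfFalse case), so IGVN later folds away its dead arm, and nodes that
+// were control-dependent on the surviving projection are re-pinned to prevdom.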
+void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff ) {
+#ifndef PRODUCT
+ if( VerifyLoopOptimizations && PrintOpto ) tty->print_cr("dominating test");
+#endif
+
+
+ // prevdom is the dominating projection of the dominating test.
+ assert( iff->is_If(), "" );
+ assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
+ int pop = prevdom->Opcode();
+ assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
+ // 'con' is set to true or false to kill the dominated test.
+ Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
+ set_ctrl(con, C->root()); // Constant gets a new use
+ // Hack the dominated test
+ _igvn.hash_delete(iff);
+ iff->set_req(1, con);
+ _igvn._worklist.push(iff);
+
+ // If I don't have a reachable TRUE and FALSE path following the IfNode then
+ // I can assume this path reaches an infinite loop. In this case it's not
+ // important to optimize the data Nodes - either the whole compilation will
+ // be tossed or this path (and all data Nodes) will go dead.
+ if( iff->outcnt() != 2 ) return;
+
+ // Make control-dependent data Nodes on the live path (path that will remain
+ // once the dominated IF is removed) become control-dependent on the
+ // dominating projection.
+ Node* dp = ((IfNode*)iff)->proj_out(pop == Op_IfTrue);
+ IdealLoopTree *old_loop = get_loop(dp);
+
+ for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
+ Node* cd = dp->fast_out(i); // Control-dependent node
+ if( cd->depends_only_on_test() ) {
+ assert( cd->in(0) == dp, "" );
+ _igvn.hash_delete( cd );
+ cd->set_req(0, prevdom);
+ set_early_ctrl( cd );
+ _igvn._worklist.push(cd);
+ IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
+ if( old_loop != new_loop ) {
+ if( !old_loop->_child ) old_loop->_body.yank(cd);
+ if( !new_loop->_child ) new_loop->_body.push(cd);
+ }
+ --i;
+ --imax;
+ }
+ }
+}
+
+//------------------------------has_local_phi_input----------------------------
+// Return n's control block if 'n' has Phi inputs from its local block and
+// no other block-local inputs (all non-local-phi inputs come from earlier
+// blocks); return NULL otherwise.
+Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
+ Node *n_ctrl = get_ctrl(n);
+ // See if some inputs come from a Phi in this block, or from before
+ // this block.
+ uint i;
+ for( i = 1; i < n->req(); i++ ) {
+ Node *phi = n->in(i);
+ if( phi->is_Phi() && phi->in(0) == n_ctrl )
+ break;
+ }
+ if( i >= n->req() )
+ return NULL; // No Phi inputs; nowhere to clone thru
+
+ // Check for inputs created between 'n' and the Phi input. These
+ // must split as well; they have already been given the chance
+ // (courtesy of a post-order visit) and since they did not we must
+ // recover the 'cost' of splitting them by being very profitable
+ // when splitting 'n'. Since this is unlikely we simply give up.
+ for( i = 1; i < n->req(); i++ ) {
+ Node *m = n->in(i);
+ if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
+ // We allow the special case of AddP's with no local inputs.
+ // This allows us to split-up address expressions.
+ if (m->is_AddP() &&
+ get_ctrl(m->in(2)) != n_ctrl &&
+ get_ctrl(m->in(3)) != n_ctrl) {
+ // Move the AddP up to dominating point
+ set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
+ continue;
+ }
+ return NULL;
+ }
+ }
+
+ return n_ctrl;
+}
+
+//------------------------------remix_address_expressions----------------------
+// Rework addressing expressions to get the most loop-invariant stuff
+// moved out. We'd like to do all associative operators, but it's especially
+// important (common) to do address expressions.
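+//
+// Illustrative sketch ('inv' loop-invariant, 'i' loop-varying, names made up):
+//   (i + inv) << 2   ==>   (i << 2) + (inv << 2)
+// The (inv << 2) half is registered at the invariant input's control and so
+// can be hoisted out of the loop; only (i << 2) remains loop-varying.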
+Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
+ if (!has_ctrl(n)) return NULL;
+ Node *n_ctrl = get_ctrl(n);
+ IdealLoopTree *n_loop = get_loop(n_ctrl);
+
+ // See if 'n' mixes loop-varying and loop-invariant inputs and
+ // itself is loop-varying.
+
+ // Only interested in binary ops (and AddP)
+ if( n->req() < 3 || n->req() > 4 ) return NULL;
+
+ Node *n1_ctrl = get_ctrl(n->in( 1));
+ Node *n2_ctrl = get_ctrl(n->in( 2));
+ Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
+ IdealLoopTree *n1_loop = get_loop( n1_ctrl );
+ IdealLoopTree *n2_loop = get_loop( n2_ctrl );
+ IdealLoopTree *n3_loop = get_loop( n3_ctrl );
+
+ // Does one of my inputs spin in a tighter loop than self?
+ if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
+ (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
+ (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
+ return NULL; // Leave well enough alone
+
+ // Is at least one of my inputs loop-invariant?
+ if( n1_loop == n_loop &&
+ n2_loop == n_loop &&
+ n3_loop == n_loop )
+ return NULL; // No loop-invariant inputs
+
+
+ int n_op = n->Opcode();
+
+ // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
+ if( n_op == Op_LShiftI ) {
+ // Scale is loop invariant
+ Node *scale = n->in(2);
+ Node *scale_ctrl = get_ctrl(scale);
+ IdealLoopTree *scale_loop = get_loop(scale_ctrl );
+ if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
+ return NULL;
+ const TypeInt *scale_t = scale->bottom_type()->isa_int();
+ if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
+ return NULL; // Don't bother with byte/short masking
+ // Add must vary with loop (else shift would be loop-invariant)
+ Node *add = n->in(1);
+ Node *add_ctrl = get_ctrl(add);
+ IdealLoopTree *add_loop = get_loop(add_ctrl);
+ //assert( n_loop == add_loop, "" );
+ if( n_loop != add_loop ) return NULL; // happens w/ evil ZKM loops
+
+ // Convert I-V into I + (0-V); same for V-I
+ if( add->Opcode() == Op_SubI &&
+ _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
+ Node *zero = _igvn.intcon(0);
+ set_ctrl(zero, C->root());
+ Node *neg = new (C, 3) SubINode( _igvn.intcon(0), add->in(2) );
+ register_new_node( neg, get_ctrl(add->in(2) ) );
+ add = new (C, 3) AddINode( add->in(1), neg );
+ register_new_node( add, add_ctrl );
+ }
+ if( add->Opcode() != Op_AddI ) return NULL;
+ // See if one add input is loop invariant
+ Node *add_var = add->in(1);
+ Node *add_var_ctrl = get_ctrl(add_var);
+ IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
+ Node *add_invar = add->in(2);
+ Node *add_invar_ctrl = get_ctrl(add_invar);
+ IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
+ if( add_var_loop == n_loop ) {
+ } else if( add_invar_loop == n_loop ) {
+ // Swap to find the invariant part
+ add_invar = add_var;
+ add_invar_ctrl = add_var_ctrl;
+ add_invar_loop = add_var_loop;
+ add_var = add->in(2);
+ add_var_ctrl = get_ctrl(add_var); // update the outer locals (no shadowing)
+ add_var_loop = get_loop(add_var_ctrl);
+ } else // Else neither input is loop invariant
+ return NULL;
+ if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
+ return NULL; // No invariant part of the add?
+
+ // Yes! Reshape address expression!
+ Node *inv_scale = new (C, 3) LShiftINode( add_invar, scale );
+ register_new_node( inv_scale, add_invar_ctrl );
+ Node *var_scale = new (C, 3) LShiftINode( add_var, scale );
+ register_new_node( var_scale, n_ctrl );
+ Node *var_add = new (C, 3) AddINode( var_scale, inv_scale );
+ register_new_node( var_add, n_ctrl );
+ _igvn.hash_delete( n );
+ _igvn.subsume_node( n, var_add );
+ return var_add;
+ }
+
+ // Replace (I+V) with (V+I)
+ if( n_op == Op_AddI ||
+ n_op == Op_AddL ||
+ n_op == Op_AddF ||
+ n_op == Op_AddD ||
+ n_op == Op_MulI ||
+ n_op == Op_MulL ||
+ n_op == Op_MulF ||
+ n_op == Op_MulD ) {
+ if( n2_loop == n_loop ) {
+ assert( n1_loop != n_loop, "" );
+ n->swap_edges(1, 2);
+ }
+ }
+
+ // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
+ // but not if I2 is a constant.
+ if( n_op == Op_AddP ) {
+ if( n2_loop == n_loop && n3_loop != n_loop ) {
+ if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
+ Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
+ Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
+ IdealLoopTree *n22loop = get_loop( n22_ctrl );
+ IdealLoopTree *n23_loop = get_loop( n23_ctrl );
+ if( n22loop != n_loop && n22loop->is_member(n_loop) &&
+ n23_loop == n_loop ) {
+ Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
+ // Stuff new AddP in the loop preheader
+ register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
+ Node *add2 = new (C, 4) AddPNode( n->in(1), add1, n->in(2)->in(3) );
+ register_new_node( add2, n_ctrl );
+ _igvn.hash_delete( n );
+ _igvn.subsume_node( n, add2 );
+ return add2;
+ }
+ }
+ }
+
+ // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
+ if( n2_loop != n_loop && n3_loop == n_loop ) {
+ if( n->in(3)->Opcode() == Op_AddI ) {
+ Node *V = n->in(3)->in(1);
+ Node *I = n->in(3)->in(2);
+ if( is_member(n_loop,get_ctrl(V)) ) {
+ } else {
+ Node *tmp = V; V = I; I = tmp;
+ }
+ if( !is_member(n_loop,get_ctrl(I)) ) {
+ Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2), I );
+ // Stuff new AddP in the loop preheader
+ register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
+ Node *add2 = new (C, 4) AddPNode( n->in(1), add1, V );
+ register_new_node( add2, n_ctrl );
+ _igvn.hash_delete( n );
+ _igvn.subsume_node( n, add2 );
+ return add2;
+ }
+ }
+ }
+ }
+
+ return NULL;
+}
+
+//------------------------------conditional_move-------------------------------
+// Attempt to replace a Phi with a conditional move. We have some pretty
+// strict profitability requirements. All Phis at the merge point must
+// be converted, so we can remove the control flow. We need to limit the
+// number of c-moves to a small handful. All code that was in the side-arms
+// of the CFG diamond is now speculatively executed. This code has to be
+// "cheap enough". We are pretty much limited to CFG diamonds that merge
+// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
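+//
+// Illustrative source-level shape (hypothetical):
+//   if (p) x = a; else x = b;   ==>   x = CMove(Bool(p), a, b)
+// Every Phi hanging off the diamond's Region must convert, and any ops that
+// were local to one arm become speculative inputs of the CMove.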
+Node *PhaseIdealLoop::conditional_move( Node *region ) {
+
+ assert( region->is_Region(), "sanity check" );
+ if( region->req() != 3 ) return NULL;
+
+ // Check for CFG diamond
+ Node *lp = region->in(1);
+ Node *rp = region->in(2);
+ if( !lp || !rp ) return NULL;
+ Node *lp_c = lp->in(0);
+ if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL;
+ IfNode *iff = lp_c->as_If();
+
+ // Check for highly predictable branch. No point in CMOV'ing if
+ // we are going to predict accurately all the time.
+ // %%% This hides patterns produced by utility methods like Math.min.
+ if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
+ iff->_prob > PROB_LIKELY_MAG(3) )
+ return NULL;
+
+ // Check for ops pinned in an arm of the diamond.
+ // Can't remove the control flow in this case
+ if( lp->outcnt() > 1 ) return NULL;
+ if( rp->outcnt() > 1 ) return NULL;
+
+ // Check profitability
+ int cost = 0;
+ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+ Node *out = region->fast_out(i);
+ if( !out->is_Phi() ) continue; // Ignore other control edges, etc
+ PhiNode* phi = out->as_Phi();
+ switch (phi->type()->basic_type()) {
+ case T_LONG:
+ cost++; // Probably encodes as 2 CMOV's
+ case T_INT: // These all CMOV fine
+ case T_FLOAT:
+ case T_DOUBLE:
+ case T_ADDRESS: // (RawPtr)
+ cost++;
+ break;
+ case T_OBJECT: { // Base oops are OK, but not derived oops
+ const TypeOopPtr *tp = phi->type()->isa_oopptr();
+ // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
+ // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus
+ // CMOVE'ing a derived pointer requires we also CMOVE the base. If we
+ // have a Phi for the base here that we convert to a CMOVE all is well
+ // and good. But if the base is dead, we'll not make a CMOVE. Later
+ // the allocator will have to produce a base by creating a CMOVE of the
+ // relevant bases. This puts the allocator in the business of
+ // manufacturing expensive instructions, generally a bad plan.
+ // Just Say No to Conditionally-Moved Derived Pointers.
+ if( tp && tp->offset() != 0 )
+ return NULL;
+ cost++;
+ break;
+ }
+ default:
+ return NULL; // In particular, can't do memory or I/O
+ }
+ // Add in cost any speculative ops
+ for( uint j = 1; j < region->req(); j++ ) {
+ Node *proj = region->in(j);
+ Node *inp = phi->in(j);
+ if (get_ctrl(inp) == proj) { // Found local op
+ cost++;
+ // Check for a chain of dependent ops; these will all become
+ // speculative in a CMOV.
+ for( uint k = 1; k < inp->req(); k++ )
+ if (get_ctrl(inp->in(k)) == proj)
+ return NULL; // Too much speculative goo
+ }
+ }
+ // See if the Phi is used by a Cmp. This will likely Split-If, a
+ // higher-payoff operation.
+ for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
+ Node* use = phi->fast_out(k);
+ if( use->is_Cmp() )
+ return NULL;
+ }
+ }
+ if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo
+
+ // --------------
+ // Now replace all Phis with CMOV's
+ Node *cmov_ctrl = iff->in(0);
+ uint flip = (lp->Opcode() == Op_IfTrue);
+ while( 1 ) {
+ PhiNode* phi = NULL;
+ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+ Node *out = region->fast_out(i);
+ if (out->is_Phi()) {
+ phi = out->as_Phi();
+ break;
+ }
+ }
+ if (phi == NULL) break;
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV");
+#endif
+ // Move speculative ops
+ for( uint j = 1; j < region->req(); j++ ) {
+ Node *proj = region->in(j);
+ Node *inp = phi->in(j);
+ if (get_ctrl(inp) == proj) { // Found local op
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print(" speculate: ");
+ inp->dump();
+ }
+#endif
+ set_ctrl(inp, cmov_ctrl);
+ }
+ }
+ Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
+ register_new_node( cmov, cmov_ctrl );
+ _igvn.hash_delete(phi);
+ _igvn.subsume_node( phi, cmov );
+#ifndef PRODUCT
+ if( VerifyLoopOptimizations ) verify();
+#endif
+ }
+
+ // The useless CFG diamond will fold up later; see the optimization in
+ // RegionNode::Ideal.
+ _igvn._worklist.push(region);
+
+ return iff->in(1);
+}
+
+//------------------------------split_if_with_blocks_pre-----------------------
+// Do the real work in a non-recursive function. Data nodes want to be
+// cloned in the pre-order so they can feed each other nicely.
+Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
+ // Cloning these guys is unlikely to win
+ int n_op = n->Opcode();
+ if( n_op == Op_MergeMem ) return n;
+ if( n->is_Proj() ) return n;
+ // Do not clone-up CmpFXXX variations, as these are always
+ // followed by a CmpI
+ if( n->is_Cmp() ) return n;
+ // Attempt to use a conditional move instead of a phi/branch
+ if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
+ Node *cmov = conditional_move( n );
+ if( cmov ) return cmov;
+ }
+ if( n->is_CFG() || n_op == Op_StorePConditional || n_op == Op_StoreLConditional || n_op == Op_CompareAndSwapI || n_op == Op_CompareAndSwapL ||n_op == Op_CompareAndSwapP) return n;
+ if( n_op == Op_Opaque1 || // Opaque nodes cannot be mod'd
+ n_op == Op_Opaque2 ) {
+ if( !C->major_progress() ) // If chance of no more loop opts...
+ _igvn._worklist.push(n); // maybe we'll remove them
+ return n;
+ }
+
+ if( n->is_Con() ) return n; // No cloning for Con nodes
+
+ Node *n_ctrl = get_ctrl(n);
+ if( !n_ctrl ) return n; // Dead node
+
+ // Attempt to remix address expressions for loop invariants
+ Node *m = remix_address_expressions( n );
+ if( m ) return m;
+
+ // Determine if the Node has inputs from some local Phi.
+ // Returns the block to clone thru.
+ Node *n_blk = has_local_phi_input( n );
+ if( !n_blk ) return n;
+ // Do not clone the trip counter through on a CountedLoop
+ // (messes up the canonical shape).
+ if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
+
+ // Check for having no control input; not pinned. Allow
+ // dominating control.
+ if( n->in(0) ) {
+ Node *dom = idom(n_blk);
+ if( dom_lca( n->in(0), dom ) != n->in(0) )
+ return n;
+ }
+ // Policy: when is it profitable. You must get more wins than
+ // policy before it is considered profitable. Policy is usually 0,
+ // so 1 win is considered profitable. Big merges will require big
+ // cloning, so get a larger policy.
+ int policy = n_blk->req() >> 2;
+
+ // If the loop is a candidate for range check elimination,
+ // delay splitting through its phi until a later loop optimization
+ if (n_blk->is_CountedLoop()) {
+ IdealLoopTree *lp = get_loop(n_blk);
+ if (lp && lp->_rce_candidate) {
+ return n;
+ }
+ }
+
+ // Use same limit as split_if_with_blocks_post
+ if( C->unique() > 35000 ) return n; // Method too big
+
+ // Split 'n' through the merge point if it is profitable
+ Node *phi = split_thru_phi( n, n_blk, policy );
+ if( !phi ) return n;
+
+ // Found a Phi to split thru!
+ // Replace 'n' with the new phi
+ _igvn.hash_delete(n);
+ _igvn.subsume_node( n, phi );
+ // Moved a load around the loop, 'en-registering' something.
+ if( n_blk->Opcode() == Op_Loop && n->is_Load() &&
+ !phi->in(LoopNode::LoopBackControl)->is_Load() )
+ C->set_major_progress();
+
+ return phi;
+}
+
+static bool merge_point_too_heavy(Compile* C, Node* region) {
+ // Bail out if the region and its phis have too many users.
+ int weight = 0;
+ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+ weight += region->fast_out(i)->outcnt();
+ }
+ int nodes_left = MaxNodeLimit - C->unique();
+ if (weight * 8 > nodes_left) {
+#ifndef PRODUCT
+ if (PrintOpto)
+ tty->print_cr("*** Split-if bails out: %d nodes, region weight %d", C->unique(), weight);
+#endif
+ return true;
+ } else {
+ return false;
+ }
+}
+
+#ifdef _LP64
+static bool merge_point_safe(Node* region) {
+ // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
+ // having a PhiNode input. This sidesteps the dangerous case where the split
+ // ConvI2LNode may become TOP if the input Value() does not
+ // overlap the ConvI2L range, leaving a node which may not dominate its
+ // uses.
+ // A better fix for this problem can be found in the BugTraq entry, but
+ // expediency for Mantis demands this hack.
+ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+ Node* n = region->fast_out(i);
+ if (n->is_Phi()) {
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* m = n->fast_out(j);
+ if (m->Opcode() == Op_ConvI2L) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+#endif
+
+
+//------------------------------place_near_use---------------------------------
+// Place some computation next to use but not inside inner loops.
+// For inner loop uses move it to the preheader area.
+Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
+ IdealLoopTree *u_loop = get_loop( useblock );
+ return (u_loop->_irreducible || u_loop->_child)
+ ? useblock
+ : u_loop->_head->in(LoopNode::EntryControl);
+}
+
+
+//------------------------------split_if_with_blocks_post----------------------
+// Do the real work in a non-recursive function. CFG hackery wants to be
+// in the post-order, so it can dirty the I-DOM info and not use the dirtied
+// info.
+void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
+
+ // Cloning Cmp through Phi's involves the split-if transform.
+ // FastLock is not used by an If
+ if( n->is_Cmp() && !n->is_FastLock() ) {
+ if( C->unique() > 35000 ) return; // Method too big
+
+ // Do not do 'split-if' if irreducible loops are present.
+ if( _has_irreducible_loops )
+ return;
+
+ Node *n_ctrl = get_ctrl(n);
+ // Determine if the Node has inputs from some local Phi.
+ // Returns the block to clone thru.
+ Node *n_blk = has_local_phi_input( n );
+ if( n_blk != n_ctrl ) return;
+
+ if( merge_point_too_heavy(C, n_ctrl) )
+ return;
+
+ if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
+ Node *bol = n->unique_out();
+ assert( bol->is_Bool(), "expect a bool here" );
+ if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
+ Node *iff = bol->unique_out();
+
+ // Check some safety conditions
+ if( iff->is_If() ) { // Classic split-if?
+ if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
+ } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
+ if( get_ctrl(iff->in(2)) == n_ctrl ||
+ get_ctrl(iff->in(3)) == n_ctrl )
+ return; // Inputs not yet split-up
+ if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
+ return; // Loop-invar test gates loop-varying CMOVE
+ }
+ } else {
+ return; // some other kind of node, such as an Allocate
+ }
+
+ // Do not do 'split-if' if some paths are dead. First do dead code
+ // elimination and then see if its still profitable.
+ for( uint i = 1; i < n_ctrl->req(); i++ )
+ if( n_ctrl->in(i) == C->top() )
+ return;
+
+ // When is split-if profitable? Every 'win' means some control flow
+ // goes dead, so it's almost always a win.
+ int policy = 0;
+ // If trying to do a 'Split-If' at the loop head, it is only
+ // profitable if the cmp folds up on BOTH paths. Otherwise we
+ // risk peeling a loop forever.
+
+ // CNC - Disabled for now. Requires careful handling of loop
+ // body selection for the cloned code. Also, make sure we check
+ // for any input path not being in the same loop as n_ctrl. For
+ // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
+ // because the alternative loop entry points won't be converted
+ // into LoopNodes.
+ IdealLoopTree *n_loop = get_loop(n_ctrl);
+ for( uint j = 1; j < n_ctrl->req(); j++ )
+ if( get_loop(n_ctrl->in(j)) != n_loop )
+ return;
+
+#ifdef _LP64
+ // Check for safety of the merge point.
+ if( !merge_point_safe(n_ctrl) ) {
+ return;
+ }
+#endif
+
+ // Split compare 'n' through the merge point if it is profitable
+ Node *phi = split_thru_phi( n, n_ctrl, policy );
+ if( !phi ) return;
+
+ // Found a Phi to split thru!
+ // Replace 'n' with the new phi
+ _igvn.hash_delete(n);
+ _igvn.subsume_node( n, phi );
+
+ // Now split the bool up thru the phi
+ Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
+ _igvn.hash_delete(bol);
+ _igvn.subsume_node( bol, bolphi );
+ assert( iff->in(1) == bolphi, "" );
+ if( bolphi->Value(&_igvn)->singleton() )
+ return;
+
+ // Conditional-move? Must split up now
+ if( !iff->is_If() ) {
+ Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
+ _igvn.hash_delete(iff);
+ _igvn.subsume_node( iff, cmovphi );
+ return;
+ }
+
+ // Now split the IF
+ do_split_if( iff );
+ return;
+ }
+
+ // Check for an IF ready to split; one that has its
+ // condition codes input coming from a Phi at the block start.
+ int n_op = n->Opcode();
+
+ // Check for an IF being dominated by another IF same test
+ if( n_op == Op_If ) {
+ Node *bol = n->in(1);
+ uint max = bol->outcnt();
+ // Check for same test used more than once?
+ if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
+ // Search up IDOMs to see if this IF is dominated.
+ Node *cutoff = get_ctrl(bol);
+
+ // Now search up IDOMs till cutoff, looking for a dominating test
+ Node *prevdom = n;
+ Node *dom = idom(prevdom);
+ while( dom != cutoff ) {
+ if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
+ // Replace the dominated test with an obvious true or false.
+ // Place it on the IGVN worklist for later cleanup.
+ C->set_major_progress();
+ dominated_by( prevdom, n );
+#ifndef PRODUCT
+ if( VerifyLoopOptimizations ) verify();
+#endif
+ return;
+ }
+ prevdom = dom;
+ dom = idom(prevdom);
+ }
+ }
+ }
+
+ // See if a shared loop-varying computation has no loop-varying uses.
+ // Happens if something is only used for JVM state in uncommon trap exits,
+ // like various versions of induction variable+offset. Clone the
+ // computation per usage to allow it to sink out of the loop.
+ if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
+ Node *n_ctrl = get_ctrl(n);
+ IdealLoopTree *n_loop = get_loop(n_ctrl);
+ if( n_loop != _ltree_root ) {
+ DUIterator_Fast imax, i = n->fast_outs(imax);
+ for (; i < imax; i++) {
+ Node* u = n->fast_out(i);
+ if( !has_ctrl(u) ) break; // Found control user
+ IdealLoopTree *u_loop = get_loop(get_ctrl(u));
+ if( u_loop == n_loop ) break; // Found loop-varying use
+ if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
+ if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
+ }
+ bool did_break = (i < imax); // Did we break out of the previous loop?
+ if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
+ Node *late_load_ctrl;
+ if (n->is_Load()) {
+ // If n is a load, get and save the result from get_late_ctrl(),
+ // to be later used in calculating the control for n's clones.
+ clear_dom_lca_tags();
+ late_load_ctrl = get_late_ctrl(n, n_ctrl);
+ }
+ // If n is a load, and the late control is the same as the current
+ // control, then the cloning of n is a pointless exercise, because
+ // GVN will ensure that we end up where we started.
+ if (!n->is_Load() || late_load_ctrl != n_ctrl) {
+ for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
+ Node *u = n->last_out(j); // Clone private computation per use
+ _igvn.hash_delete(u);
+ _igvn._worklist.push(u);
+ Node *x = n->clone(); // Clone computation
+ Node *x_ctrl = NULL;
+ if( u->is_Phi() ) {
+ // Replace all uses of normal nodes. Replace Phi uses
+ // individually, so the separate Nodes can sink down
+ // different paths.
+ uint k = 1;
+ while( u->in(k) != n ) k++;
+ u->set_req( k, x );
+ // x goes next to Phi input path
+ x_ctrl = u->in(0)->in(k);
+ --j;
+ } else { // Normal use
+ // Replace all uses
+ for( uint k = 0; k < u->req(); k++ ) {
+ if( u->in(k) == n ) {
+ u->set_req( k, x );
+ --j;
+ }
+ }
+ x_ctrl = get_ctrl(u);
+ }
+
+ // Find control for 'x' next to use but not inside inner loops.
+ // For inner loop uses get the preheader area.
+ x_ctrl = place_near_use(x_ctrl);
+
+ if (n->is_Load()) {
+ // For loads, add a control edge to a CFG node outside of the loop
+ // to force them to not combine and return back inside the loop
+ // during GVN optimization (4641526).
+ //
+ // Because we are setting the actual control input, factor in
+ // the result from get_late_ctrl() so we respect any
+ // anti-dependences. (6233005).
+ x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
+
+ // Don't allow the control input to be a CFG splitting node.
+ // Such nodes should only have ProjNodes as outs, e.g. IfNode
+ // should only have IfTrueNode and IfFalseNode (4985384).
+ x_ctrl = find_non_split_ctrl(x_ctrl);
+ assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
+
+ x->set_req(0, x_ctrl);
+ }
+ register_new_node(x, x_ctrl);
+
+ // Some institutional knowledge is needed here: 'x' is
+ // yanked because if the optimizer runs GVN on it all the
+ // cloned x's will common up and undo this optimization and
+ // be forced back in the loop. This is annoying because it
+ // makes +VerifyOpto report false-positives on progress. I
+ // tried setting control edges on the x's to force them to
+ // not combine, but the matching gets worried when it tries
+ // to fold a StoreP and an AddP together (as part of an
+ // address expression) and the AddP and StoreP have
+ // different controls.
+ if( !x->is_Load() ) _igvn._worklist.yank(x);
+ }
+ _igvn.remove_dead_node(n);
+ }
+ }
+ }
+ }
+
+ // Check for Opaque2's whose loop has disappeared - whose input is in the
+ // same loop nest as their output. Remove 'em, they are no longer useful.
+ if( n_op == Op_Opaque2 &&
+ n->in(1) != NULL &&
+ get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
+ _igvn.add_users_to_worklist(n);
+ _igvn.hash_delete(n);
+ _igvn.subsume_node( n, n->in(1) );
+ }
+}
+
+//------------------------------split_if_with_blocks---------------------------
+// Check for aggressive application of 'split-if' optimization,
+// using basic block level info.
+void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
+ Node *n = C->root();
+ visited.set(n->_idx); // first, mark node as visited
+ // Do pre-visit work for root
+ n = split_if_with_blocks_pre( n );
+ uint cnt = n->outcnt();
+ uint i = 0;
+ while (true) {
+ // Visit all children
+ if (i < cnt) {
+ Node* use = n->raw_out(i);
+ ++i;
+ if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
+ // Now do pre-visit work for this use
+ use = split_if_with_blocks_pre( use );
+ nstack.push(n, i); // Save parent and next use's index.
+ n = use; // Process all children of current use.
+ cnt = use->outcnt();
+ i = 0;
+ }
+ }
+ else {
+ // All of n's children have been processed, complete post-processing.
+ if (cnt != 0 && !n->is_Con()) {
+ assert(has_node(n), "no dead nodes");
+ split_if_with_blocks_post( n );
+ }
+ if (nstack.is_empty()) {
+ // Finished all nodes on stack.
+ break;
+ }
+ // Get saved parent node and next use's index. Visit the rest of uses.
+ n = nstack.node();
+ cnt = n->outcnt();
+ i = nstack.index();
+ nstack.pop();
+ }
+ }
+}
+
+
+//=============================================================================
+//
+// C L O N E A L O O P B O D Y
+//
+
+//------------------------------clone_iff--------------------------------------
+// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
+// "Nearly" because all Nodes have been cloned from the original in the loop,
+// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
+// through the Phi recursively, and return a Bool.
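+//
+// Illustrative shape (hypothetical inputs): a Phi of Bools such as
+//   Phi(R, Bool(Cmp(a1,b1)), Bool(Cmp(a2,b2)))
+// is rebuilt as a single Bool over a Cmp of merged inputs,
+//   Bool(Cmp(Phi(R,a1,a2), Phi(R,b1,b2)))
+// so the merging moves down onto the Cmp's data inputs.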
+BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
+
+ // Convert this Phi into a Phi merging Bools
+ uint i;
+ for( i = 1; i < phi->req(); i++ ) {
+ Node *b = phi->in(i);
+ if( b->is_Phi() ) {
+ _igvn.hash_delete(phi);
+ _igvn._worklist.push(phi);
+ phi->set_req(i, clone_iff( b->as_Phi(), loop ));
+ } else {
+ assert( b->is_Bool(), "" );
+ }
+ }
+
+ Node *sample_bool = phi->in(1);
+ Node *sample_cmp = sample_bool->in(1);
+
+ // Make Phis to merge the Cmp's inputs.
+ int size = phi->in(0)->req();
+ PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP );
+ PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP );
+ for( i = 1; i < phi->req(); i++ ) {
+ Node *n1 = phi->in(i)->in(1)->in(1);
+ Node *n2 = phi->in(i)->in(1)->in(2);
+ phi1->set_req( i, n1 );
+ phi2->set_req( i, n2 );
+ phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
+ phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
+ }
+ // See if these Phis have been made before.
+ // Register with optimizer
+ Node *hit1 = _igvn.hash_find_insert(phi1);
+ if( hit1 ) { // Hit, toss just made Phi
+ _igvn.remove_dead_node(phi1); // Remove new phi
+ assert( hit1->is_Phi(), "" );
+ phi1 = (PhiNode*)hit1; // Use existing phi
+ } else { // Miss
+ _igvn.register_new_node_with_optimizer(phi1);
+ }
+ Node *hit2 = _igvn.hash_find_insert(phi2);
+ if( hit2 ) { // Hit, toss just made Phi
+ _igvn.remove_dead_node(phi2); // Remove new phi
+ assert( hit2->is_Phi(), "" );
+ phi2 = (PhiNode*)hit2; // Use existing phi
+ } else { // Miss
+ _igvn.register_new_node_with_optimizer(phi2);
+ }
+ // Register Phis with loop/block info
+ set_ctrl(phi1, phi->in(0));
+ set_ctrl(phi2, phi->in(0));
+ // Make a new Cmp
+ Node *cmp = sample_cmp->clone();
+ cmp->set_req( 1, phi1 );
+ cmp->set_req( 2, phi2 );
+ _igvn.register_new_node_with_optimizer(cmp);
+ set_ctrl(cmp, phi->in(0));
+
+ // Make a new Bool
+ Node *b = sample_bool->clone();
+ b->set_req(1,cmp);
+ _igvn.register_new_node_with_optimizer(b);
+ set_ctrl(b, phi->in(0));
+
+ assert( b->is_Bool(), "" );
+ return (BoolNode*)b;
+}
+
+//------------------------------clone_bool-------------------------------------
+// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
+// "Nearly" because all Nodes have been cloned from the original in the loop,
+// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
+// through the Phi recursively, and return a Bool.
+CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
+ uint i;
+ // Convert this Phi into a Phi merging Bools
+ for( i = 1; i < phi->req(); i++ ) {
+ Node *b = phi->in(i);
+ if( b->is_Phi() ) {
+ _igvn.hash_delete(phi);
+ _igvn._worklist.push(phi);
+ phi->set_req(i, clone_bool( b->as_Phi(), loop ));
+ } else {
+ assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
+ }
+ }
+
+ Node *sample_cmp = phi->in(1);
+
+ // Make Phis to merge the Cmp's inputs.
+ int size = phi->in(0)->req();
+ PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP );
+ PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP );
+ for( uint j = 1; j < phi->req(); j++ ) {
+ Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
+ Node *n1, *n2;
+ if( cmp_top->is_Cmp() ) {
+ n1 = cmp_top->in(1);
+ n2 = cmp_top->in(2);
+ } else {
+ n1 = n2 = cmp_top;
+ }
+ phi1->set_req( j, n1 );
+ phi2->set_req( j, n2 );
+ phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
+ phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
+ }
+
+ // See if these Phis have been made before.
+ // Register with optimizer
+ Node *hit1 = _igvn.hash_find_insert(phi1);
+ if( hit1 ) { // Hit, toss just made Phi
+ _igvn.remove_dead_node(phi1); // Remove new phi
+ assert( hit1->is_Phi(), "" );
+ phi1 = (PhiNode*)hit1; // Use existing phi
+ } else { // Miss
+ _igvn.register_new_node_with_optimizer(phi1);
+ }
+ Node *hit2 = _igvn.hash_find_insert(phi2);
+ if( hit2 ) { // Hit, toss just made Phi
+ _igvn.remove_dead_node(phi2); // Remove new phi
+ assert( hit2->is_Phi(), "" );
+ phi2 = (PhiNode*)hit2; // Use existing phi
+ } else { // Miss
+ _igvn.register_new_node_with_optimizer(phi2);
+ }
+ // Register Phis with loop/block info
+ set_ctrl(phi1, phi->in(0));
+ set_ctrl(phi2, phi->in(0));
+ // Make a new Cmp
+ Node *cmp = sample_cmp->clone();
+ cmp->set_req( 1, phi1 );
+ cmp->set_req( 2, phi2 );
+ _igvn.register_new_node_with_optimizer(cmp);
+ set_ctrl(cmp, phi->in(0));
+
+ assert( cmp->is_Cmp(), "" );
+ return (CmpNode*)cmp;
+}
+
+//------------------------------sink_use---------------------------------------
+// If 'use' was in the loop-exit block, it now needs to be sunk
+// below the post-loop merge point.
+void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
+ if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
+ set_ctrl(use, post_loop);
+ for (DUIterator j = use->outs(); use->has_out(j); j++)
+ sink_use(use->out(j), post_loop);
+ }
+}
+
+//------------------------------clone_loop-------------------------------------
+//
+// C L O N E A L O O P B O D Y
+//
+// This is the basic building block of the loop optimizations. It clones an
+// entire loop body. It makes an old_new loop body mapping; with this mapping
+// you can find the new-loop equivalent to an old-loop node. All new-loop
+// nodes are exactly equal to their old-loop counterparts, all edges are the
+// same. All exits from the old-loop now have a RegionNode that merges the
+// equivalent new-loop path. This is true even for the normal "loop-exit"
+// condition. All uses of loop-invariant old-loop values now come from (one
+// or more) Phis that merge their new-loop equivalents.
+//
+// This operation leaves the graph in an illegal state: there are two valid
+// control edges coming from the loop pre-header to both loop bodies. I'll
+// definitely have to hack the graph after running this transform.
+//
+// From this building block I will further edit edges to perform loop peeling
+// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
+//
+// Parameter side_by_side_idom:
+// When side_by_side_idom is NULL, the dominator tree is constructed for
+// the clone loop to dominate the original. Used in construction of
+// pre-main-post loop sequence.
+// When nonnull, the clone and original are side-by-side, both are
+// dominated by the side_by_side_idom node. Used in construction of
+// unswitched loops.
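+//
+// Illustrative use of the mapping (sketch): after
+//   clone_loop(loop, old_new, dd, NULL);
+// the clone of any old-loop node 'n' is old_new[n->_idx]; for example the
+// cloned loop head is old_new[loop->_head->_idx]. Callers then rewire edges
+// between the two bodies to build peeled or pre/main/post loop shapes.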
+void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
+ Node* side_by_side_idom) {
+
+ // Step 1: Clone the loop body. Make the old->new mapping.
+ uint i;
+ for( i = 0; i < loop->_body.size(); i++ ) {
+ Node *old = loop->_body.at(i);
+ Node *nnn = old->clone();
+ old_new.map( old->_idx, nnn );
+ _igvn.register_new_node_with_optimizer(nnn);
+ }
+
+
+ // Step 2: Fix the edges in the new body. If the old input is outside the
+ // loop use it. If the old input is INside the loop, use the corresponding
+ // new node instead.
+ for( i = 0; i < loop->_body.size(); i++ ) {
+ Node *old = loop->_body.at(i);
+ Node *nnn = old_new[old->_idx];
+ // Fix CFG/Loop controlling the new node
+ if (has_ctrl(old)) {
+ set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
+ } else {
+ set_loop(nnn, loop->_parent);
+ if (old->outcnt() > 0) {
+ set_idom( nnn, old_new[idom(old)->_idx], dd );
+ }
+ }
+ // Correct edges to the new node
+ for( uint j = 0; j < nnn->req(); j++ ) {
+ Node *n = nnn->in(j);
+ if( n ) {
+ IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
+ if( loop->is_member( old_in_loop ) )
+ nnn->set_req(j, old_new[n->_idx]);
+ }
+ }
+ _igvn.hash_find_insert(nnn);
+ }
+ Node *newhead = old_new[loop->_head->_idx];
+ set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
+
+
+ // Step 3: Now fix control uses. Loop varying control uses have already
+ // been fixed up (as part of all input edges in Step 2). Loop invariant
+ // control uses must be either an IfFalse or an IfTrue. Make a merge
+ // point to merge the old and new IfFalse/IfTrue nodes; make the use
+ // refer to this.
+ ResourceArea *area = Thread::current()->resource_area();
+ Node_List worklist(area);
+ uint new_counter = C->unique();
+ for( i = 0; i < loop->_body.size(); i++ ) {
+ Node* old = loop->_body.at(i);
+ if( !old->is_CFG() ) continue;
+ Node* nnn = old_new[old->_idx];
+
+ // Copy uses to a worklist, so I can munge the def-use info
+ // with impunity.
+ for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
+ worklist.push(old->fast_out(j));
+
+ while( worklist.size() ) { // Visit all uses
+ Node *use = worklist.pop();
+ if (!has_node(use)) continue; // Ignore dead nodes
+ IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
+ if( !loop->is_member( use_loop ) && use->is_CFG() ) {
+ // Both OLD and USE are CFG nodes here.
+ assert( use->is_Proj(), "" );
+
+ // Clone the loop exit control projection
+ Node *newuse = use->clone();
+ newuse->set_req(0,nnn);
+ _igvn.register_new_node_with_optimizer(newuse);
+ set_loop(newuse, use_loop);
+ set_idom(newuse, nnn, dom_depth(nnn) + 1 );
+
+ // We need a Region to merge the exit from the peeled body and the
+ // exit from the old loop body.
+ RegionNode *r = new (C, 3) RegionNode(3);
+ // Map the old use to the new merge point
+ old_new.map( use->_idx, r );
+ uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
+ assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
+
+ // The original user of 'use' uses 'r' instead.
+ for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
+ Node* useuse = use->last_out(l);
+ _igvn.hash_delete(useuse);
+ _igvn._worklist.push(useuse);
+ uint uses_found = 0;
+ if( useuse->in(0) == use ) {
+ useuse->set_req(0, r);
+ uses_found++;
+ if( useuse->is_CFG() ) {
+ assert( dom_depth(useuse) > dd_r, "" );
+ set_idom(useuse, r, dom_depth(useuse));
+ }
+ }
+ for( uint k = 1; k < useuse->req(); k++ ) {
+ if( useuse->in(k) == use ) {
+ useuse->set_req(k, r);
+ uses_found++;
+ }
+ }
+ l -= uses_found; // we deleted 1 or more copies of this edge
+ }
+
+ // Now finish up 'r'
+ r->set_req( 1, newuse );
+ r->set_req( 2, use );
+ _igvn.register_new_node_with_optimizer(r);
+ set_loop(r, use_loop);
+ set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
+ } // End of if a loop-exit test
+ }
+ }
+
+ // Step 4: If loop-invariant use is not control, it must be dominated by a
+ // loop exit IfFalse/IfTrue. Find "proper" loop exit. Make a Region
+ // there if needed. Make a Phi there merging old and new used values.
+ Node_List *split_if_set = NULL;
+ Node_List *split_bool_set = NULL;
+ Node_List *split_cex_set = NULL;
+ for( i = 0; i < loop->_body.size(); i++ ) {
+ Node* old = loop->_body.at(i);
+ Node* nnn = old_new[old->_idx];
+ // Copy uses to a worklist, so I can munge the def-use info
+ // with impunity.
+ for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
+ worklist.push(old->fast_out(j));
+
+ while( worklist.size() ) {
+ Node *use = worklist.pop();
+ if (!has_node(use)) continue; // Ignore dead nodes
+ if (use->in(0) == C->top()) continue;
+ IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
+ // Check for data-use outside of loop - at least one of OLD or USE
+ // must not be a CFG node.
+ if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
+
+ // If the Data use is an IF, that means we have an IF outside of the
+ // loop that is switching on a condition that is set inside of the
+ // loop. Happens if people set a loop-exit flag; then test the flag
+ // in the loop to break the loop, then test is again outside of the
+ // loop to determine which way the loop exited.
+ if( use->is_If() || use->is_CMove() ) {
+ // Since this case is highly unlikely, we lazily build the worklist
+ // of such Nodes to be split.
+ if( !split_if_set )
+ split_if_set = new Node_List(area);
+ split_if_set->push(use);
+ }
+ if( use->is_Bool() ) {
+ if( !split_bool_set )
+ split_bool_set = new Node_List(area);
+ split_bool_set->push(use);
+ }
+ if( use->Opcode() == Op_CreateEx ) {
+ if( !split_cex_set )
+ split_cex_set = new Node_List(area);
+ split_cex_set->push(use);
+ }
+
+
+ // Get "block" use is in
+ uint idx = 0;
+ while( use->in(idx) != old ) idx++;
+ Node *prev = use->is_CFG() ? use : get_ctrl(use);
+ assert( !loop->is_member( get_loop( prev ) ), "" );
+ Node *cfg = prev->_idx >= new_counter
+ ? prev->in(2)
+ : idom(prev);
+ if( use->is_Phi() ) // Phi use is in prior block
+ cfg = prev->in(idx); // NOT in block of Phi itself
+ if (cfg->is_top()) { // Use is dead?
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ use->set_req(idx, C->top());
+ continue;
+ }
+
+ while( !loop->is_member( get_loop( cfg ) ) ) {
+ prev = cfg;
+ cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
+ }
+ // If the use occurs after merging several exits from the loop, then
+ // old value must have dominated all those exits. Since the same old
+ // value was used on all those exits we did not need a Phi at this
+ // merge point. NOW we do need a Phi here. Each loop exit value
+ // is now merged with the peeled body exit; each exit gets its own
+ // private Phi and those Phis need to be merged here.
+ Node *phi;
+ if( prev->is_Region() ) {
+ if( idx == 0 ) { // Updating control edge?
+ phi = prev; // Just use existing control
+ } else { // Else need a new Phi
+ phi = PhiNode::make( prev, old );
+ // Now recursively fix up the new uses of old!
+ for( uint i = 1; i < prev->req(); i++ ) {
+ worklist.push(phi); // Onto worklist once for each 'old' input
+ }
+ }
+ } else {
+ // Get new RegionNode merging old and new loop exits
+ prev = old_new[prev->_idx];
+ assert( prev, "just made this in step 7" );
+ if( idx == 0 ) { // Updating control edge?
+ phi = prev; // Just use existing control
+ } else { // Else need a new Phi
+ // Make a new Phi merging data values properly
+ phi = PhiNode::make( prev, old );
+ phi->set_req( 1, nnn );
+ }
+ }
+ // If inserting a new Phi, check for prior hits
+ if( idx != 0 ) {
+ Node *hit = _igvn.hash_find_insert(phi);
+ if( hit == NULL ) {
+ _igvn.register_new_node_with_optimizer(phi); // Register new phi
+ } else { // or
+ // Remove the new phi from the graph and use the hit
+ _igvn.remove_dead_node(phi);
+ phi = hit; // Use existing phi
+ }
+ set_ctrl(phi, prev);
+ }
+ // Make 'use' use the Phi instead of the old loop body exit value
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ use->set_req(idx, phi);
+ if( use->_idx >= new_counter ) { // If updating new phis
+ // Not needed for correctness, but prevents a weak assert
+ // in AddPNode from tripping (when we end up with different
+ // base & derived Phis that will become the same after
+ // IGVN does CSE).
+ Node *hit = _igvn.hash_find_insert(use);
+ if( hit ) // Go ahead and re-hash for hits.
+ _igvn.subsume_node( use, hit );
+ }
+
+ // If 'use' was in the loop-exit block, it now needs to be sunk
+ // below the post-loop merge point.
+ sink_use( use, prev );
+ }
+ }
+ }
+
+ // Check for IFs that need splitting/cloning. Happens if an IF outside of
+ // the loop uses a condition set in the loop. The original IF probably
+ // takes control from one or more OLD Regions (which in turn get from NEW
+ // Regions). In any case, there will be a set of Phis for each merge point
+// from the IF up to where the original BOOL def exits the loop.
+ if( split_if_set ) {
+ while( split_if_set->size() ) {
+ Node *iff = split_if_set->pop();
+ if( iff->in(1)->is_Phi() ) {
+ BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
+ _igvn.hash_delete(iff);
+ _igvn._worklist.push(iff);
+ iff->set_req(1, b);
+ }
+ }
+ }
+ if( split_bool_set ) {
+ while( split_bool_set->size() ) {
+ Node *b = split_bool_set->pop();
+ Node *phi = b->in(1);
+ assert( phi->is_Phi(), "" );
+ CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
+ _igvn.hash_delete(b);
+ _igvn._worklist.push(b);
+ b->set_req(1, cmp);
+ }
+ }
+ if( split_cex_set ) {
+ while( split_cex_set->size() ) {
+ Node *b = split_cex_set->pop();
+ assert( b->in(0)->is_Region(), "" );
+ assert( b->in(1)->is_Phi(), "" );
+ assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
+ split_up( b, b->in(0), NULL );
+ }
+ }
+
+}
+
+
+//---------------------- stride_of_possible_iv -------------------------------------
+// Looks for an iff/bool/comp with one operand of the compare
+// being a cycle involving an add and a phi,
+// with an optional truncation (left-shift followed by a right-shift)
+// of the add. Returns zero if not an iv.
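+//
+// Illustrative shape (sketch) for an iv advanced by a constant 4:
+//   (If (Bool (CmpU (Phi ... (AddI phi 4)) limit)))
+// returns 4; if no add-around-phi cycle with a constant increment is found,
+// the result is 0.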
+int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
+ Node* trunc1 = NULL;
+ Node* trunc2 = NULL;
+ const TypeInt* ttype = NULL;
+ if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
+ return 0;
+ }
+ BoolNode* bl = iff->in(1)->as_Bool();
+ Node* cmp = bl->in(1);
+ if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
+ return 0;
+ }
+ // Must have an invariant operand
+ if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
+ return 0;
+ }
+ Node* add2 = NULL;
+ Node* cmp1 = cmp->in(1);
+ if (cmp1->is_Phi()) {
+ // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
+ Node* phi = cmp1;
+ for (uint i = 1; i < phi->req(); i++) {
+ Node* in = phi->in(i);
+ Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
+ &trunc1, &trunc2, &ttype);
+ if (add && add->in(1) == phi) {
+ add2 = add->in(2);
+ break;
+ }
+ }
+ } else {
+ // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
+ Node* addtrunc = cmp1;
+ Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
+ &trunc1, &trunc2, &ttype);
+ if (add && add->in(1)->is_Phi()) {
+ Node* phi = add->in(1);
+ for (uint i = 1; i < phi->req(); i++) {
+ if (phi->in(i) == addtrunc) {
+ add2 = add->in(2);
+ break;
+ }
+ }
+ }
+ }
+ if (add2 != NULL) {
+ const TypeInt* add2t = _igvn.type(add2)->is_int();
+ if (add2t->is_con()) {
+ return add2t->get_con();
+ }
+ }
+ return 0;
+}
+
+
+//---------------------- stay_in_loop -------------------------------------
+// Return the (unique) control output node that's in the loop (if it exists).
+Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
+ Node* unique = NULL;
+ if (!n) return NULL;
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* use = n->fast_out(i);
+ if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
+ if (unique != NULL) {
+ return NULL;
+ }
+ unique = use;
+ }
+ }
+ return unique;
+}
+
+//------------------------------ register_node -------------------------------------
+// Utility to register node "n" with PhaseIdealLoop
+void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
+ _igvn.register_new_node_with_optimizer(n);
+ loop->_body.push(n);
+ if (n->is_CFG()) {
+ set_loop(n, loop);
+ set_idom(n, pred, ddepth);
+ } else {
+ set_ctrl(n, pred);
+ }
+}
+
+//------------------------------ proj_clone -------------------------------------
+// Utility to create an if-projection
+ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
+ ProjNode* c = p->clone()->as_Proj();
+ c->set_req(0, iff);
+ return c;
+}
+
+//------------------------------ short_circuit_if -------------------------------------
+// Force the iff control output to be the live_proj
+Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
+ int proj_con = live_proj->_con;
+ assert(proj_con == 0 || proj_con == 1, "false or true projection");
+ Node *con = _igvn.intcon(proj_con);
+ set_ctrl(con, C->root());
+ if (iff) {
+ iff->set_req(1, con);
+ }
+ return con;
+}
+
+//------------------------------ insert_if_before_proj -------------------------------------
+// Insert a new if before an if projection (* - new node)
+//
+// before
+// if(test)
+// / \
+// v v
+// other-proj proj (arg)
+//
+// after
+// if(test)
+// / \
+// / v
+// | * proj-clone
+// v |
+// other-proj v
+// * new_if(relop(cmp[IU](left,right)))
+// / \
+// v v
+// * new-proj proj
+// (returned)
+//
+ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
+ IfNode* iff = proj->in(0)->as_If();
+ IdealLoopTree *loop = get_loop(proj);
+ ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
+ int ddepth = dom_depth(proj);
+
+ _igvn.hash_delete(iff);
+ _igvn._worklist.push(iff);
+ _igvn.hash_delete(proj);
+ _igvn._worklist.push(proj);
+
+ proj->set_req(0, NULL); // temporary disconnect
+ ProjNode* proj2 = proj_clone(proj, iff);
+ register_node(proj2, loop, iff, ddepth);
+
+ Node* cmp = Signed ? (Node*) new (C,3)CmpINode(left, right) : (Node*) new (C,3)CmpUNode(left, right);
+ register_node(cmp, loop, proj2, ddepth);
+
+ BoolNode* bol = new (C,2)BoolNode(cmp, relop);
+ register_node(bol, loop, proj2, ddepth);
+
+ IfNode* new_if = new (C,2)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
+ register_node(new_if, loop, proj2, ddepth);
+
+ proj->set_req(0, new_if); // reattach
+ set_idom(proj, new_if, ddepth);
+
+ ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
+ register_node(new_exit, get_loop(other_proj), new_if, ddepth);
+
+ return new_exit;
+}
+
+//------------------------------ insert_region_before_proj -------------------------------------
+// Insert a region before an if projection (* - new node)
+//
+// before
+// if(test)
+// / |
+// v |
+// proj v
+// other-proj
+//
+// after
+// if(test)
+// / |
+// v |
+// * proj-clone v
+// | other-proj
+// v
+// * new-region
+// |
+// v
+// * dum_if
+// / \
+// v \
+// * dum-proj v
+// proj
+//
+RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
+ IfNode* iff = proj->in(0)->as_If();
+ IdealLoopTree *loop = get_loop(proj);
+ ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
+ int ddepth = dom_depth(proj);
+
+ _igvn.hash_delete(iff);
+ _igvn._worklist.push(iff);
+ _igvn.hash_delete(proj);
+ _igvn._worklist.push(proj);
+
+ proj->set_req(0, NULL); // temporary disconnect
+ ProjNode* proj2 = proj_clone(proj, iff);
+ register_node(proj2, loop, iff, ddepth);
+
+ RegionNode* reg = new (C,2)RegionNode(2);
+ reg->set_req(1, proj2);
+ register_node(reg, loop, iff, ddepth);
+
+ IfNode* dum_if = new (C,2)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
+ register_node(dum_if, loop, reg, ddepth);
+
+ proj->set_req(0, dum_if); // reattach
+ set_idom(proj, dum_if, ddepth);
+
+ ProjNode* dum_proj = proj_clone(other_proj, dum_if);
+ register_node(dum_proj, loop, dum_if, ddepth);
+
+ return reg;
+}
+
+//------------------------------ insert_cmpi_loop_exit -------------------------------------
+// Clone a signed compare loop exit from an unsigned compare and
+// insert it before the unsigned cmp on the stay-in-loop path.
+// All new nodes are inserted in the dominator tree between the original
+// if and its projections. The original if test is replaced with
+// a constant to force the stay-in-loop path.
+//
+// This is done to make sure that the original if and its projections
+// still dominate the same set of control nodes, that the ctrl() relation
+// from data nodes to them is preserved, and that their loop nesting is
+// preserved.
+//
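+// For example, with a positive stride the cloned unsigned exit "i <u limit"
+// gets a new dominating signed test "i < limit"; with a negative stride the
+// new signed test is "i >= 0" (the zero limit is created below).
+//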
+// before
+// if(i <u limit) unsigned compare loop exit
+// / |
+// v v
+// exit-proj stay-in-loop-proj
+//
+// after
+// if(stay-in-loop-const) original if
+// / |
+// / v
+// / if(i < limit) new signed test
+// / / |
+// / / v
+// / / if(i <u limit) new cloned unsigned test
+// / / / |
+// v v v |
+// region |
+// | |
+// dum-if |
+// / | |
+// ether | |
+// v v
+// exit-proj stay-in-loop-proj
+//
+IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
+ const bool Signed = true;
+ const bool Unsigned = false;
+
+ BoolNode* bol = if_cmpu->in(1)->as_Bool();
+ if (bol->_test._test != BoolTest::lt) return NULL;
+ CmpNode* cmpu = bol->in(1)->as_Cmp();
+ if (cmpu->Opcode() != Op_CmpU) return NULL;
+ int stride = stride_of_possible_iv(if_cmpu);
+ if (stride == 0) return NULL;
+
+ ProjNode* lp_continue = stay_in_loop(if_cmpu, loop)->as_Proj();
+ ProjNode* lp_exit = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
+
+ Node* limit = NULL;
+ if (stride > 0) {
+ limit = cmpu->in(2);
+ } else {
+ limit = _igvn.makecon(TypeInt::ZERO);
+ set_ctrl(limit, C->root());
+ }
+ // Create a new region on the exit path
+ RegionNode* reg = insert_region_before_proj(lp_exit);
+
+ // Clone the if-cmpu-true-false using a signed compare
+ BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
+ ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
+ reg->add_req(cmpi_exit);
+
+ // Clone the if-cmpu-true-false
+ BoolTest::mask rel_u = bol->_test._test;
+ ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
+ reg->add_req(cmpu_exit);
+
+ // Force original if to stay in loop.
+ short_circuit_if(if_cmpu, lp_continue);
+
+ return cmpi_exit->in(0)->as_If();
+}
+
+//------------------------------ remove_cmpi_loop_exit -------------------------------------
+// Remove a previously inserted signed compare loop exit.
+void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
+ Node* lp_proj = stay_in_loop(if_cmp, loop);
+ assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
+ stay_in_loop(lp_proj, loop)->is_If() &&
+ stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
+ Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
+ set_ctrl(con, C->root());
+ if_cmp->set_req(1, con);
+}
+
+//------------------------------ scheduled_nodelist -------------------------------------
+// Create a post order schedule of nodes that are in the
+// "member" set. The list is returned in "sched".
+// The first node in "sched" is the loop head, followed by
+// nodes which have no inputs in the "member" set, and then by
+// the nodes that have an immediate input dependence on a node
+// in "sched".
+void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
+
+ assert(member.test(loop->_head->_idx), "loop head must be in member set");
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+ Node_Stack nstack(a, loop->_body.size());
+
+ Node* n = loop->_head; // top of stack is cached in "n"
+ uint idx = 0;
+ visited.set(n->_idx);
+
+ // Initially push all with no inputs from within member set
+ for(uint i = 0; i < loop->_body.size(); i++ ) {
+ Node *elt = loop->_body.at(i);
+ if (member.test(elt->_idx)) {
+ bool found = false;
+ for (uint j = 0; j < elt->req(); j++) {
+ Node* def = elt->in(j);
+ if (def && member.test(def->_idx) && def != elt) {
+ found = true;
+ break;
+ }
+ }
+ if (!found && elt != loop->_head) {
+ nstack.push(n, idx);
+ n = elt;
+ assert(!visited.test(n->_idx), "not seen yet");
+ visited.set(n->_idx);
+ }
+ }
+ }
+
+ // traverse out's that are in the member set
+ while (true) {
+ if (idx < n->outcnt()) {
+ Node* use = n->raw_out(idx);
+ idx++;
+ if (!visited.test_set(use->_idx)) {
+ if (member.test(use->_idx)) {
+ nstack.push(n, idx);
+ n = use;
+ idx = 0;
+ }
+ }
+ } else {
+ // All outputs processed
+ sched.push(n);
+ if (nstack.is_empty()) break;
+ n = nstack.node();
+ idx = nstack.index();
+ nstack.pop();
+ }
+ }
+}
+
+
+//------------------------------ has_use_in_set -------------------------------------
+// Has a use in the vector set
+bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if (vset.test(use->_idx)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+//------------------------------ has_use_internal_to_set -------------------------------------
+// Has a use internal to the vector set (i.e. not in a phi at the loop head)
+bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
+ Node* head = loop->_head;
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+//------------------------------ clone_for_use_outside_loop -------------------------------------
+// Clone "n" for uses that are outside of the loop
+void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
+
+ assert(worklist.size() == 0, "should be empty");
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
+ worklist.push(use);
+ }
+ }
+ while( worklist.size() ) {
+ Node *use = worklist.pop();
+ if (!has_node(use) || use->in(0) == C->top()) continue;
+ uint j;
+ for (j = 0; j < use->req(); j++) {
+ if (use->in(j) == n) break;
+ }
+ assert(j < use->req(), "must be there");
+
+ // clone "n" and insert it between the inputs of "n" and the use outside the loop
+ Node* n_clone = n->clone();
+ _igvn.hash_delete(use);
+ use->set_req(j, n_clone);
+ _igvn._worklist.push(use);
+ if (!use->is_Phi()) {
+ Node* use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
+ set_ctrl(n_clone, use_c);
+ assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
+ get_loop(use_c)->_body.push(n_clone);
+ } else {
+ // Use in a phi is considered a use in the associated predecessor block
+ Node *prevbb = use->in(0)->in(j);
+ set_ctrl(n_clone, prevbb);
+ assert(!loop->is_member(get_loop(prevbb)), "should be outside loop");
+ get_loop(prevbb)->_body.push(n_clone);
+ }
+ _igvn.register_new_node_with_optimizer(n_clone);
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
+ }
+#endif
+ }
+}
+
+
+//------------------------------ clone_for_special_use_inside_loop -------------------------------------
+// Clone "n" for special uses that are in the not_peeled region.
+// If these def-uses occur in separate blocks, the code generator
+// marks the method as not compilable. For example, if a "BoolNode"
+// is in a different basic block than the "IfNode" that uses it, then
+// the compilation is aborted in the code generator.
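+// For example, if a Bool in the peel region feeds an If in the not_peeled
+// region, the Bool is cloned, the clone is added to the not_peel set, and
+// the If is rewired to the clone so the test stays with its user on the
+// not-peeled side.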
+void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
+ VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
+ if (n->is_Phi() || n->is_Load()) {
+ return;
+ }
+ assert(worklist.size() == 0, "should be empty");
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if ( not_peel.test(use->_idx) &&
+ (use->is_If() || use->is_CMove() || use->is_Bool()) &&
+ use->in(1) == n) {
+ worklist.push(use);
+ }
+ }
+ if (worklist.size() > 0) {
+ // clone "n" and insert it between inputs of "n" and the use
+ Node* n_clone = n->clone();
+ loop->_body.push(n_clone);
+ _igvn.register_new_node_with_optimizer(n_clone);
+ set_ctrl(n_clone, get_ctrl(n));
+ sink_list.push(n_clone);
+ not_peel <<= n_clone->_idx; // add n_clone to not_peel set.
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
+ }
+#endif
+ while( worklist.size() ) {
+ Node *use = worklist.pop();
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ for (uint j = 1; j < use->req(); j++) {
+ if (use->in(j) == n) {
+ use->set_req(j, n_clone);
+ }
+ }
+ }
+ }
+}
+
+
+//------------------------------ insert_phi_for_loop -------------------------------------
+// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
+void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
+ Node *phi = PhiNode::make(lp, back_edge_val);
+ phi->set_req(LoopNode::EntryControl, lp_entry_val);
+ // Use existing phi if it already exists
+ Node *hit = _igvn.hash_find_insert(phi);
+ if( hit == NULL ) {
+ _igvn.register_new_node_with_optimizer(phi);
+ set_ctrl(phi, lp);
+ } else {
+ // Remove the new phi from the graph and use the hit
+ _igvn.remove_dead_node(phi);
+ phi = hit;
+ }
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ use->set_req(idx, phi);
+}
+
+#ifdef ASSERT
+//------------------------------ is_valid_loop_partition -------------------------------------
+// Validate the loop partition sets: peel and not_peel
+bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
+ VectorSet& not_peel ) {
+ uint i;
+ // Check that peel_list entries are in the peel set
+ for (i = 0; i < peel_list.size(); i++) {
+ if (!peel.test(peel_list.at(i)->_idx)) {
+ return false;
+ }
+ }
+  // Check that all loop members are in exactly one of the peel set or the not_peel set
+ for (i = 0; i < loop->_body.size(); i++ ) {
+ Node *def = loop->_body.at(i);
+ uint di = def->_idx;
+ // Check that peel set elements are in peel_list
+ if (peel.test(di)) {
+ if (not_peel.test(di)) {
+ return false;
+ }
+ // Must be in peel_list also
+ bool found = false;
+ for (uint j = 0; j < peel_list.size(); j++) {
+ if (peel_list.at(j)->_idx == di) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ return false;
+ }
+ } else if (not_peel.test(di)) {
+ if (peel.test(di)) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
+// Ensure a use outside of the loop is of the right form
+bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
+ Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
+ return (use->is_Phi() &&
+ use_c->is_Region() && use_c->req() == 3 &&
+ (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
+ use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
+ use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
+ loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
+}
+
+//------------------------------ is_valid_clone_loop_form -------------------------------------
+// Ensure that all uses outside of loop are of the right form
+bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
+ uint orig_exit_idx, uint clone_exit_idx) {
+ uint len = peel_list.size();
+ for (uint i = 0; i < len; i++) {
+ Node *def = peel_list.at(i);
+
+ for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
+ Node *use = def->fast_out(j);
+ Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
+ if (!loop->is_member(get_loop(use_c))) {
+ // use is not in the loop, check for correct structure
+ if (use->in(0) == def) {
+ // Okay
+ } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+#endif
+
+//------------------------------ partial_peel -------------------------------------
+// Partially peel (aka loop rotation) the top portion of a loop (called
+// the peel section below) by cloning it and placing one copy just before
+// the new loop head and the other copy at the bottom of the new loop.
+//
+// before after where it came from
+//
+// stmt1 stmt1
+// loop: stmt2 clone
+// stmt2 if condA goto exitA clone
+// if condA goto exitA new_loop: new
+// stmt3 stmt3 clone
+// if !condB goto loop if condB goto exitB clone
+// exitB: stmt2 orig
+// stmt4 if !condA goto new_loop orig
+// exitA: goto exitA
+// exitB:
+// stmt4
+// exitA:
+//
+// Step 1: find the cut point: an exit test on probable
+// induction variable.
+// Step 2: schedule (with cloning) operations in the peel
+// section that can be executed after the cut into
+// the section that is not peeled. This may need
+// to clone operations into exit blocks. For
+// instance, a reference to A[i] in the not-peel
+// section and a reference to B[i] in an exit block
+// may cause a left-shift of i by 2 to be placed
+// in the peel block. This step will clone the left
+// shift into the exit block and sink the left shift
+// from the peel to the not-peel section.
+// Step 3: clone the loop, retarget the control, and insert
+// phis for values that are live across the new loop
+// head. This is very dependent on the graph structure
+// from clone_loop. It creates region nodes for
+//               exit control and associated phi nodes for values that
+//               flow out of the loop through that exit. The region
+//               node is dominated by the clone's control projection.
+//               So the clone's peel section is placed before the
+//               new loop head, and the clone's not-peel section
+//               forms the top part of the new loop. The original
+// peel section forms the tail of the new loop.
+// Step 4: update the dominator tree and recompute the
+// dominator depth.
+//
+// orig
+//
+// stmt1
+// |
+// v
+// loop<----+
+// | |
+// stmt2 |
+// | |
+// v |
+// ifA |
+// / | |
+// v v |
+// false true ^ <-- last_peel
+// / | |
+// / ===|==cut |
+// / stmt3 | <-- first_not_peel
+// / | |
+// | v |
+// v ifB |
+// exitA: / \ |
+// / \ |
+// v v |
+// false true |
+// / \ |
+// / ----+
+// |
+// v
+// exitB:
+// stmt4
+//
+//
+// after clone loop
+//
+// stmt1
+// / \
+// clone / \ orig
+// / \
+// / \
+// v v
+// +---->loop loop<----+
+// | | | |
+// | stmt2 stmt2 |
+// | | | |
+// | v v |
+// | ifA ifA |
+// | | \ / | |
+// | v v v v |
+// ^ true false false true ^ <-- last_peel
+// | | ^ \ / | |
+// | cut==|== \ \ / ===|==cut |
+// | stmt3 \ \ / stmt3 | <-- first_not_peel
+// | | dom | | | |
+// | v \ 1v v2 v |
+// | ifB regionA ifB |
+// | / \ | / \ |
+// | / \ v / \ |
+// | v v exitA: v v |
+// | true false false true |
+// | / ^ \ / \ |
+// +---- \ \ / ----+
+// dom \ /
+// \ 1v v2
+// regionB
+// |
+// v
+// exitB:
+// stmt4
+//
+//
+// after partial peel
+//
+// stmt1
+// /
+// clone / orig
+// / TOP
+// / \
+// v v
+// TOP->region region----+
+// | | |
+// stmt2 stmt2 |
+// | | |
+// v v |
+// ifA ifA |
+// | \ / | |
+// v v v v |
+// true false false true | <-- last_peel
+// | ^ \ / +------|---+
+// +->newloop \ \ / === ==cut | |
+// | stmt3 \ \ / TOP | |
+// | | dom | | stmt3 | | <-- first_not_peel
+// | v \ 1v v2 v | |
+// | ifB regionA ifB ^ v
+// | / \ | / \ | |
+// | / \ v / \ | |
+// | v v exitA: v v | |
+// | true false false true | |
+// | / ^ \ / \ | |
+// | | \ \ / v | |
+// | | dom \ / TOP | |
+// | | \ 1v v2 | |
+// ^ v regionB | |
+// | | | | |
+// | | v ^ v
+// | | exitB: | |
+// | | stmt4 | |
+// | +------------>-----------------+ |
+// | |
+// +-----------------<---------------------+
+//
+//
+// final graph
+//
+// stmt1
+// |
+// v
+// ........> ifA clone
+// : / |
+// dom / |
+// : v v
+// : false true
+// : | |
+// : | stmt2 clone
+// : | |
+// : | v
+// : | newloop<-----+
+// : | | |
+// : | stmt3 clone |
+// : | | |
+// : | v |
+// : | ifB |
+// : | / \ |
+// : | v v |
+// : | false true |
+// : | | | |
+// : | v stmt2 |
+// : | exitB: | |
+// : | stmt4 v |
+// : | ifA orig |
+// : | / \ |
+// : | / \ |
+// : | v v |
+// : | false true |
+// : | / \ |
+// : v v -----+
+// RegionA
+// |
+// v
+// exitA
+//
+bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
+
+ LoopNode *head = loop->_head->as_Loop();
+
+ if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
+ return false;
+ }
+
+ // Check for complex exit control
+ for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
+ Node *n = loop->_body.at(ii);
+ int opc = n->Opcode();
+ if (n->is_Call() ||
+ opc == Op_Catch ||
+ opc == Op_CatchProj ||
+ opc == Op_Jump ||
+ opc == Op_JumpProj) {
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
+ }
+#endif
+ return false;
+ }
+ }
+
+ int dd = dom_depth(head);
+
+ // Step 1: find cut point
+
+ // Walk up dominators to loop head looking for first loop exit
+  // which is executed on every path thru the loop.
+ IfNode *peel_if = NULL;
+ IfNode *peel_if_cmpu = NULL;
+
+ Node *iff = loop->tail();
+ while( iff != head ) {
+ if( iff->is_If() ) {
+ Node *ctrl = get_ctrl(iff->in(1));
+ if (ctrl->is_top()) return false; // Dead test on live IF.
+ // If loop-varying exit-test, check for induction variable
+ if( loop->is_member(get_loop(ctrl)) &&
+ loop->is_loop_exit(iff) &&
+ is_possible_iv_test(iff)) {
+ Node* cmp = iff->in(1)->in(1);
+ if (cmp->Opcode() == Op_CmpI) {
+ peel_if = iff->as_If();
+ } else {
+ assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
+ peel_if_cmpu = iff->as_If();
+ }
+ }
+ }
+ iff = idom(iff);
+ }
+ // Prefer signed compare over unsigned compare.
+ IfNode* new_peel_if = NULL;
+ if (peel_if == NULL) {
+ if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
+ return false; // No peel point found
+ }
+ new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
+ if (new_peel_if == NULL) {
+ return false; // No peel point found
+ }
+ peel_if = new_peel_if;
+ }
+ Node* last_peel = stay_in_loop(peel_if, loop);
+ Node* first_not_peeled = stay_in_loop(last_peel, loop);
+ if (first_not_peeled == NULL || first_not_peeled == head) {
+ return false;
+ }
+
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("before partial peel one iteration");
+ Node_List wl;
+ Node* t = head->in(2);
+ while (true) {
+ wl.push(t);
+ if (t == head) break;
+ t = idom(t);
+ }
+ while (wl.size() > 0) {
+ Node* tt = wl.pop();
+ tt->dump();
+ if (tt == last_peel) tty->print_cr("-- cut --");
+ }
+ }
+#endif
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet peel(area);
+ VectorSet not_peel(area);
+ Node_List peel_list(area);
+ Node_List worklist(area);
+ Node_List sink_list(area);
+
+  // The set of cfg nodes to peel is those that are executable from
+ // the head through last_peel.
+ assert(worklist.size() == 0, "should be empty");
+ worklist.push(head);
+ peel.set(head->_idx);
+ while (worklist.size() > 0) {
+ Node *n = worklist.pop();
+ if (n != last_peel) {
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if (use->is_CFG() &&
+ loop->is_member(get_loop(use)) &&
+ !peel.test_set(use->_idx)) {
+ worklist.push(use);
+ }
+ }
+ }
+ }
+
+  // The set of non-cfg nodes to peel is those that are control
+  // dependent on the peeled cfg nodes.
+ uint i;
+ for(i = 0; i < loop->_body.size(); i++ ) {
+ Node *n = loop->_body.at(i);
+ Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
+ if (peel.test(n_c->_idx)) {
+ peel.set(n->_idx);
+ } else {
+ not_peel.set(n->_idx);
+ }
+ }
+
+ // Step 2: move operations from the peeled section down into the
+ // not-peeled section
+
+  // Get a post order schedule of nodes in the peel region.
+  // The result is returned in the last argument (peel_list).
+ scheduled_nodelist(loop, peel, peel_list );
+
+ assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
+
+ // For future check for too many new phis
+ uint old_phi_cnt = 0;
+ for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
+ Node* use = head->fast_out(j);
+ if (use->is_Phi()) old_phi_cnt++;
+ }
+
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("\npeeled list");
+ }
+#endif
+
+ // Evacuate nodes in peel region into the not_peeled region if possible
+ uint new_phi_cnt = 0;
+ for (i = 0; i < peel_list.size();) {
+ Node* n = peel_list.at(i);
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) n->dump();
+#endif
+ bool incr = true;
+ if ( !n->is_CFG() ) {
+
+ if ( has_use_in_set(n, not_peel) ) {
+
+ // If not used internal to the peeled region,
+ // move "n" from peeled to not_peeled region.
+
+ if ( !has_use_internal_to_set(n, peel, loop) ) {
+
+          // if not pinned and not a load (which may be anti-dependent on a store)
+ // and not a CMove (Matcher expects only bool->cmove).
+ if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
+ clone_for_use_outside_loop( loop, n, worklist );
+
+ sink_list.push(n);
+ peel >>= n->_idx; // delete n from peel set.
+ not_peel <<= n->_idx; // add n to not_peel set.
+ peel_list.remove(i);
+ incr = false;
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("sink to not_peeled region: %d newbb: %d",
+ n->_idx, get_ctrl(n)->_idx);
+ }
+#endif
+ }
+ } else {
+ // Otherwise check for special def-use cases that span
+ // the peel/not_peel boundary such as bool->if
+ clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
+ new_phi_cnt++;
+ }
+ }
+ }
+ if (incr) i++;
+ }
+
+ if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c",
+ new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
+ }
+#endif
+ if (new_peel_if != NULL) {
+ remove_cmpi_loop_exit(new_peel_if, loop);
+ }
+ // Inhibit more partial peeling on this loop
+ assert(!head->is_partial_peel_loop(), "not partial peeled");
+ head->mark_partial_peel_failed();
+ return false;
+ }
+
+ // Step 3: clone loop, retarget control, and insert new phis
+
+ // Create new loop head for new phis and to hang
+  // the nodes being moved (sunk) from the peel region.
+ LoopNode* new_head = new (C, 3) LoopNode(last_peel, last_peel);
+ _igvn.register_new_node_with_optimizer(new_head);
+ assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
+ first_not_peeled->set_req(0, new_head);
+ set_loop(new_head, loop);
+ loop->_body.push(new_head);
+ not_peel.set(new_head->_idx);
+ set_idom(new_head, last_peel, dom_depth(first_not_peeled));
+ set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
+
+ while (sink_list.size() > 0) {
+ Node* n = sink_list.pop();
+ set_ctrl(n, new_head);
+ }
+
+ assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
+
+ clone_loop( loop, old_new, dd );
+
+ const uint clone_exit_idx = 1;
+ const uint orig_exit_idx = 2;
+ assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
+
+ Node* head_clone = old_new[head->_idx];
+ LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop();
+ Node* orig_tail_clone = head_clone->in(2);
+
+ // Add phi if "def" node is in peel set and "use" is not
+
+ for(i = 0; i < peel_list.size(); i++ ) {
+ Node *def = peel_list.at(i);
+ if (!def->is_CFG()) {
+ for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
+ Node *use = def->fast_out(j);
+ if (has_node(use) && use->in(0) != C->top() &&
+ (!peel.test(use->_idx) ||
+ (use->is_Phi() && use->in(0) == head)) ) {
+ worklist.push(use);
+ }
+ }
+ while( worklist.size() ) {
+ Node *use = worklist.pop();
+ for (uint j = 1; j < use->req(); j++) {
+ Node* n = use->in(j);
+ if (n == def) {
+
+ // "def" is in peel set, "use" is not in peel set
+ // or "use" is in the entry boundary (a phi) of the peel set
+
+ Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
+
+ if ( loop->is_member(get_loop( use_c )) ) {
+ // use is in loop
+ if (old_new[use->_idx] != NULL) { // null for dead code
+ Node* use_clone = old_new[use->_idx];
+ _igvn.hash_delete(use);
+ use->set_req(j, C->top());
+ _igvn._worklist.push(use);
+ insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
+ }
+ } else {
+ assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
+ // use is not in the loop, check if the live range includes the cut
+ Node* lp_if = use_c->in(orig_exit_idx)->in(0);
+ if (not_peel.test(lp_if->_idx)) {
+ assert(j == orig_exit_idx, "use from original loop");
+ insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Step 3b: retarget control
+
+ // Redirect control to the new loop head if a cloned node in
+ // the not_peeled region has control that points into the peeled region.
+  // This is necessary because the cloned peeled region will be outside
+ // the loop.
+ // from to
+ // cloned-peeled <---+
+ // new_head_clone: | <--+
+ // cloned-not_peeled in(0) in(0)
+ // orig-peeled
+
+ for(i = 0; i < loop->_body.size(); i++ ) {
+ Node *n = loop->_body.at(i);
+ if (!n->is_CFG() && n->in(0) != NULL &&
+ not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
+ Node* n_clone = old_new[n->_idx];
+ _igvn.hash_delete(n_clone);
+ n_clone->set_req(0, new_head_clone);
+ _igvn._worklist.push(n_clone);
+ }
+ }
+
+ // Backedge of the surviving new_head (the clone) is original last_peel
+ _igvn.hash_delete(new_head_clone);
+ new_head_clone->set_req(LoopNode::LoopBackControl, last_peel);
+ _igvn._worklist.push(new_head_clone);
+
+ // Cut first node in original not_peel set
+ _igvn.hash_delete(new_head);
+ new_head->set_req(LoopNode::EntryControl, C->top());
+ new_head->set_req(LoopNode::LoopBackControl, C->top());
+ _igvn._worklist.push(new_head);
+
+ // Copy head_clone back-branch info to original head
+ // and remove original head's loop entry and
+ // clone head's back-branch
+ _igvn.hash_delete(head);
+ _igvn.hash_delete(head_clone);
+ head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl));
+ head->set_req(LoopNode::LoopBackControl, C->top());
+ head_clone->set_req(LoopNode::LoopBackControl, C->top());
+ _igvn._worklist.push(head);
+ _igvn._worklist.push(head_clone);
+
+ // Similarly modify the phis
+ for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
+ Node* use = head->fast_out(k);
+ if (use->is_Phi() && use->outcnt() > 0) {
+ Node* use_clone = old_new[use->_idx];
+ _igvn.hash_delete(use);
+ _igvn.hash_delete(use_clone);
+ use->set_req(LoopNode::EntryControl, use_clone->in(LoopNode::LoopBackControl));
+ use->set_req(LoopNode::LoopBackControl, C->top());
+ use_clone->set_req(LoopNode::LoopBackControl, C->top());
+ _igvn._worklist.push(use);
+ _igvn._worklist.push(use_clone);
+ }
+ }
+
+ // Step 4: update dominator tree and dominator depth
+
+ set_idom(head, orig_tail_clone, dd);
+ recompute_dom_depth();
+
+ // Inhibit more partial peeling on this loop
+ new_head_clone->set_partial_peel_loop();
+ C->set_major_progress();
+
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("\nafter partial peel one iteration");
+ Node_List wl(area);
+ Node* t = last_peel;
+ while (true) {
+ wl.push(t);
+ if (t == head_clone) break;
+ t = idom(t);
+ }
+ while (wl.size() > 0) {
+ Node* tt = wl.pop();
+ if (tt == head) tty->print_cr("orig head");
+ else if (tt == new_head_clone) tty->print_cr("new head");
+ else if (tt == head_clone) tty->print_cr("clone head");
+ tt->dump();
+ }
+ }
+#endif
+ return true;
+}
+
+//------------------------------reorg_offsets----------------------------------
+// Reorganize offset computations to lower register pressure. Mostly
+// prevent loop-fallout uses of the pre-incremented trip counter (which are
+// then alive with the post-incremented trip counter forcing an extra
+// register move)
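+// For example, a fall-out use of the trip-counter phi is rewritten to use
+// AddI(Opaque2(incr), -stride), i.e. the post-incremented counter minus one
+// stride, so only the post-incremented value stays live out of the loop.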
+void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
+
+ CountedLoopNode *cl = loop->_head->as_CountedLoop();
+ CountedLoopEndNode *cle = cl->loopexit();
+ if( !cle ) return; // The occasional dead loop
+ // Find loop exit control
+ Node *exit = cle->proj_out(false);
+ assert( exit->Opcode() == Op_IfFalse, "" );
+
+ // Check for the special case of folks using the pre-incremented
+ // trip-counter on the fall-out path (forces the pre-incremented
+ // and post-incremented trip counter to be live at the same time).
+ // Fix this by adjusting to use the post-increment trip counter.
+ Node *phi = cl->phi();
+ if( !phi ) return; // Dead infinite loop
+ bool progress = true;
+ while (progress) {
+ progress = false;
+ for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
+ Node* use = phi->fast_out(i); // User of trip-counter
+ if (!has_ctrl(use)) continue;
+ Node *u_ctrl = get_ctrl(use);
+ if( use->is_Phi() ) {
+ u_ctrl = NULL;
+ for( uint j = 1; j < use->req(); j++ )
+ if( use->in(j) == phi )
+ u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) );
+ }
+ IdealLoopTree *u_loop = get_loop(u_ctrl);
+ // Look for loop-invariant use
+ if( u_loop == loop ) continue;
+ if( loop->is_member( u_loop ) ) continue;
+ // Check that use is live out the bottom. Assuming the trip-counter
+    // update is right at the bottom, uses of the loop middle are ok.
+ if( dom_lca( exit, u_ctrl ) != exit ) continue;
+ // protect against stride not being a constant
+ if( !cle->stride_is_con() ) continue;
+ // Hit! Refactor use to use the post-incremented tripcounter.
+ // Compute a post-increment tripcounter.
+ Node *opaq = new (C, 2) Opaque2Node( cle->incr() );
+ register_new_node( opaq, u_ctrl );
+ Node *neg_stride = _igvn.intcon(-cle->stride_con());
+ set_ctrl(neg_stride, C->root());
+ Node *post = new (C, 3) AddINode( opaq, neg_stride);
+ register_new_node( post, u_ctrl );
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ for( uint j = 1; j < use->req(); j++ )
+ if( use->in(j) == phi )
+ use->set_req(j, post);
+ // Since DU info changed, rerun loop
+ progress = true;
+ break;
+ }
+ }
+
+}
diff --git a/src/share/vm/opto/machnode.cpp b/src/share/vm/opto/machnode.cpp
new file mode 100644
index 000000000..8b88f00b5
--- /dev/null
+++ b/src/share/vm/opto/machnode.cpp
@@ -0,0 +1,707 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_machnode.cpp.incl"
+
+//=============================================================================
+// Return the value requested
+// result register lookup, corresponding to int_format
+int MachOper::reg(PhaseRegAlloc *ra_, const Node *node) const {
+ return (int)ra_->get_encode(node);
+}
+// input register lookup, corresponding to ext_format
+int MachOper::reg(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+ return (int)(ra_->get_encode(node->in(idx)));
+}
+intptr_t MachOper::constant() const { return 0x00; }
+bool MachOper::constant_is_oop() const { return false; }
+jdouble MachOper::constantD() const { ShouldNotReachHere(); return 0.0; }
+jfloat MachOper::constantF() const { ShouldNotReachHere(); return 0.0; }
+jlong MachOper::constantL() const { ShouldNotReachHere(); return CONST64(0) ; }
+TypeOopPtr *MachOper::oop() const { return NULL; }
+int MachOper::ccode() const { return 0x00; }
+// A zero (the default) indicates this value is not needed.
+// May need to look up the base register, as done in int_ and ext_format
+int MachOper::base (PhaseRegAlloc *ra_, const Node *node, int idx) const { return 0x00; }
+int MachOper::index(PhaseRegAlloc *ra_, const Node *node, int idx) const { return 0x00; }
+int MachOper::scale() const { return 0x00; }
+int MachOper::disp (PhaseRegAlloc *ra_, const Node *node, int idx) const { return 0x00; }
+int MachOper::constant_disp() const { return 0; }
+int MachOper::base_position() const { return -1; } // no base input
+int MachOper::index_position() const { return -1; } // no index input
+// Check for PC-Relative displacement
+bool MachOper::disp_is_oop() const { return false; }
+// Return the label
+Label* MachOper::label() const { ShouldNotReachHere(); return 0; }
+intptr_t MachOper::method() const { ShouldNotReachHere(); return 0; }
+
+
+//------------------------------negate-----------------------------------------
+// Negate conditional branches. Error for non-branch operands
+void MachOper::negate() {
+ ShouldNotCallThis();
+}
+
+//-----------------------------type--------------------------------------------
+const Type *MachOper::type() const {
+ return Type::BOTTOM;
+}
+
+//------------------------------in_RegMask-------------------------------------
+const RegMask *MachOper::in_RegMask(int index) const {
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//------------------------------dump_spec--------------------------------------
+// Print any per-operand special info
+#ifndef PRODUCT
+void MachOper::dump_spec(outputStream *st) const { }
+#endif
+
+//------------------------------hash-------------------------------------------
+// Hash any per-operand special info
+uint MachOper::hash() const {
+ ShouldNotCallThis();
+ return 5;
+}
+
+//------------------------------cmp--------------------------------------------
+// Compare any per-operand special info
+uint MachOper::cmp( const MachOper &oper ) const {
+ ShouldNotCallThis();
+ return opcode() == oper.opcode();
+}
+
+//------------------------------hash-------------------------------------------
+// Hash on the label's block number
+uint labelOper::hash() const {
+ return _block_num;
+}
+
+//------------------------------cmp--------------------------------------------
+// Compare label operands for equality
+uint labelOper::cmp( const MachOper &oper ) const {
+ return (opcode() == oper.opcode()) && (_label == oper.label());
+}
+
+//------------------------------hash-------------------------------------------
+// Hash on the method address
+uint methodOper::hash() const {
+ return (uint)_method;
+}
+
+//------------------------------cmp--------------------------------------------
+// Compare method operands for equality
+uint methodOper::cmp( const MachOper &oper ) const {
+ return (opcode() == oper.opcode()) && (_method == oper.method());
+}
+
+
+//=============================================================================
+//------------------------------MachNode---------------------------------------
+
+//------------------------------emit-------------------------------------------
+void MachNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ #ifdef ASSERT
+ tty->print("missing MachNode emit function: ");
+ dump();
+ #endif
+ ShouldNotCallThis();
+}
+
+//------------------------------size-------------------------------------------
+// Size of instruction in bytes
+uint MachNode::size(PhaseRegAlloc *ra_) const {
+ // If a virtual was not defined for this specific instruction,
+  // call the helper which finds the size by emitting the bits.
+ return MachNode::emit_size(ra_);
+}
+
+//------------------------------size-------------------------------------------
+// Helper function that computes size by emitting code
+uint MachNode::emit_size(PhaseRegAlloc *ra_) const {
+ // Emit into a trash buffer and count bytes emitted.
+ assert(ra_ == ra_->C->regalloc(), "sanity");
+ return ra_->C->scratch_emit_size(this);
+}
+
+
+
+//------------------------------hash-------------------------------------------
+uint MachNode::hash() const {
+ uint no = num_opnds();
+ uint sum = rule();
+ for( uint i=0; i<no; i++ )
+ sum += _opnds[i]->hash();
+ return sum+Node::hash();
+}
+
+//-----------------------------cmp---------------------------------------------
+uint MachNode::cmp( const Node &node ) const {
+ MachNode& n = *((Node&)node).as_Mach();
+ uint no = num_opnds();
+ if( no != n.num_opnds() ) return 0;
+ if( rule() != n.rule() ) return 0;
+ for( uint i=0; i<no; i++ ) // All operands must match
+ if( !_opnds[i]->cmp( *n._opnds[i] ) )
+ return 0; // mis-matched operands
+ return 1; // match
+}
+
+// Return an equivalent instruction using memory for cisc_operand position
+MachNode *MachNode::cisc_version(int offset, Compile* C) {
+ ShouldNotCallThis();
+ return NULL;
+}
+
+void MachNode::use_cisc_RegMask() {
+ ShouldNotReachHere();
+}
+
+
+//-----------------------------in_RegMask--------------------------------------
+const RegMask &MachNode::in_RegMask( uint idx ) const {
+ uint numopnds = num_opnds(); // Virtual call for number of operands
+ uint skipped = oper_input_base(); // Sum of leaves skipped so far
+ if( idx < skipped ) {
+ assert( ideal_Opcode() == Op_AddP, "expected base ptr here" );
+ assert( idx == 1, "expected base ptr here" );
+ // debug info can be anywhere
+ return *Compile::current()->matcher()->idealreg2spillmask[Op_RegP];
+ }
+ uint opcnt = 1; // First operand
+ uint num_edges = _opnds[1]->num_edges(); // leaves for first operand
+ while( idx >= skipped+num_edges ) {
+ skipped += num_edges;
+ opcnt++; // Bump operand count
+ assert( opcnt < numopnds, "Accessing non-existent operand" );
+ num_edges = _opnds[opcnt]->num_edges(); // leaves for next operand
+ }
+
+ const RegMask *rm = cisc_RegMask();
+ if( rm == NULL || (int)opcnt != cisc_operand() ) {
+ rm = _opnds[opcnt]->in_RegMask(idx-skipped);
+ }
+ return *rm;
+}
+
+//-----------------------------memory_inputs--------------------------------
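+// Return this node's unique memory operand, if any, and set "base" and
+// "index" to the corresponding input nodes.  Returns NULL when there is
+// no memory operand, and (MachOper*)-1 (with base and index set to
+// NodeSentinel) when the memory inputs are not unique.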
+const MachOper* MachNode::memory_inputs(Node* &base, Node* &index) const {
+ const MachOper* oper = memory_operand();
+
+ if (oper == (MachOper*)-1) {
+ base = NodeSentinel;
+ index = NodeSentinel;
+ } else {
+ base = NULL;
+ index = NULL;
+ if (oper != NULL) {
+ // It has a unique memory operand. Find its index.
+ int oper_idx = num_opnds();
+ while (--oper_idx >= 0) {
+ if (_opnds[oper_idx] == oper) break;
+ }
+ int oper_pos = operand_index(oper_idx);
+ int base_pos = oper->base_position();
+ if (base_pos >= 0) {
+ base = _in[oper_pos+base_pos];
+ }
+ int index_pos = oper->index_position();
+ if (index_pos >= 0) {
+ index = _in[oper_pos+index_pos];
+ }
+ }
+ }
+
+ return oper;
+}
+
+//-----------------------------get_base_and_disp----------------------------
+const Node* MachNode::get_base_and_disp(intptr_t &offset, const TypePtr* &adr_type) const {
+
+ // Find the memory inputs using our helper function
+ Node* base;
+ Node* index;
+ const MachOper* oper = memory_inputs(base, index);
+
+ if (oper == NULL) {
+ // Base has been set to NULL
+ offset = 0;
+ } else if (oper == (MachOper*)-1) {
+ // Base has been set to NodeSentinel
+ // There is not a unique memory use here. We will fall to AliasIdxBot.
+ offset = Type::OffsetBot;
+ } else {
+ // Base may be NULL, even if offset turns out to be != 0
+
+ intptr_t disp = oper->constant_disp();
+ int scale = oper->scale();
+ // Now we have collected every part of the ADLC MEMORY_INTER.
+ // See if it adds up to a base + offset.
+ if (index != NULL) {
+ if (!index->is_Con()) {
+ disp = Type::OffsetBot;
+ } else if (disp != Type::OffsetBot) {
+ const TypeX* ti = index->bottom_type()->isa_intptr_t();
+ if (ti == NULL) {
+ disp = Type::OffsetBot; // a random constant??
+ } else {
+ disp += ti->get_con() << scale;
+ }
+ }
+ }
+ offset = disp;
+
+ // In i486.ad, indOffset32X uses base==RegI and disp==RegP,
+ // this will prevent alias analysis without the following support:
+ // Lookup the TypePtr used by indOffset32X, a compile-time constant oop,
+ // Add the offset determined by the "base", or use Type::OffsetBot.
+ if( adr_type == TYPE_PTR_SENTINAL ) {
+ const TypePtr *t_disp = oper->disp_as_type(); // only !NULL for indOffset32X
+ if (t_disp != NULL) {
+ offset = Type::OffsetBot;
+ const Type* t_base = base->bottom_type();
+ if (t_base->isa_intptr_t()) {
+ const TypeX *t_offset = t_base->is_intptr_t();
+ if( t_offset->is_con() ) {
+ offset = t_offset->get_con();
+ }
+ }
+ adr_type = t_disp->add_offset(offset);
+ }
+ }
+
+ }
+ return base;
+}
+
+
+//---------------------------------adr_type---------------------------------
+const class TypePtr *MachNode::adr_type() const {
+ intptr_t offset = 0;
+ const TypePtr *adr_type = TYPE_PTR_SENTINAL; // attempt computing adr_type
+ const Node *base = get_base_and_disp(offset, adr_type);
+ if( adr_type != TYPE_PTR_SENTINAL ) {
+ return adr_type; // get_base_and_disp has the answer
+ }
+
+ // Direct addressing modes have no base node, simply an indirect
+ // offset, which is always to raw memory.
+ // %%%%% Someday we'd like to allow constant oop offsets which
+ // would let Intel load from static globals in 1 instruction.
+ // Currently Intel requires 2 instructions and a register temp.
+ if (base == NULL) {
+ // NULL base, zero offset means no memory at all (a null pointer!)
+ if (offset == 0) {
+ return NULL;
+ }
+ // NULL base, any offset means any pointer whatever
+ if (offset == Type::OffsetBot) {
+ return TypePtr::BOTTOM;
+ }
+ // %%% make offset be intptr_t
+ assert(!Universe::heap()->is_in_reserved((oop)offset), "must be a raw ptr");
+ return TypeRawPtr::BOTTOM;
+ }
+
+ // base of -1 with no particular offset means all of memory
+ if (base == NodeSentinel) return TypePtr::BOTTOM;
+
+ const Type* t = base->bottom_type();
+ if (t->isa_intptr_t() && offset != 0 && offset != Type::OffsetBot) {
+ // We cannot assert that the offset does not look oop-ish here.
+ // Depending on the heap layout the cardmark base could land
+ // inside some oopish region. It definitely does for Win2K.
+ // The sum of cardmark-base plus shift-by-9-oop lands outside
+ // the oop-ish area but we can't assert for that statically.
+ return TypeRawPtr::BOTTOM;
+ }
+
+ const TypePtr *tp = t->isa_ptr();
+
+ // be conservative if we do not recognize the type
+ if (tp == NULL) {
+ return TypePtr::BOTTOM;
+ }
+ assert(tp->base() != Type::AnyPtr, "not a bare pointer");
+
+ return tp->add_offset(offset);
+}
+
+
+//-----------------------------operand_index---------------------------------
+int MachNode::operand_index( uint operand ) const {
+ if( operand < 1 ) return -1;
+ assert(operand < num_opnds(), "oob");
+ if( _opnds[operand]->num_edges() == 0 ) return -1;
+
+ uint skipped = oper_input_base(); // Sum of leaves skipped so far
+ for (uint opcnt = 1; opcnt < operand; opcnt++) {
+ uint num_edges = _opnds[opcnt]->num_edges(); // leaves for operand
+ skipped += num_edges;
+ }
+ return skipped;
+}
+
+
+//------------------------------negate-----------------------------------------
+// Negate conditional branches. Error for non-branch Nodes
+void MachNode::negate() {
+ ShouldNotCallThis();
+}
+
+//------------------------------peephole---------------------------------------
+// Apply peephole rule(s) to this instruction
+MachNode *MachNode::peephole( Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted, Compile* C ) {
+ return NULL;
+}
+
+//------------------------------add_case_label---------------------------------
+// Adds the label for the case
+void MachNode::add_case_label( int index_num, Label* blockLabel) {
+ ShouldNotCallThis();
+}
+
+//------------------------------label_set--------------------------------------
+// Set the Label for a LabelOper, if an operand for this instruction
+void MachNode::label_set( Label& label, uint block_num ) {
+ ShouldNotCallThis();
+}
+
+//------------------------------method_set-------------------------------------
+// Set the absolute address of a method
+void MachNode::method_set( intptr_t addr ) {
+ ShouldNotCallThis();
+}
+
+//------------------------------rematerialize----------------------------------
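+// Should this node be rematerialized (recomputed at each use) by the
+// register allocator rather than spilled?  The heuristics below weigh the
+// cost of recomputation against stretching input live ranges.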
+bool MachNode::rematerialize() const {
+ // Temps are always rematerializable
+ if (is_MachTemp()) return true;
+
+ uint r = rule(); // Match rule
+ if( r < Matcher::_begin_rematerialize ||
+ r >= Matcher::_end_rematerialize )
+ return false;
+
+ // For 2-address instructions, the input live range is also the output
+  // live range. Rematerializing does not make progress on that live range.
+ if( two_adr() ) return false;
+
+ // Check for rematerializing float constants, or not
+ if( !Matcher::rematerialize_float_constants ) {
+ int op = ideal_Opcode();
+ if( op == Op_ConF || op == Op_ConD )
+ return false;
+ }
+
+  // Defining flags - can't spill these! Must rematerialize.
+ if( ideal_reg() == Op_RegFlags )
+ return true;
+
+ // Stretching lots of inputs - don't do it.
+ if( req() > 2 )
+ return false;
+
+  // Don't rematerialize somebody with bound inputs - it stretches a
+ // fixed register lifetime.
+ uint idx = oper_input_base();
+ if( req() > idx ) {
+ const RegMask &rm = in_RegMask(idx);
+ if( rm.is_bound1() || rm.is_bound2() )
+ return false;
+ }
+
+ return true;
+}
+
+#ifndef PRODUCT
+//------------------------------dump_spec--------------------------------------
+// Print any per-operand special info
+void MachNode::dump_spec(outputStream *st) const {
+ uint cnt = num_opnds();
+ for( uint i=0; i<cnt; i++ )
+ _opnds[i]->dump_spec(st);
+ const TypePtr *t = adr_type();
+ if( t ) {
+ Compile* C = Compile::current();
+ if( C->alias_type(t)->is_volatile() )
+ st->print(" Volatile!");
+ }
+}
+
+//------------------------------dump_format------------------------------------
+// access to virtual
+void MachNode::dump_format(PhaseRegAlloc *ra, outputStream *st) const {
+ format(ra, st); // access to virtual
+}
+#endif
+
+//=============================================================================
+#ifndef PRODUCT
+void MachTypeNode::dump_spec(outputStream *st) const {
+ _bottom_type->dump_on(st);
+}
+#endif
+
+//=============================================================================
+#ifndef PRODUCT
+void MachNullCheckNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+ int reg = ra_->get_reg_first(in(1)->in(_vidx));
+ tty->print("%s %s", Name(), Matcher::regName[reg]);
+}
+#endif
+
+void MachNullCheckNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ // only emits entries in the null-pointer exception handler table
+}
+
+const RegMask &MachNullCheckNode::in_RegMask( uint idx ) const {
+ if( idx == 0 ) return RegMask::Empty;
+ else return in(1)->as_Mach()->out_RegMask();
+}
+
+//=============================================================================
+const Type *MachProjNode::bottom_type() const {
+ if( _ideal_reg == fat_proj ) return Type::BOTTOM;
+ // Try the normal mechanism first
+ const Type *t = in(0)->bottom_type();
+ if( t->base() == Type::Tuple ) {
+ const TypeTuple *tt = t->is_tuple();
+ if (_con < tt->cnt())
+ return tt->field_at(_con);
+ }
+ // Else use generic type from ideal register set
+ assert((uint)_ideal_reg < (uint)_last_machine_leaf && Type::mreg2type[_ideal_reg], "in bounds");
+ return Type::mreg2type[_ideal_reg];
+}
+
+const TypePtr *MachProjNode::adr_type() const {
+ if (bottom_type() == Type::MEMORY) {
+ // in(0) might be a narrow MemBar; otherwise we will report TypePtr::BOTTOM
+ const TypePtr* adr_type = in(0)->adr_type();
+ #ifdef ASSERT
+ if (!is_error_reported() && !Node::in_dump())
+ assert(adr_type != NULL, "source must have adr_type");
+ #endif
+ return adr_type;
+ }
+ assert(bottom_type()->base() != Type::Memory, "no other memories?");
+ return NULL;
+}
+
+#ifndef PRODUCT
+void MachProjNode::dump_spec(outputStream *st) const {
+ ProjNode::dump_spec(st);
+ switch (_ideal_reg) {
+ case unmatched_proj: st->print("/unmatched"); break;
+ case fat_proj: st->print("/fat"); if (WizardMode) _rout.dump(); break;
+ }
+}
+#endif
+
+//=============================================================================
+#ifndef PRODUCT
+void MachIfNode::dump_spec(outputStream *st) const {
+ st->print("P=%f, C=%f",_prob, _fcnt);
+}
+#endif
+
+//=============================================================================
+uint MachReturnNode::size_of() const { return sizeof(*this); }
+
+//------------------------------Registers--------------------------------------
+const RegMask &MachReturnNode::in_RegMask( uint idx ) const {
+ return _in_rms[idx];
+}
+
+const TypePtr *MachReturnNode::adr_type() const {
+ // most returns and calls are assumed to consume & modify all of memory
+ // the matcher will copy non-wide adr_types from ideal originals
+ return _adr_type;
+}
+
+//=============================================================================
+const Type *MachSafePointNode::bottom_type() const { return TypeTuple::MEMBAR; }
+
+//------------------------------Registers--------------------------------------
+const RegMask &MachSafePointNode::in_RegMask( uint idx ) const {
+  // Values in the domain use the user's calling convention, embodied in the
+ // _in_rms array of RegMasks.
+ if( idx < TypeFunc::Parms ) return _in_rms[idx];
+
+ if (SafePointNode::needs_polling_address_input() &&
+ idx == TypeFunc::Parms &&
+ ideal_Opcode() == Op_SafePoint) {
+ return MachNode::in_RegMask(idx);
+ }
+
+ // Values outside the domain represent debug info
+ return *Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()];
+}
+
+
+//=============================================================================
+
+uint MachCallNode::cmp( const Node &n ) const
+{ return _tf == ((MachCallNode&)n)._tf; }
+const Type *MachCallNode::bottom_type() const { return tf()->range(); }
+const Type *MachCallNode::Value(PhaseTransform *phase) const { return tf()->range(); }
+
+#ifndef PRODUCT
+void MachCallNode::dump_spec(outputStream *st) const {
+ st->print("# ");
+ tf()->dump_on(st);
+ if (_cnt != COUNT_UNKNOWN) st->print(" C=%f",_cnt);
+ if (jvms() != NULL) jvms()->dump_spec(st);
+}
+#endif
+
+
+bool MachCallNode::return_value_is_used() const {
+ if (tf()->range()->cnt() == TypeFunc::Parms) {
+ // void return
+ return false;
+ }
+
+ // find the projection corresponding to the return value
+ for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+ Node *use = fast_out(i);
+ if (!use->is_Proj()) continue;
+ if (use->as_Proj()->_con == TypeFunc::Parms) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+//------------------------------Registers--------------------------------------
+const RegMask &MachCallNode::in_RegMask( uint idx ) const {
+  // Values in the domain use the user's calling convention, embodied in the
+ // _in_rms array of RegMasks.
+ if (idx < tf()->domain()->cnt()) return _in_rms[idx];
+ // Values outside the domain represent debug info
+ return *Compile::current()->matcher()->idealreg2debugmask[in(idx)->ideal_reg()];
+}
+
+//=============================================================================
+uint MachCallJavaNode::size_of() const { return sizeof(*this); }
+uint MachCallJavaNode::cmp( const Node &n ) const {
+ MachCallJavaNode &call = (MachCallJavaNode&)n;
+ return MachCallNode::cmp(call) && _method->equals(call._method);
+}
+#ifndef PRODUCT
+void MachCallJavaNode::dump_spec(outputStream *st) const {
+ if( _method ) {
+ _method->print_short_name(st);
+ st->print(" ");
+ }
+ MachCallNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+uint MachCallStaticJavaNode::size_of() const { return sizeof(*this); }
+uint MachCallStaticJavaNode::cmp( const Node &n ) const {
+ MachCallStaticJavaNode &call = (MachCallStaticJavaNode&)n;
+ return MachCallJavaNode::cmp(call) && _name == call._name;
+}
+
+//----------------------------uncommon_trap_request----------------------------
+// If this is an uncommon trap, return the request code, else zero.
+int MachCallStaticJavaNode::uncommon_trap_request() const {
+ if (_name != NULL && !strcmp(_name, "uncommon_trap")) {
+ return CallStaticJavaNode::extract_uncommon_trap_request(this);
+ }
+ return 0;
+}
+
+#ifndef PRODUCT
+// Helper for summarizing uncommon_trap arguments.
+void MachCallStaticJavaNode::dump_trap_args(outputStream *st) const {
+ int trap_req = uncommon_trap_request();
+ if (trap_req != 0) {
+ char buf[100];
+ st->print("(%s)",
+ Deoptimization::format_trap_request(buf, sizeof(buf),
+ trap_req));
+ }
+}
+
+void MachCallStaticJavaNode::dump_spec(outputStream *st) const {
+ st->print("Static ");
+ if (_name != NULL) {
+ st->print("wrapper for: %s", _name );
+ dump_trap_args(st);
+ st->print(" ");
+ }
+ MachCallJavaNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+#ifndef PRODUCT
+void MachCallDynamicJavaNode::dump_spec(outputStream *st) const {
+ st->print("Dynamic ");
+ MachCallJavaNode::dump_spec(st);
+}
+#endif
+//=============================================================================
+uint MachCallRuntimeNode::size_of() const { return sizeof(*this); }
+uint MachCallRuntimeNode::cmp( const Node &n ) const {
+ MachCallRuntimeNode &call = (MachCallRuntimeNode&)n;
+ return MachCallNode::cmp(call) && !strcmp(_name,call._name);
+}
+#ifndef PRODUCT
+void MachCallRuntimeNode::dump_spec(outputStream *st) const {
+ st->print("%s ",_name);
+ MachCallNode::dump_spec(st);
+}
+#endif
+//=============================================================================
+// A shared JVMState for all HaltNodes. Indicates the start of debug info
+// is at TypeFunc::Parms. Only required for SOE register spill handling -
+// to indicate where the stack-slot-only debug info inputs begin.
+// There is no other JVM state needed here.
+JVMState jvms_for_throw(0);
+JVMState *MachHaltNode::jvms() const {
+ return &jvms_for_throw;
+}
+
+//=============================================================================
+#ifndef PRODUCT
+void labelOper::int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const {
+ st->print("B%d", _block_num);
+}
+#endif // PRODUCT
+
+//=============================================================================
+#ifndef PRODUCT
+void methodOper::int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const {
+ st->print(INTPTR_FORMAT, _method);
+}
+#endif // PRODUCT
diff --git a/src/share/vm/opto/machnode.hpp b/src/share/vm/opto/machnode.hpp
new file mode 100644
index 000000000..3c24a3e5c
--- /dev/null
+++ b/src/share/vm/opto/machnode.hpp
@@ -0,0 +1,826 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class BufferBlob;
+class CodeBuffer;
+class JVMState;
+class MachCallDynamicJavaNode;
+class MachCallJavaNode;
+class MachCallLeafNode;
+class MachCallNode;
+class MachCallRuntimeNode;
+class MachCallStaticJavaNode;
+class MachEpilogNode;
+class MachIfNode;
+class MachNullCheckNode;
+class MachOper;
+class MachProjNode;
+class MachPrologNode;
+class MachReturnNode;
+class MachSafePointNode;
+class MachSpillCopyNode;
+class Matcher;
+class PhaseRegAlloc;
+class RegMask;
+class State;
+
+//---------------------------MachOper------------------------------------------
+class MachOper : public ResourceObj {
+public:
+ // Allocate right next to the MachNodes in the same arena
+ void *operator new( size_t x, Compile* C ) { return C->node_arena()->Amalloc_D(x); }
+
+ // Opcode
+ virtual uint opcode() const = 0;
+
+ // Number of input edges.
+ // Generally at least 1
+ virtual uint num_edges() const { return 1; }
+ // Array of Register masks
+ virtual const RegMask *in_RegMask(int index) const;
+
+ // Methods to output the encoding of the operand
+
+ // Negate conditional branches. Error for non-branch Nodes
+ virtual void negate();
+
+ // Return the value requested
+ // result register lookup, corresponding to int_format
+ virtual int reg(PhaseRegAlloc *ra_, const Node *node) const;
+ // input register lookup, corresponding to ext_format
+ virtual int reg(PhaseRegAlloc *ra_, const Node *node, int idx) const;
+
+ // helpers for MacroAssembler generation from ADLC
+ Register as_Register(PhaseRegAlloc *ra_, const Node *node) const {
+ return ::as_Register(reg(ra_, node));
+ }
+ Register as_Register(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+ return ::as_Register(reg(ra_, node, idx));
+ }
+ FloatRegister as_FloatRegister(PhaseRegAlloc *ra_, const Node *node) const {
+ return ::as_FloatRegister(reg(ra_, node));
+ }
+ FloatRegister as_FloatRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+ return ::as_FloatRegister(reg(ra_, node, idx));
+ }
+
+#if defined(IA32) || defined(AMD64)
+ XMMRegister as_XMMRegister(PhaseRegAlloc *ra_, const Node *node) const {
+ return ::as_XMMRegister(reg(ra_, node));
+ }
+ XMMRegister as_XMMRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+ return ::as_XMMRegister(reg(ra_, node, idx));
+ }
+#endif
+
+ virtual intptr_t constant() const;
+ virtual bool constant_is_oop() const;
+ virtual jdouble constantD() const;
+ virtual jfloat constantF() const;
+ virtual jlong constantL() const;
+ virtual TypeOopPtr *oop() const;
+ virtual int ccode() const;
+ // A zero (the default) indicates this value is not needed.
+ // May need to look up the base register, as done in int_ and ext_format
+ virtual int base (PhaseRegAlloc *ra_, const Node *node, int idx) const;
+ virtual int index(PhaseRegAlloc *ra_, const Node *node, int idx) const;
+ virtual int scale() const;
+ // Parameters needed to support MEMORY_INTERFACE access to stackSlot
+ virtual int disp (PhaseRegAlloc *ra_, const Node *node, int idx) const;
+ // Check for PC-Relative displacement
+ virtual bool disp_is_oop() const;
+ virtual int constant_disp() const; // usu. 0, may return Type::OffsetBot
+ virtual int base_position() const; // base edge position, or -1
+ virtual int index_position() const; // index edge position, or -1
+
+ // Access the TypeKlassPtr of operands with a base==RegI and disp==RegP
+ // Only returns non-null value for i486.ad's indOffset32X
+ virtual const TypePtr *disp_as_type() const { return NULL; }
+
+ // Return the label
+ virtual Label *label() const;
+
+ // Return the method's address
+ virtual intptr_t method() const;
+
+ // Hash and compare over operands are currently identical
+ virtual uint hash() const;
+ virtual uint cmp( const MachOper &oper ) const;
+
+ // Virtual clone, since I do not know how big the MachOper is.
+ virtual MachOper *clone(Compile* C) const = 0;
+
+ // Return ideal Type from simple operands. Fail for complex operands.
+ virtual const Type *type() const;
+
+ // Set an integer offset if we have one, or error otherwise
+ virtual void set_con( jint c0 ) { ShouldNotReachHere(); }
+
+#ifndef PRODUCT
+ // Return name of operand
+ virtual const char *Name() const { return "???";}
+
+ // Methods to output the text version of the operand
+ virtual void int_format(PhaseRegAlloc *,const MachNode *node, outputStream *st) const = 0;
+ virtual void ext_format(PhaseRegAlloc *,const MachNode *node,int idx, outputStream *st) const=0;
+
+ virtual void dump_spec(outputStream *st) const; // Print per-operand info
+#endif
+};
+
+//------------------------------MachNode---------------------------------------
+// Base type for all machine specific nodes. All node classes generated by the
+// ADLC inherit from this class.
+class MachNode : public Node {
+public:
+ MachNode() : Node((uint)0), _num_opnds(0), _opnds(NULL) {
+ init_class_id(Class_Mach);
+ }
+ // Required boilerplate
+ virtual uint size_of() const { return sizeof(MachNode); }
+ virtual int Opcode() const; // Always equal to MachNode
+ virtual uint rule() const = 0; // Machine-specific opcode
+ // Number of inputs which come before the first operand.
+ // Generally at least 1, to skip the Control input
+ virtual uint oper_input_base() const { return 1; }
+
+ // Copy inputs and operands to new node of instruction.
+ // Called from cisc_version() and short_branch_version().
+ // !!!! The method's body is defined in the ad_<arch>.cpp file.
+ void fill_new_machnode(MachNode *n, Compile* C) const;
+
+ // Return an equivalent instruction using memory for cisc_operand position
+ virtual MachNode *cisc_version(int offset, Compile* C);
+ // Modify this instruction's register mask to use stack version for cisc_operand
+ virtual void use_cisc_RegMask();
+
+ // Support for short branches
+ virtual MachNode *short_branch_version(Compile* C) { return NULL; }
+ bool may_be_short_branch() const { return (flags() & Flag_may_be_short_branch) != 0; }
+
+ // First index in _in[] corresponding to operand, or -1 if there is none
+ int operand_index(uint operand) const;
+
+ // Register class input is expected in
+ virtual const RegMask &in_RegMask(uint) const;
+
+ // cisc-spillable instructions redefine for use by in_RegMask
+ virtual const RegMask *cisc_RegMask() const { return NULL; }
+
+ // If this instruction is a 2-address instruction, then return the
+ // index of the input which must match the output. Not necessary
+ // for instructions which bind the input and output register to the
+ // same singleton register (e.g., Intel IDIV, which binds AX to be
+ // both an input and an output). It is necessary when the input and
+ // output have choices - but they must use the same choice.
+ virtual uint two_adr( ) const { return 0; }
+
+ // Array of complex operand pointers. Each corresponds to zero or
+ // more leafs. Must be set by MachNode constructor to point to an
+ // internal array of MachOpers. The MachOper array is sized by
+ // specific MachNodes described in the ADL.
+ uint _num_opnds;
+ MachOper **_opnds;
+ uint num_opnds() const { return _num_opnds; }
+
+ // Emit bytes into cbuf
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ // Size of instruction in bytes
+ virtual uint size(PhaseRegAlloc *ra_) const;
+ // Helper function that computes size by emitting code
+ virtual uint emit_size(PhaseRegAlloc *ra_) const;
+
+ // Return the alignment required (in units of relocInfo::addr_unit())
+ // for this instruction (must be a power of 2)
+ virtual int alignment_required() const { return 1; }
+
+ // Return the padding (in bytes) to be emitted before this
+ // instruction to properly align it.
+ virtual int compute_padding(int current_offset) const { return 0; }
+
+ // Return number of relocatable values contained in this instruction
+ virtual int reloc() const { return 0; }
+
+ // Return number of words used for double constants in this instruction
+ virtual int const_size() const { return 0; }
+
+ // Hash and compare over operands. Used to do GVN on machine Nodes.
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+
+ // Expand method for MachNode, replaces nodes representing pseudo
+ // instructions with a set of nodes which represent real machine
+ // instructions and compute the same value.
+ virtual MachNode *Expand( State *, Node_List &proj_list ) { return this; }
+
+ // Bottom_type call; value comes from operand0
+ virtual const class Type *bottom_type() const { return _opnds[0]->type(); }
+ virtual uint ideal_reg() const { const Type *t = _opnds[0]->type(); return t == TypeInt::CC ? Op_RegFlags : Matcher::base2reg[t->base()]; }
+
+ // If this is a memory op, return the base pointer and fixed offset.
+ // If there is no such address, return NULL. If there are multiple addresses
+ // or the address is indeterminate (rare cases) then return (Node*)-1,
+ // which serves as node bottom.
+ // If the offset is not statically determined, set it to Type::OffsetBot.
+ // This method is free to ignore stack slots if that helps.
+ #define TYPE_PTR_SENTINAL ((const TypePtr*)-1)
+ // Passing TYPE_PTR_SENTINAL as adr_type asks for computation of the adr_type if possible
+ const Node* get_base_and_disp(intptr_t &offset, const TypePtr* &adr_type) const;
+
+ // Helper for get_base_and_disp: find the base and index input nodes.
+ // Returns the MachOper as determined by memory_operand(), for use, if
+ // needed by the caller. If (MachOper *)-1 is returned, base and index
+ // are set to NodeSentinel. If (MachOper *) NULL is returned, base and
+ // index are set to NULL.
+ const MachOper* memory_inputs(Node* &base, Node* &index) const;
+
+ // Helper for memory_inputs: Which operand carries the necessary info?
+ // By default, returns NULL, which means there is no such operand.
+ // If it returns (MachOper*)-1, this means there are multiple memories.
+ virtual const MachOper* memory_operand() const { return NULL; }
+
+ // Call "get_base_and_disp" to decide which category of memory is used here.
+ virtual const class TypePtr *adr_type() const;
+
+ // Negate conditional branches. Error for non-branch Nodes
+ virtual void negate();
+
+ // Apply peephole rule(s) to this instruction
+ virtual MachNode *peephole( Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted, Compile* C );
+
+ // Check for PC-Relative addressing
+ bool is_pc_relative() const { return (flags() & Flag_is_pc_relative) != 0; }
+
+ // Top-level ideal Opcode matched
+ virtual int ideal_Opcode() const { return Op_Node; }
+
+ // Set the branch inside jump MachNodes. Error for non-branch Nodes.
+ virtual void label_set( Label& label, uint block_num );
+
+ // Adds the label for the case
+ virtual void add_case_label( int switch_val, Label* blockLabel);
+
+ // Set the absolute address for methods
+ virtual void method_set( intptr_t addr );
+
+ // Should we clone rather than spill this instruction?
+ bool rematerialize() const;
+
+ // Get the pipeline info
+ static const Pipeline *pipeline_class();
+ virtual const Pipeline *pipeline() const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const = 0; // Machine-specific name
+ virtual void dump_spec(outputStream *st) const; // Print per-node info
+ void dump_format(PhaseRegAlloc *ra, outputStream *st) const; // access to virtual
+#endif
+};
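+
+// Editor's note: the following compile-guarded sketch is NOT part of the
+// original sources. It only illustrates the sentinel protocol documented for
+// MachNode::get_base_and_disp() above: NULL means "not a memory op",
+// NodeSentinel ((Node*)-1) means the address is indeterminate or there are
+// several addresses, and Type::OffsetBot marks a non-constant displacement.
+// The guard macro is never defined, so none of this is compiled.
+#ifdef EDITOR_SKETCH_BASE_AND_DISP
+static void classify_memory_op(const MachNode* mach) {
+  intptr_t       offset   = 0;
+  const TypePtr* adr_type = TYPE_PTR_SENTINAL;  // ask for adr_type computation
+  const Node*    base     = mach->get_base_and_disp(offset, adr_type);
+  if (base == NULL) {
+    // Not a memory operation at all.
+  } else if (base == (const Node*)-1) {
+    // NodeSentinel: multiple addresses, or an indeterminate address.
+  } else if (offset == Type::OffsetBot) {
+    // Known base, but the displacement is not a compile-time constant.
+  } else {
+    // Known base and constant displacement.
+  }
+}
+#endif // EDITOR_SKETCH_BASE_AND_DISP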
+
+//------------------------------MachIdealNode----------------------------
+// Machine specific versions of nodes that must be defined by user.
+// These are not converted by matcher from ideal nodes to machine nodes
+// but are inserted into the code by the compiler.
+class MachIdealNode : public MachNode {
+public:
+ MachIdealNode( ) {}
+
+ // Define the following defaults for non-matched machine nodes
+ virtual uint oper_input_base() const { return 0; }
+ virtual uint rule() const { return 9999999; }
+ virtual const class Type *bottom_type() const { return _opnds == NULL ? Type::CONTROL : MachNode::bottom_type(); }
+};
+
+//------------------------------MachTypeNode----------------------------
+// Machine Nodes that need to retain a known Type.
+class MachTypeNode : public MachNode {
+ virtual uint size_of() const { return sizeof(*this); } // Size is bigger
+public:
+ const Type *_bottom_type;
+
+ virtual const class Type *bottom_type() const { return _bottom_type; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachBreakpointNode----------------------------
+// Machine breakpoint or interrupt Node
+class MachBreakpointNode : public MachIdealNode {
+public:
+ MachBreakpointNode( ) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Breakpoint"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachUEPNode-----------------------------------
+// Machine Unvalidated Entry Point Node
+class MachUEPNode : public MachIdealNode {
+public:
+ MachUEPNode( ) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Unvalidated-Entry-Point"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachPrologNode--------------------------------
+// Machine function Prolog Node
+class MachPrologNode : public MachIdealNode {
+public:
+ MachPrologNode( ) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+ virtual int reloc() const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Prolog"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachEpilogNode--------------------------------
+// Machine function Epilog Node
+class MachEpilogNode : public MachIdealNode {
+public:
+ MachEpilogNode(bool do_poll = false) : _do_polling(do_poll) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+ virtual int reloc() const;
+ virtual const Pipeline *pipeline() const;
+
+private:
+ bool _do_polling;
+
+public:
+ bool do_polling() const { return _do_polling; }
+
+ // Offset of safepoint from the beginning of the node
+ int safepoint_offset() const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Epilog"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachNopNode-----------------------------------
+// Machine function Nop Node
+class MachNopNode : public MachIdealNode {
+private:
+ int _count;
+public:
+ MachNopNode( ) : _count(1) {}
+ MachNopNode( int count ) : _count(count) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+ virtual const class Type *bottom_type() const { return Type::CONTROL; }
+
+ virtual int ideal_Opcode() const { return Op_Con; } // bogus; see output.cpp
+ virtual const Pipeline *pipeline() const;
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Nop"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+ virtual void dump_spec(outputStream *st) const { } // No per-operand info
+#endif
+};
+
+//------------------------------MachSpillCopyNode------------------------------
+// Machine SpillCopy Node. Copies 1 or 2 words from any location to any
+// location (stack or register).
+class MachSpillCopyNode : public MachIdealNode {
+ const RegMask *_in; // RegMask for input
+ const RegMask *_out; // RegMask for output
+ const Type *_type;
+public:
+ MachSpillCopyNode( Node *n, const RegMask &in, const RegMask &out ) :
+ MachIdealNode(), _in(&in), _out(&out), _type(n->bottom_type()) {
+ init_class_id(Class_MachSpillCopy);
+ init_flags(Flag_is_Copy);
+ add_req(NULL);
+ add_req(n);
+ }
+ virtual uint size_of() const { return sizeof(*this); }
+ void set_out_RegMask(const RegMask &out) { _out = &out; }
+ void set_in_RegMask(const RegMask &in) { _in = &in; }
+ virtual const RegMask &out_RegMask() const { return *_out; }
+ virtual const RegMask &in_RegMask(uint) const { return *_in; }
+ virtual const class Type *bottom_type() const { return _type; }
+ virtual uint ideal_reg() const { return Matcher::base2reg[_type->base()]; }
+ virtual uint oper_input_base() const { return 1; }
+ uint implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const;
+
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "MachSpillCopy"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachNullChkNode--------------------------------
+// Machine-dependent null-pointer-check Node. Points to a real MachNode that is
+// also some kind of memory op. Turns the indicated MachNode into a
+// conditional branch with good latency on the ptr-not-null path and awful
+// latency on the pointer-is-null path.
+
+class MachNullCheckNode : public MachIdealNode {
+public:
+ const uint _vidx; // Index of memop being tested
+ MachNullCheckNode( Node *ctrl, Node *memop, uint vidx ) : MachIdealNode(), _vidx(vidx) {
+ init_class_id(Class_MachNullCheck);
+ init_flags(Flag_is_Branch | Flag_is_pc_relative);
+ add_req(ctrl);
+ add_req(memop);
+ }
+
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual bool pinned() const { return true; };
+ virtual void negate() { }
+ virtual const class Type *bottom_type() const { return TypeTuple::IFBOTH; }
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual const RegMask &out_RegMask() const { return RegMask::Empty; }
+#ifndef PRODUCT
+ virtual const char *Name() const { return "NullCheck"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachProjNode----------------------------------
+// Machine-dependent Ideal projections (how is that for an oxymoron). Really
+// just MachNodes made by the Ideal world that replicate simple projections
+// but with machine-dependent input & output register masks. Generally
+// produced as part of calling conventions. Normally I make MachNodes as part
+// of the Matcher process, but the Matcher is ill suited to issues involving
+// frame handling, so frame handling is all done in the Ideal world with
+// occasional callbacks to the machine model for important info.
+class MachProjNode : public ProjNode {
+public:
+ MachProjNode( Node *multi, uint con, const RegMask &out, uint ideal_reg ) : ProjNode(multi,con), _rout(out), _ideal_reg(ideal_reg) {}
+ RegMask _rout;
+ const uint _ideal_reg;
+ enum projType {
+ unmatched_proj = 0, // Projs for Control, I/O, memory not matched
+ fat_proj = 999 // Projs killing many regs, defined by _rout
+ };
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const;
+ virtual const TypePtr *adr_type() const;
+ virtual const RegMask &in_RegMask(uint) const { return RegMask::Empty; }
+ virtual const RegMask &out_RegMask() const { return _rout; }
+ virtual uint ideal_reg() const { return _ideal_reg; }
+ // Need size_of() for virtual ProjNode::clone()
+ virtual uint size_of() const { return sizeof(MachProjNode); }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachIfNode-------------------------------------
+// Machine-specific versions of IfNodes
+class MachIfNode : public MachNode {
+ virtual uint size_of() const { return sizeof(*this); } // Size is bigger
+public:
+ float _prob; // Probability branch goes either way
+ float _fcnt; // Frequency counter
+ MachIfNode() : MachNode() {
+ init_class_id(Class_MachIf);
+ }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachFastLockNode-------------------------------------
+// Machine-specific versions of FastLockNodes
+class MachFastLockNode : public MachNode {
+ virtual uint size_of() const { return sizeof(*this); } // Size is bigger
+public:
+ BiasedLockingCounters* _counters;
+
+ MachFastLockNode() : MachNode() {}
+};
+
+//------------------------------MachReturnNode--------------------------------
+// Machine-specific versions of subroutine returns
+class MachReturnNode : public MachNode {
+ virtual uint size_of() const; // Size is bigger
+public:
+ RegMask *_in_rms; // Input register masks, set during allocation
+ ReallocMark _nesting; // assertion check for reallocations
+ const TypePtr* _adr_type; // memory effects of call or return
+ MachReturnNode() : MachNode() {
+ init_class_id(Class_MachReturn);
+ _adr_type = TypePtr::BOTTOM; // the default: all of memory
+ }
+
+ void set_adr_type(const TypePtr* atp) { _adr_type = atp; }
+
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual bool pinned() const { return true; };
+ virtual const TypePtr *adr_type() const;
+};
+
+//------------------------------MachSafePointNode-----------------------------
+// Machine-specific versions of safepoints
+class MachSafePointNode : public MachReturnNode {
+public:
+ OopMap* _oop_map; // Array of OopMap info (8-bit char) for GC
+ JVMState* _jvms; // Pointer to list of JVM State Objects
+ uint _jvmadj; // Extra delta to jvms indexes (mach. args)
+ OopMap* oop_map() const { return _oop_map; }
+ void set_oop_map(OopMap* om) { _oop_map = om; }
+
+ MachSafePointNode() : MachReturnNode(), _oop_map(NULL), _jvms(NULL), _jvmadj(0) {
+ init_class_id(Class_MachSafePoint);
+ init_flags(Flag_is_safepoint_node);
+ }
+
+ virtual JVMState* jvms() const { return _jvms; }
+ void set_jvms(JVMState* s) {
+ _jvms = s;
+ }
+ bool is_safepoint_node() const { return (flags() & Flag_is_safepoint_node) != 0; }
+ virtual const Type *bottom_type() const;
+
+ virtual const RegMask &in_RegMask(uint) const;
+
+ // Functionality from old debug nodes
+ Node *returnadr() const { return in(TypeFunc::ReturnAdr); }
+ Node *frameptr () const { return in(TypeFunc::FramePtr); }
+
+ Node *local(const JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(_jvmadj + jvms->locoff() + idx);
+ }
+ Node *stack(const JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(_jvmadj + jvms->stkoff() + idx);
+ }
+ Node *monitor_obj(const JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(_jvmadj + jvms->monitor_obj_offset(idx));
+ }
+ Node *monitor_box(const JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(_jvmadj + jvms->monitor_box_offset(idx));
+ }
+ void set_local(const JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(_jvmadj + jvms->locoff() + idx, c);
+ }
+ void set_stack(const JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(_jvmadj + jvms->stkoff() + idx, c);
+ }
+ void set_monitor(const JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(_jvmadj + jvms->monoff() + idx, c);
+ }
+};
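+
+// Editor's note: compile-guarded sketch, NOT part of the original sources.
+// It shows how the _jvmadj delta documented above offsets the ideal JVMState
+// indexes when reading debug info out of a MachSafePointNode. loc_size() is
+// assumed to be the usual JVMState accessor for the number of locals.
+// The guard macro is never defined, so this code is never built.
+#ifdef EDITOR_SKETCH_SAFEPOINT_DEBUG_INFO
+static void dump_live_locals(const MachSafePointNode* sfpt, outputStream* st) {
+  const JVMState* jvms = sfpt->jvms();
+  for (uint i = 0; i < jvms->loc_size(); i++) {
+    // local() reads in(_jvmadj + jvms->locoff() + i), i.e. the machine-level
+    // input carrying the i-th interpreter local at this safepoint.
+    Node* l = sfpt->local(jvms, i);
+    if (l != NULL) {
+      st->print("local[%u]: ", i);
+      l->dump_spec(st);
+      st->print(" ");
+    }
+  }
+}
+#endif // EDITOR_SKETCH_SAFEPOINT_DEBUG_INFO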
+
+//------------------------------MachCallNode----------------------------------
+// Machine-specific versions of subroutine calls
+class MachCallNode : public MachSafePointNode {
+protected:
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const = 0; // Size is bigger
+public:
+ const TypeFunc *_tf; // Function type
+ address _entry_point; // Address of the method being called
+ float _cnt; // Estimate of number of times called
+ uint _argsize; // Size of argument block on stack
+
+ const TypeFunc* tf() const { return _tf; }
+ const address entry_point() const { return _entry_point; }
+ const float cnt() const { return _cnt; }
+ uint argsize() const { return _argsize; }
+
+ void set_tf(const TypeFunc* tf) { _tf = tf; }
+ void set_entry_point(address p) { _entry_point = p; }
+ void set_cnt(float c) { _cnt = c; }
+ void set_argsize(int s) { _argsize = s; }
+
+ MachCallNode() : MachSafePointNode() {
+ init_class_id(Class_MachCall);
+ init_flags(Flag_is_Call);
+ }
+
+ virtual const Type *bottom_type() const;
+ virtual bool pinned() const { return false; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual int ret_addr_offset() { return 0; }
+
+ bool returns_long() const { return tf()->return_type() == T_LONG; }
+ bool return_value_is_used() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachCallJavaNode------------------------------
+// "Base" class for machine-specific versions of subroutine calls
+class MachCallJavaNode : public MachCallNode {
+protected:
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ ciMethod* _method; // Method being direct called
+ int _bci; // Byte Code index of call byte code
+ bool _optimized_virtual; // Tells if node is a static call or an optimized virtual
+ MachCallJavaNode() : MachCallNode() {
+ init_class_id(Class_MachCallJava);
+ }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachCallStaticJavaNode------------------------
+// Machine-specific versions of monomorphic subroutine calls
+class MachCallStaticJavaNode : public MachCallJavaNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ const char *_name; // Runtime wrapper name
+ MachCallStaticJavaNode() : MachCallJavaNode() {
+ init_class_id(Class_MachCallStaticJava);
+ }
+
+ // If this is an uncommon trap, return the request code, else zero.
+ int uncommon_trap_request() const;
+
+ virtual int ret_addr_offset();
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+ void dump_trap_args(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachCallDynamicJavaNode------------------------
+// Machine-specific versions of possibly megamorphic subroutine calls
+class MachCallDynamicJavaNode : public MachCallJavaNode {
+public:
+ int _vtable_index;
+ MachCallDynamicJavaNode() : MachCallJavaNode() {
+ init_class_id(Class_MachCallDynamicJava);
+ DEBUG_ONLY(_vtable_index = -99); // throw an assert if uninitialized
+ }
+ virtual int ret_addr_offset();
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachCallRuntimeNode----------------------------
+// Machine-specific versions of subroutine calls
+class MachCallRuntimeNode : public MachCallNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ const char *_name; // Printable name, if _method is NULL
+ MachCallRuntimeNode() : MachCallNode() {
+ init_class_id(Class_MachCallRuntime);
+ }
+ virtual int ret_addr_offset();
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+class MachCallLeafNode: public MachCallRuntimeNode {
+public:
+ MachCallLeafNode() : MachCallRuntimeNode() {
+ init_class_id(Class_MachCallLeaf);
+ }
+};
+
+//------------------------------MachHaltNode-----------------------------------
+// Machine-specific versions of halt nodes
+class MachHaltNode : public MachReturnNode {
+public:
+ virtual JVMState* jvms() const;
+};
+
+
+//------------------------------MachTempNode-----------------------------------
+// Node used by the adlc to construct inputs to represent temporary registers
+class MachTempNode : public MachNode {
+private:
+ MachOper *_opnd_array[1];
+
+public:
+ virtual const RegMask &out_RegMask() const { return *_opnds[0]->in_RegMask(0); }
+ virtual uint rule() const { return 9999999; }
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {}
+
+ MachTempNode(MachOper* oper) {
+ init_class_id(Class_MachTemp);
+ _num_opnds = 1;
+ _opnds = _opnd_array;
+ add_req(NULL);
+ _opnds[0] = oper;
+ }
+ virtual uint size_of() const { return sizeof(MachTempNode); }
+
+#ifndef PRODUCT
+ virtual void format(PhaseRegAlloc *, outputStream *st ) const {}
+ virtual const char *Name() const { return "MachTemp";}
+#endif
+};
+
+
+
+//------------------------------labelOper--------------------------------------
+// Machine-independent version of label operand
+class labelOper : public MachOper {
+private:
+ virtual uint num_edges() const { return 0; }
+public:
+ // Supported for fixed size branches
+ Label* _label; // Label for branch(es)
+
+ uint _block_num;
+
+ labelOper() : _label(0), _block_num(0) {}
+
+ labelOper(Label* label, uint block_num) : _label(label), _block_num(block_num) {}
+
+ labelOper(labelOper* l) : _label(l->_label) , _block_num(l->_block_num) {}
+
+ virtual MachOper *clone(Compile* C) const;
+
+ virtual Label *label() const { return _label; }
+
+ virtual uint opcode() const;
+
+ virtual uint hash() const;
+ virtual uint cmp( const MachOper &oper ) const;
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Label";}
+
+ virtual void int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const;
+ virtual void ext_format(PhaseRegAlloc *ra, const MachNode *node, int idx, outputStream *st) const { int_format( ra, node, st ); }
+#endif
+};
+
+
+//------------------------------methodOper--------------------------------------
+// Machine-independent version of method operand
+class methodOper : public MachOper {
+private:
+ virtual uint num_edges() const { return 0; }
+public:
+ intptr_t _method; // Address of method
+ methodOper() : _method(0) {}
+ methodOper(intptr_t method) : _method(method) {}
+
+ virtual MachOper *clone(Compile* C) const;
+
+ virtual intptr_t method() const { return _method; }
+
+ virtual uint opcode() const;
+
+ virtual uint hash() const;
+ virtual uint cmp( const MachOper &oper ) const;
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Method";}
+
+ virtual void int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const;
+ virtual void ext_format(PhaseRegAlloc *ra, const MachNode *node, int idx, outputStream *st) const { int_format( ra, node, st ); }
+#endif
+};
diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp
new file mode 100644
index 000000000..9ba4bc3d4
--- /dev/null
+++ b/src/share/vm/opto/macro.cpp
@@ -0,0 +1,995 @@
+/*
+ * Copyright 2005-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_macro.cpp.incl"
+
+
+//
+// Replace any references to "oldref" in inputs to "use" with "newref".
+// Returns the number of replacements made.
+//
+int PhaseMacroExpand::replace_input(Node *use, Node *oldref, Node *newref) {
+ int nreplacements = 0;
+ uint req = use->req();
+ for (uint j = 0; j < use->len(); j++) {
+ Node *uin = use->in(j);
+ if (uin == oldref) {
+ if (j < req)
+ use->set_req(j, newref);
+ else
+ use->set_prec(j, newref);
+ nreplacements++;
+ } else if (j >= req && uin == NULL) {
+ break;
+ }
+ }
+ return nreplacements;
+}
+
+void PhaseMacroExpand::copy_call_debug_info(CallNode *oldcall, CallNode * newcall) {
+ // Copy debug information and adjust JVMState information
+ uint old_dbg_start = oldcall->tf()->domain()->cnt();
+ uint new_dbg_start = newcall->tf()->domain()->cnt();
+ int jvms_adj = new_dbg_start - old_dbg_start;
+ assert (new_dbg_start == newcall->req(), "argument count mismatch");
+ for (uint i = old_dbg_start; i < oldcall->req(); i++) {
+ newcall->add_req(oldcall->in(i));
+ }
+ newcall->set_jvms(oldcall->jvms());
+ for (JVMState *jvms = newcall->jvms(); jvms != NULL; jvms = jvms->caller()) {
+ jvms->set_map(newcall);
+ jvms->set_locoff(jvms->locoff()+jvms_adj);
+ jvms->set_stkoff(jvms->stkoff()+jvms_adj);
+ jvms->set_monoff(jvms->monoff()+jvms_adj);
+ jvms->set_endoff(jvms->endoff()+jvms_adj);
+ }
+}
+
+Node* PhaseMacroExpand::opt_iff(Node* region, Node* iff) {
+ IfNode *opt_iff = transform_later(iff)->as_If();
+
+ // Fast path taken; set region slot 2
+ Node *fast_taken = transform_later( new (C, 1) IfFalseNode(opt_iff) );
+ region->init_req(2,fast_taken); // Capture fast-control
+
+ // Fast path not-taken, i.e. slow path
+ Node *slow_taken = transform_later( new (C, 1) IfTrueNode(opt_iff) );
+ return slow_taken;
+}
+
+//--------------------copy_predefined_input_for_runtime_call--------------------
+void PhaseMacroExpand::copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call) {
+ // Set fixed predefined input arguments
+ call->init_req( TypeFunc::Control, ctrl );
+ call->init_req( TypeFunc::I_O , oldcall->in( TypeFunc::I_O) );
+ call->init_req( TypeFunc::Memory , oldcall->in( TypeFunc::Memory ) ); // ?????
+ call->init_req( TypeFunc::ReturnAdr, oldcall->in( TypeFunc::ReturnAdr ) );
+ call->init_req( TypeFunc::FramePtr, oldcall->in( TypeFunc::FramePtr ) );
+}
+
+//------------------------------make_slow_call---------------------------------
+CallNode* PhaseMacroExpand::make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call, const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1) {
+
+ // Slow-path call
+ int size = slow_call_type->domain()->cnt();
+ CallNode *call = leaf_name
+ ? (CallNode*)new (C, size) CallLeafNode ( slow_call_type, slow_call, leaf_name, TypeRawPtr::BOTTOM )
+ : (CallNode*)new (C, size) CallStaticJavaNode( slow_call_type, slow_call, OptoRuntime::stub_name(slow_call), oldcall->jvms()->bci(), TypeRawPtr::BOTTOM );
+
+ // Slow path call has no side-effects, uses few values
+ copy_predefined_input_for_runtime_call(slow_path, oldcall, call );
+ if (parm0 != NULL) call->init_req(TypeFunc::Parms+0, parm0);
+ if (parm1 != NULL) call->init_req(TypeFunc::Parms+1, parm1);
+ copy_call_debug_info(oldcall, call);
+ call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
+ _igvn.hash_delete(oldcall);
+ _igvn.subsume_node(oldcall, call);
+ transform_later(call);
+
+ return call;
+}
+
+void PhaseMacroExpand::extract_call_projections(CallNode *call) {
+ _fallthroughproj = NULL;
+ _fallthroughcatchproj = NULL;
+ _ioproj_fallthrough = NULL;
+ _ioproj_catchall = NULL;
+ _catchallcatchproj = NULL;
+ _memproj_fallthrough = NULL;
+ _memproj_catchall = NULL;
+ _resproj = NULL;
+ for (DUIterator_Fast imax, i = call->fast_outs(imax); i < imax; i++) {
+ ProjNode *pn = call->fast_out(i)->as_Proj();
+ switch (pn->_con) {
+ case TypeFunc::Control:
+ {
+ // For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj
+ _fallthroughproj = pn;
+ DUIterator_Fast jmax, j = pn->fast_outs(jmax);
+ const Node *cn = pn->fast_out(j);
+ if (cn->is_Catch()) {
+ ProjNode *cpn = NULL;
+ for (DUIterator_Fast kmax, k = cn->fast_outs(kmax); k < kmax; k++) {
+ cpn = cn->fast_out(k)->as_Proj();
+ assert(cpn->is_CatchProj(), "must be a CatchProjNode");
+ if (cpn->_con == CatchProjNode::fall_through_index)
+ _fallthroughcatchproj = cpn;
+ else {
+ assert(cpn->_con == CatchProjNode::catch_all_index, "must be correct index.");
+ _catchallcatchproj = cpn;
+ }
+ }
+ }
+ break;
+ }
+ case TypeFunc::I_O:
+ if (pn->_is_io_use)
+ _ioproj_catchall = pn;
+ else
+ _ioproj_fallthrough = pn;
+ break;
+ case TypeFunc::Memory:
+ if (pn->_is_io_use)
+ _memproj_catchall = pn;
+ else
+ _memproj_fallthrough = pn;
+ break;
+ case TypeFunc::Parms:
+ _resproj = pn;
+ break;
+ default:
+ assert(false, "unexpected projection from allocation node.");
+ }
+ }
+
+}
+
+
+//---------------------------set_eden_pointers-------------------------
+void PhaseMacroExpand::set_eden_pointers(Node* &eden_top_adr, Node* &eden_end_adr) {
+ if (UseTLAB) { // Private allocation: load from TLS
+ Node* thread = transform_later(new (C, 1) ThreadLocalNode());
+ int tlab_top_offset = in_bytes(JavaThread::tlab_top_offset());
+ int tlab_end_offset = in_bytes(JavaThread::tlab_end_offset());
+ eden_top_adr = basic_plus_adr(top()/*not oop*/, thread, tlab_top_offset);
+ eden_end_adr = basic_plus_adr(top()/*not oop*/, thread, tlab_end_offset);
+ } else { // Shared allocation: load from globals
+ CollectedHeap* ch = Universe::heap();
+ address top_adr = (address)ch->top_addr();
+ address end_adr = (address)ch->end_addr();
+ eden_top_adr = makecon(TypeRawPtr::make(top_adr));
+ eden_end_adr = basic_plus_adr(eden_top_adr, end_adr - top_adr);
+ }
+}
+
+
+Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) {
+ Node* adr = basic_plus_adr(base, offset);
+ const TypePtr* adr_type = TypeRawPtr::BOTTOM;
+ Node* value = LoadNode::make(C, ctl, mem, adr, adr_type, value_type, bt);
+ transform_later(value);
+ return value;
+}
+
+
+Node* PhaseMacroExpand::make_store(Node* ctl, Node* mem, Node* base, int offset, Node* value, BasicType bt) {
+ Node* adr = basic_plus_adr(base, offset);
+ mem = StoreNode::make(C, ctl, mem, adr, NULL, value, bt);
+ transform_later(mem);
+ return mem;
+}
+
+//=============================================================================
+//
+// A L L O C A T I O N
+//
+// Allocation attempts to be fast in the case of frequent small objects.
+// It breaks down like this:
+//
+// 1) Size in doublewords is computed. This is a constant for objects and
+// variable for most arrays. Doubleword units are used to avoid size
+// overflow of huge doubleword arrays. We need doublewords in the end for
+// rounding.
+//
+// 2) Size is checked for being 'too large'. Too-large allocations will go
+// the slow path into the VM. The slow path can throw any required
+// exceptions, and does all the special checks for very large arrays. The
+// size test can constant-fold away for objects. For objects with
+// finalizers it constant-folds the other way: you always go slow with
+// finalizers.
+//
+// 3) If NOT using TLABs, this is the contended loop-back point.
+// Load-Locked the heap top. If using TLABs normal-load the heap top.
+//
+// 4) Check that heap top + size*8 < max. If we fail, go the slow route.
+// NOTE: "top+size*8" cannot wrap the 4Gig line! Here's why: for largish
+// "size*8" we always enter the VM, where "largish" is a constant picked small
+// enough that there's always space between the eden max and 4Gig (old space is
+// there so it's quite large) and large enough that the cost of entering the VM
+// is dwarfed by the cost to initialize the space.
+//
+// 5) If NOT using TLABs, Store-Conditional the adjusted heap top back
+// down. If contended, repeat at step 3. If using TLABs normal-store
+// adjusted heap top back down; there is no contention.
+//
+// 6) If !ZeroTLAB then Bulk-clear the object/array. Fill in klass & mark
+// fields.
+//
+// 7) Merge with the slow-path; cast the raw memory pointer to the correct
+// oop flavor.
+//
+//=============================================================================
+// FastAllocateSizeLimit value is in DOUBLEWORDS.
+// Allocations bigger than this always go the slow route.
+// This value must be small enough that allocation attempts that need to
+// trigger exceptions go the slow route. Also, it must be small enough so
+// that heap_top + size_in_bytes does not wrap around the 4Gig limit.
+//=============================================================================
+// %%% Here is an old comment from parseHelper.cpp; is it outdated?
+// The allocator will coalesce int->oop copies away. See comment in
+// coalesce.cpp about how this works. It depends critically on the exact
+// code shape produced here, so if you are changing this code shape
+// make sure the GC info for the heap-top is correct in and around the
+// slow-path call.
+//
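+
+// ---------------------------------------------------------------------------
+// Editor's note: the compile-guarded sketch below is NOT part of the original
+// sources. It is a minimal, self-contained illustration of the fast-path
+// bump-pointer allocation described in steps 3-5 above, with a CAS loop
+// standing in for the Load-Locked / Store-Conditional pair on a hypothetical
+// shared eden (the TLAB case needs no atomics at all). The guard macro is
+// never defined, so none of this is compiled.
+#ifdef EDITOR_SKETCH_FAST_ALLOC
+#include <atomic>
+#include <cstddef>
+
+// Hypothetical eden bounds; the real code loads them via set_eden_pointers().
+static std::atomic<char*> eden_top;
+static char*              eden_end;
+
+// Try to carve size_in_bytes out of eden; NULL means "take the slow path".
+static char* fast_allocate(size_t size_in_bytes) {
+  char* old_top = eden_top.load(std::memory_order_relaxed);
+  for (;;) {
+    char* new_top = old_top + size_in_bytes;
+    if (new_top >= eden_end) {
+      return NULL;                  // would need a GC: slow path (step 4)
+    }
+    // Store-conditional equivalent: on contention, old_top is refreshed and
+    // we retry from step 3.
+    if (eden_top.compare_exchange_weak(old_top, new_top)) {
+      return old_top;               // raw memory; caller fills mark/klass (step 6)
+    }
+  }
+}
+#endif // EDITOR_SKETCH_FAST_ALLOC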
+
+void PhaseMacroExpand::expand_allocate_common(
+ AllocateNode* alloc, // allocation node to be expanded
+ Node* length, // array length for an array allocation
+ const TypeFunc* slow_call_type, // Type of slow call
+ address slow_call_address // Address of slow call
+ )
+{
+
+ Node* ctrl = alloc->in(TypeFunc::Control);
+ Node* mem = alloc->in(TypeFunc::Memory);
+ Node* i_o = alloc->in(TypeFunc::I_O);
+ Node* size_in_bytes = alloc->in(AllocateNode::AllocSize);
+ Node* klass_node = alloc->in(AllocateNode::KlassNode);
+ Node* initial_slow_test = alloc->in(AllocateNode::InitialTest);
+
+ Node* eden_top_adr;
+ Node* eden_end_adr;
+ set_eden_pointers(eden_top_adr, eden_end_adr);
+
+ uint raw_idx = C->get_alias_index(TypeRawPtr::BOTTOM);
+ assert(ctrl != NULL, "must have control");
+
+ // Load Eden::end. Loop invariant and hoisted.
+ //
+ // Note: We set the control input on "eden_end" and "old_eden_top" when using
+ // a TLAB to work around a bug where these values were being moved across
+ // a safepoint. These are not oops, so they cannot be included in the oop
+ // map, but they can be changed by a GC. The proper way to fix this would
+ // be to set the raw memory state when generating a SafepointNode. However
+ // this will require extensive changes to the loop optimization in order to
+ // prevent a degradation of the optimization.
+ // See comment in memnode.hpp, around line 227 in class LoadPNode.
+ Node* eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
+
+ // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
+ // they will not be used if "always_slow" is set
+ enum { slow_result_path = 1, fast_result_path = 2 };
+ Node *result_region;
+ Node *result_phi_rawmem;
+ Node *result_phi_rawoop;
+ Node *result_phi_i_o;
+
+ // The initial slow comparison is a size check, the comparison
+ // we want to do is a BoolTest::gt
+ bool always_slow = false;
+ int tv = _igvn.find_int_con(initial_slow_test, -1);
+ if (tv >= 0) {
+ always_slow = (tv == 1);
+ initial_slow_test = NULL;
+ } else {
+ initial_slow_test = BoolNode::make_predicate(initial_slow_test, &_igvn);
+ }
+
+ if (DTraceAllocProbes) {
+ // Force slow-path allocation
+ always_slow = true;
+ initial_slow_test = NULL;
+ }
+
+ enum { too_big_or_final_path = 1, need_gc_path = 2 };
+ Node *slow_region = NULL;
+ Node *toobig_false = ctrl;
+
+ assert (initial_slow_test == NULL || !always_slow, "arguments must be consistent");
+ // generate the initial test if necessary
+ if (initial_slow_test != NULL ) {
+ slow_region = new (C, 3) RegionNode(3);
+
+ // Now make the initial failure test. Usually a too-big test but
+ // might be a TRUE for finalizers or a fancy class check for
+ // newInstance0.
+ IfNode *toobig_iff = new (C, 2) IfNode(ctrl, initial_slow_test, PROB_MIN, COUNT_UNKNOWN);
+ transform_later(toobig_iff);
+ // Plug the failing-too-big test into the slow-path region
+ Node *toobig_true = new (C, 1) IfTrueNode( toobig_iff );
+ transform_later(toobig_true);
+ slow_region ->init_req( too_big_or_final_path, toobig_true );
+ toobig_false = new (C, 1) IfFalseNode( toobig_iff );
+ transform_later(toobig_false);
+ } else { // No initial test, just fall into next case
+ toobig_false = ctrl;
+ debug_only(slow_region = NodeSentinel);
+ }
+
+ Node *slow_mem = mem; // save the current memory state for slow path
+ // generate the fast allocation code unless we know that the initial test will always go slow
+ if (!always_slow) {
+ // allocate the Region and Phi nodes for the result
+ result_region = new (C, 3) RegionNode(3);
+ result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM );
+ result_phi_rawoop = new (C, 3) PhiNode( result_region, TypeRawPtr::BOTTOM );
+ result_phi_i_o = new (C, 3) PhiNode( result_region, Type::ABIO ); // I/O is used for Prefetch
+
+ // We need a Region for the loop-back contended case.
+ enum { fall_in_path = 1, contended_loopback_path = 2 };
+ Node *contended_region;
+ Node *contended_phi_rawmem;
+ if( UseTLAB ) {
+ contended_region = toobig_false;
+ contended_phi_rawmem = mem;
+ } else {
+ contended_region = new (C, 3) RegionNode(3);
+ contended_phi_rawmem = new (C, 3) PhiNode( contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
+ // Now handle the passing-too-big test. We fall into the contended
+ // loop-back merge point.
+ contended_region ->init_req( fall_in_path, toobig_false );
+ contended_phi_rawmem->init_req( fall_in_path, mem );
+ transform_later(contended_region);
+ transform_later(contended_phi_rawmem);
+ }
+
+ // Load(-locked) the heap top.
+ // See note above concerning the control input when using a TLAB
+ Node *old_eden_top = UseTLAB
+ ? new (C, 3) LoadPNode ( ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM )
+ : new (C, 3) LoadPLockedNode( contended_region, contended_phi_rawmem, eden_top_adr );
+
+ transform_later(old_eden_top);
+ // Add to heap top to get a new heap top
+ Node *new_eden_top = new (C, 4) AddPNode( top(), old_eden_top, size_in_bytes );
+ transform_later(new_eden_top);
+ // Check for needing a GC; compare against heap end
+ Node *needgc_cmp = new (C, 3) CmpPNode( new_eden_top, eden_end );
+ transform_later(needgc_cmp);
+ Node *needgc_bol = new (C, 2) BoolNode( needgc_cmp, BoolTest::ge );
+ transform_later(needgc_bol);
+ IfNode *needgc_iff = new (C, 2) IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN );
+ transform_later(needgc_iff);
+
+ // Plug the failing-heap-space-need-gc test into the slow-path region
+ Node *needgc_true = new (C, 1) IfTrueNode( needgc_iff );
+ transform_later(needgc_true);
+ if( initial_slow_test ) {
+ slow_region ->init_req( need_gc_path, needgc_true );
+ // This completes all paths into the slow merge point
+ transform_later(slow_region);
+ } else { // No initial slow path needed!
+ // Just fall from the need-GC path straight into the VM call.
+ slow_region = needgc_true;
+ }
+ // No need for a GC. Setup for the Store-Conditional
+ Node *needgc_false = new (C, 1) IfFalseNode( needgc_iff );
+ transform_later(needgc_false);
+
+ // Grab regular I/O before optional prefetch may change it.
+ // Slow-path does no I/O so just set it to the original I/O.
+ result_phi_i_o->init_req( slow_result_path, i_o );
+
+ i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
+ old_eden_top, new_eden_top, length);
+
+ // Store (-conditional) the modified eden top back down.
+ // StorePConditional produces flags for a test PLUS a modified raw
+ // memory state.
+ Node *store_eden_top;
+ Node *fast_oop_ctrl;
+ if( UseTLAB ) {
+ store_eden_top = new (C, 4) StorePNode( needgc_false, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, new_eden_top );
+ transform_later(store_eden_top);
+ fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
+ } else {
+ store_eden_top = new (C, 5) StorePConditionalNode( needgc_false, contended_phi_rawmem, eden_top_adr, new_eden_top, old_eden_top );
+ transform_later(store_eden_top);
+ Node *contention_check = new (C, 2) BoolNode( store_eden_top, BoolTest::ne );
+ transform_later(contention_check);
+ store_eden_top = new (C, 1) SCMemProjNode(store_eden_top);
+ transform_later(store_eden_top);
+
+ // If not using TLABs, check to see if there was contention.
+ IfNode *contention_iff = new (C, 2) IfNode ( needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN );
+ transform_later(contention_iff);
+ Node *contention_true = new (C, 1) IfTrueNode( contention_iff );
+ transform_later(contention_true);
+ // If contention, loopback and try again.
+ contended_region->init_req( contended_loopback_path, contention_true );
+ contended_phi_rawmem->init_req( contended_loopback_path, store_eden_top );
+
+ // Fast-path succeeded with no contention!
+ Node *contention_false = new (C, 1) IfFalseNode( contention_iff );
+ transform_later(contention_false);
+ fast_oop_ctrl = contention_false;
+ }
+
+ // Rename successful fast-path variables to make meaning more obvious
+ Node* fast_oop = old_eden_top;
+ Node* fast_oop_rawmem = store_eden_top;
+ fast_oop_rawmem = initialize_object(alloc,
+ fast_oop_ctrl, fast_oop_rawmem, fast_oop,
+ klass_node, length, size_in_bytes);
+
+ if (ExtendedDTraceProbes) {
+ // Slow-path call
+ int size = TypeFunc::Parms + 2;
+ CallLeafNode *call = new (C, size) CallLeafNode(OptoRuntime::dtrace_object_alloc_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc_base),
+ "dtrace_object_alloc",
+ TypeRawPtr::BOTTOM);
+
+ // Get base of thread-local storage area
+ Node* thread = new (C, 1) ThreadLocalNode();
+ transform_later(thread);
+
+ call->init_req(TypeFunc::Parms+0, thread);
+ call->init_req(TypeFunc::Parms+1, fast_oop);
+ call->init_req( TypeFunc::Control, fast_oop_ctrl );
+ call->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ call->init_req( TypeFunc::Memory , fast_oop_rawmem );
+ call->init_req( TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr) );
+ call->init_req( TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr) );
+ transform_later(call);
+ fast_oop_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control);
+ transform_later(fast_oop_ctrl);
+ fast_oop_rawmem = new (C, 1) ProjNode(call,TypeFunc::Memory);
+ transform_later(fast_oop_rawmem);
+ }
+
+ // Plug in the successful fast-path into the result merge point
+ result_region ->init_req( fast_result_path, fast_oop_ctrl );
+ result_phi_rawoop->init_req( fast_result_path, fast_oop );
+ result_phi_i_o ->init_req( fast_result_path, i_o );
+ result_phi_rawmem->init_req( fast_result_path, fast_oop_rawmem );
+ } else {
+ slow_region = ctrl;
+ }
+
+ // Generate slow-path call
+ CallNode *call = new (C, slow_call_type->domain()->cnt())
+ CallStaticJavaNode(slow_call_type, slow_call_address,
+ OptoRuntime::stub_name(slow_call_address),
+ alloc->jvms()->bci(),
+ TypePtr::BOTTOM);
+ call->init_req( TypeFunc::Control, slow_region );
+ call->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ call->init_req( TypeFunc::Memory , slow_mem ); // may gc ptrs
+ call->init_req( TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr) );
+ call->init_req( TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr) );
+
+ call->init_req(TypeFunc::Parms+0, klass_node);
+ if (length != NULL) {
+ call->init_req(TypeFunc::Parms+1, length);
+ }
+
+ // Copy debug information and adjust JVMState information, then replace
+ // allocate node with the call
+ copy_call_debug_info((CallNode *) alloc, call);
+ if (!always_slow) {
+ call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
+ }
+ _igvn.hash_delete(alloc);
+ _igvn.subsume_node(alloc, call);
+ transform_later(call);
+
+ // Identify the output projections from the allocate node and
+ // adjust any references to them.
+ // The control and io projections look like:
+ //
+ // v---Proj(ctrl) <-----+ v---CatchProj(ctrl)
+ // Allocate Catch
+ // ^---Proj(io) <-------+ ^---CatchProj(io)
+ //
+ // We are interested in the CatchProj nodes.
+ //
+ extract_call_projections(call);
+
+ // An allocate node has separate memory projections for the uses on the control and i_o paths
+ // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call)
+ if (!always_slow && _memproj_fallthrough != NULL) {
+ for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) {
+ Node *use = _memproj_fallthrough->fast_out(i);
+ _igvn.hash_delete(use);
+ imax -= replace_input(use, _memproj_fallthrough, result_phi_rawmem);
+ _igvn._worklist.push(use);
+ // back up iterator
+ --i;
+ }
+ }
+ // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so
+ // we end up with a call that has only 1 memory projection
+ if (_memproj_catchall != NULL ) {
+ if (_memproj_fallthrough == NULL) {
+ _memproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::Memory);
+ transform_later(_memproj_fallthrough);
+ }
+ for (DUIterator_Fast imax, i = _memproj_catchall->fast_outs(imax); i < imax; i++) {
+ Node *use = _memproj_catchall->fast_out(i);
+ _igvn.hash_delete(use);
+ imax -= replace_input(use, _memproj_catchall, _memproj_fallthrough);
+ _igvn._worklist.push(use);
+ // back up iterator
+ --i;
+ }
+ }
+
+ mem = result_phi_rawmem;
+
+ // An allocate node has separate i_o projections for the uses on the control and i_o paths
+ // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call)
+ if (_ioproj_fallthrough == NULL) {
+ _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O);
+ transform_later(_ioproj_fallthrough);
+ } else if (!always_slow) {
+ for (DUIterator_Fast imax, i = _ioproj_fallthrough->fast_outs(imax); i < imax; i++) {
+ Node *use = _ioproj_fallthrough->fast_out(i);
+
+ _igvn.hash_delete(use);
+ imax -= replace_input(use, _ioproj_fallthrough, result_phi_i_o);
+ _igvn._worklist.push(use);
+ // back up iterator
+ --i;
+ }
+ }
+ // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so
+ // we end up with a call that has only 1 i_o projection
+ if (_ioproj_catchall != NULL ) {
+ for (DUIterator_Fast imax, i = _ioproj_catchall->fast_outs(imax); i < imax; i++) {
+ Node *use = _ioproj_catchall->fast_out(i);
+ _igvn.hash_delete(use);
+ imax -= replace_input(use, _ioproj_catchall, _ioproj_fallthrough);
+ _igvn._worklist.push(use);
+ // back up iterator
+ --i;
+ }
+ }
+
+ // if we generated only a slow call, we are done
+ if (always_slow)
+ return;
+
+
+ if (_fallthroughcatchproj != NULL) {
+ ctrl = _fallthroughcatchproj->clone();
+ transform_later(ctrl);
+ _igvn.hash_delete(_fallthroughcatchproj);
+ _igvn.subsume_node(_fallthroughcatchproj, result_region);
+ } else {
+ ctrl = top();
+ }
+ Node *slow_result;
+ if (_resproj == NULL) {
+ // no uses of the allocation result
+ slow_result = top();
+ } else {
+ slow_result = _resproj->clone();
+ transform_later(slow_result);
+ _igvn.hash_delete(_resproj);
+ _igvn.subsume_node(_resproj, result_phi_rawoop);
+ }
+
+ // Plug slow-path into result merge point
+ result_region ->init_req( slow_result_path, ctrl );
+ result_phi_rawoop->init_req( slow_result_path, slow_result);
+ result_phi_rawmem->init_req( slow_result_path, _memproj_fallthrough );
+ transform_later(result_region);
+ transform_later(result_phi_rawoop);
+ transform_later(result_phi_rawmem);
+ transform_later(result_phi_i_o);
+ // This completes all paths into the result merge point
+}
+
+
+// Helper for PhaseMacroExpand::expand_allocate_common.
+// Initializes the newly-allocated storage.
+Node*
+PhaseMacroExpand::initialize_object(AllocateNode* alloc,
+ Node* control, Node* rawmem, Node* object,
+ Node* klass_node, Node* length,
+ Node* size_in_bytes) {
+ InitializeNode* init = alloc->initialization();
+ // Store the klass & mark bits
+ Node* mark_node = NULL;
+ // For now only enable fast locking for non-array types
+ if (UseBiasedLocking && (length == NULL)) {
+ mark_node = make_load(NULL, rawmem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeRawPtr::BOTTOM, T_ADDRESS);
+ } else {
+ mark_node = makecon(TypeRawPtr::make((address)markOopDesc::prototype()));
+ }
+ rawmem = make_store(control, rawmem, object, oopDesc::mark_offset_in_bytes(), mark_node, T_ADDRESS);
+ rawmem = make_store(control, rawmem, object, oopDesc::klass_offset_in_bytes(), klass_node, T_OBJECT);
+ int header_size = alloc->minimum_header_size(); // conservatively small
+
+ // Array length
+ if (length != NULL) { // Arrays need length field
+ rawmem = make_store(control, rawmem, object, arrayOopDesc::length_offset_in_bytes(), length, T_INT);
+ // conservatively small header size:
+ header_size = sizeof(arrayOopDesc);
+ ciKlass* k = _igvn.type(klass_node)->is_klassptr()->klass();
+ if (k->is_array_klass()) // we know the exact header size in most cases:
+ header_size = Klass::layout_helper_header_size(k->layout_helper());
+ }
+
+ // Clear the object body, if necessary.
+ if (init == NULL) {
+ // The init has somehow disappeared; be cautious and clear everything.
+ //
+ // This can happen if a node is allocated but an uncommon trap occurs
+ // immediately. In this case, the Initialize gets associated with the
+ // trap, and may be placed in a different (outer) loop, if the Allocate
+ // is in a loop. If the inner loop gets unrolled (this is rare), then
+ // there can be two Allocates for one Initialize. The answer in all these
+ // edge cases is safety first. It is always safe to clear immediately
+ // within an Allocate, and then (maybe or maybe not) clear some more later.
+ if (!ZeroTLAB)
+ rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
+ header_size, size_in_bytes,
+ &_igvn);
+ } else {
+ if (!init->is_complete()) {
+ // Try to win by zeroing only what the init does not store.
+ // We can also try to do some peephole optimizations,
+ // such as combining some adjacent subword stores.
+ rawmem = init->complete_stores(control, rawmem, object,
+ header_size, size_in_bytes, &_igvn);
+ }
+
+ // We have no more use for this link, since the AllocateNode goes away:
+ init->set_req(InitializeNode::RawAddress, top());
+ // (If we keep the link, it just confuses the register allocator,
+ // who thinks he sees a real use of the address by the membar.)
+ }
+
+ return rawmem;
+}
+
+// Generate prefetch instructions for next allocations.
+Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
+ Node*& contended_phi_rawmem,
+ Node* old_eden_top, Node* new_eden_top,
+ Node* length) {
+ if( UseTLAB && AllocatePrefetchStyle == 2 ) {
+ // Generate prefetch allocation with watermark check.
+ // As an allocation hits the watermark, we will prefetch starting
+ // at a "distance" away from watermark.
+ enum { fall_in_path = 1, pf_path = 2 };
+
+ Node *pf_region = new (C, 3) RegionNode(3);
+ Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
+ TypeRawPtr::BOTTOM );
+ // I/O is used for Prefetch
+ Node *pf_phi_abio = new (C, 3) PhiNode( pf_region, Type::ABIO );
+
+ Node *thread = new (C, 1) ThreadLocalNode();
+ transform_later(thread);
+
+ Node *eden_pf_adr = new (C, 4) AddPNode( top()/*not oop*/, thread,
+ _igvn.MakeConX(in_bytes(JavaThread::tlab_pf_top_offset())) );
+ transform_later(eden_pf_adr);
+
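+ // Load the current TLAB prefetch watermark (tlab_pf_top) from the thread.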
+ Node *old_pf_wm = new (C, 3) LoadPNode( needgc_false,
+ contended_phi_rawmem, eden_pf_adr,
+ TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM );
+ transform_later(old_pf_wm);
+
+ // check against new_eden_top
+ Node *need_pf_cmp = new (C, 3) CmpPNode( new_eden_top, old_pf_wm );
+ transform_later(need_pf_cmp);
+ Node *need_pf_bol = new (C, 2) BoolNode( need_pf_cmp, BoolTest::ge );
+ transform_later(need_pf_bol);
+ IfNode *need_pf_iff = new (C, 2) IfNode( needgc_false, need_pf_bol,
+ PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN );
+ transform_later(need_pf_iff);
+
+ // true node, add prefetch distance
+ Node *need_pf_true = new (C, 1) IfTrueNode( need_pf_iff );
+ transform_later(need_pf_true);
+
+ Node *need_pf_false = new (C, 1) IfFalseNode( need_pf_iff );
+ transform_later(need_pf_false);
+
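+ // On the prefetch path, advance the watermark by AllocatePrefetchDistance
+ // and store it back into the thread-local slot.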
+ Node *new_pf_wmt = new (C, 4) AddPNode( top(), old_pf_wm,
+ _igvn.MakeConX(AllocatePrefetchDistance) );
+ transform_later(new_pf_wmt );
+ new_pf_wmt->set_req(0, need_pf_true);
+
+ Node *store_new_wmt = new (C, 4) StorePNode( need_pf_true,
+ contended_phi_rawmem, eden_pf_adr,
+ TypeRawPtr::BOTTOM, new_pf_wmt );
+ transform_later(store_new_wmt);
+
+ // adding prefetches
+ pf_phi_abio->init_req( fall_in_path, i_o );
+
+ Node *prefetch_adr;
+ Node *prefetch;
+ uint lines = AllocatePrefetchDistance / AllocatePrefetchStepSize;
+ uint step_size = AllocatePrefetchStepSize;
+ uint distance = 0;
+
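+ // Illustrative example (the flag defaults are platform-dependent): with
+ // AllocatePrefetchDistance == 256 and AllocatePrefetchStepSize == 64 this
+ // loop emits 4 PrefetchWrite nodes at new_pf_wmt + 0, 64, 128 and 192,
+ // chained through the i_o edge.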
+ for ( uint i = 0; i < lines; i++ ) {
+ prefetch_adr = new (C, 4) AddPNode( old_pf_wm, new_pf_wmt,
+ _igvn.MakeConX(distance) );
+ transform_later(prefetch_adr);
+ prefetch = new (C, 3) PrefetchWriteNode( i_o, prefetch_adr );
+ transform_later(prefetch);
+ distance += step_size;
+ i_o = prefetch;
+ }
+ pf_phi_abio->set_req( pf_path, i_o );
+
+ pf_region->init_req( fall_in_path, need_pf_false );
+ pf_region->init_req( pf_path, need_pf_true );
+
+ pf_phi_rawmem->init_req( fall_in_path, contended_phi_rawmem );
+ pf_phi_rawmem->init_req( pf_path, store_new_wmt );
+
+ transform_later(pf_region);
+ transform_later(pf_phi_rawmem);
+ transform_later(pf_phi_abio);
+
+ needgc_false = pf_region;
+ contended_phi_rawmem = pf_phi_rawmem;
+ i_o = pf_phi_abio;
+ } else if( AllocatePrefetchStyle > 0 ) {
+ // Insert a prefetch for each allocation only on the fast-path
+ Node *prefetch_adr;
+ Node *prefetch;
+ // Generate several prefetch instructions only for arrays.
+ uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
+ uint step_size = AllocatePrefetchStepSize;
+ uint distance = AllocatePrefetchDistance;
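+ // Prefetch beyond new_eden_top, starting AllocatePrefetchDistance bytes
+ // ahead and stepping by AllocatePrefetchStepSize per line.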
+ for ( uint i = 0; i < lines; i++ ) {
+ prefetch_adr = new (C, 4) AddPNode( old_eden_top, new_eden_top,
+ _igvn.MakeConX(distance) );
+ transform_later(prefetch_adr);
+ prefetch = new (C, 3) PrefetchWriteNode( i_o, prefetch_adr );
+ // Do not let it float too high, since if eden_top == eden_end,
+ // both might be null.
+ if( i == 0 ) { // Set control for first prefetch, next follows it
+ prefetch->init_req(0, needgc_false);
+ }
+ transform_later(prefetch);
+ distance += step_size;
+ i_o = prefetch;
+ }
+ }
+ return i_o;
+}
+
+
+void PhaseMacroExpand::expand_allocate(AllocateNode *alloc) {
+ expand_allocate_common(alloc, NULL,
+ OptoRuntime::new_instance_Type(),
+ OptoRuntime::new_instance_Java());
+}
+
+void PhaseMacroExpand::expand_allocate_array(AllocateArrayNode *alloc) {
+ Node* length = alloc->in(AllocateNode::ALength);
+ expand_allocate_common(alloc, length,
+ OptoRuntime::new_array_Type(),
+ OptoRuntime::new_array_Java());
+}
+
+
+// Once we have determined that this lock/unlock can be eliminated, we simply
+// eliminate the node without expanding it.
+//
+// Note: The membars associated with the lock/unlock are currently not
+// eliminated. This should be investigated as a future enhancement.
+//
+void PhaseMacroExpand::eliminate_locking_node(AbstractLockNode *alock) {
+ Node* mem = alock->in(TypeFunc::Memory);
+
+ // The memory projection from a lock/unlock is RawMem
+ // The input to a Lock is merged memory, so extract its RawMem input
+ // (unless the MergeMem has been optimized away.)
+ if (alock->is_Lock()) {
+ if (mem->is_MergeMem())
+ mem = mem->as_MergeMem()->in(Compile::AliasIdxRaw);
+ }
+
+ extract_call_projections(alock);
+ // There are 2 projections from the lock. The lock node will
+ // be deleted when its last use is subsumed below.
+ assert(alock->outcnt() == 2 && _fallthroughproj != NULL &&
+ _memproj_fallthrough != NULL, "Unexpected projections from Lock/Unlock");
+ _igvn.hash_delete(_fallthroughproj);
+ _igvn.subsume_node(_fallthroughproj, alock->in(TypeFunc::Control));
+ _igvn.hash_delete(_memproj_fallthrough);
+ _igvn.subsume_node(_memproj_fallthrough, mem);
+ return;
+}
+
+
+//------------------------------expand_lock_node----------------------
+void PhaseMacroExpand::expand_lock_node(LockNode *lock) {
+
+ Node* ctrl = lock->in(TypeFunc::Control);
+ Node* mem = lock->in(TypeFunc::Memory);
+ Node* obj = lock->obj_node();
+ Node* box = lock->box_node();
+ Node *flock = lock->fastlock_node();
+
+ if (lock->is_eliminated()) {
+ eliminate_locking_node(lock);
+ return;
+ }
+
+ // Make the merge point
+ Node *region = new (C, 3) RegionNode(3);
+
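+ // Test the FastLock result; opt_iff below wires the fast path into region
+ // slot 2 and returns the unlikely slow-path control, which feeds the runtime call.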
+ Node *bol = transform_later(new (C, 2) BoolNode(flock,BoolTest::ne));
+ Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
+ // Optimize test; set region slot 2
+ Node *slow_path = opt_iff(region,iff);
+
+ // Make slow path call
+ CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box );
+
+ extract_call_projections(call);
+
+ // Slow path can only throw asynchronous exceptions, which are always
+ // de-opted. So the compiler thinks the slow-call can never throw an
+ // exception. If it DOES throw an exception we would need the debug
+ // info removed first (since if it throws there is no monitor).
+ assert ( _ioproj_fallthrough == NULL && _ioproj_catchall == NULL &&
+ _memproj_catchall == NULL && _catchallcatchproj == NULL, "Unexpected projection from Lock");
+
+ // Capture slow path
+ // disconnect fall-through projection from call and create a new one
+ // hook up users of fall-through projection to region
+ Node *slow_ctrl = _fallthroughproj->clone();
+ transform_later(slow_ctrl);
+ _igvn.hash_delete(_fallthroughproj);
+ _fallthroughproj->disconnect_inputs(NULL);
+ region->init_req(1, slow_ctrl);
+ // region inputs are now complete
+ transform_later(region);
+ _igvn.subsume_node(_fallthroughproj, region);
+
+ // create a Phi for the memory state
+ Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+ Node *memproj = transform_later( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+ mem_phi->init_req(1, memproj );
+ mem_phi->init_req(2, mem);
+ transform_later(mem_phi);
+ _igvn.hash_delete(_memproj_fallthrough);
+ _igvn.subsume_node(_memproj_fallthrough, mem_phi);
+
+
+}
+
+//------------------------------expand_unlock_node----------------------
+void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) {
+
+ Node *ctrl = unlock->in(TypeFunc::Control);
+ Node* mem = unlock->in(TypeFunc::Memory);
+ Node* obj = unlock->obj_node();
+ Node* box = unlock->box_node();
+
+
+ if (unlock->is_eliminated()) {
+ eliminate_locking_node(unlock);
+ return;
+ }
+
+ // No need for a null check on unlock
+
+ // Make the merge point
+ RegionNode *region = new (C, 3) RegionNode(3);
+
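+ // Emit the inline fast-unlock test; as with locking, the unlikely outcome
+ // takes the runtime slow path below.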
+ FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box );
+ funlock = transform_later( funlock )->as_FastUnlock();
+ Node *bol = transform_later(new (C, 2) BoolNode(funlock,BoolTest::ne));
+ Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
+ // Optimize test; set region slot 2
+ Node *slow_path = opt_iff(region,iff);
+
+ CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box );
+
+ extract_call_projections(call);
+
+ assert ( _ioproj_fallthrough == NULL && _ioproj_catchall == NULL &&
+ _memproj_catchall == NULL && _catchallcatchproj == NULL, "Unexpected projection from Unlock");
+
+ // No exceptions for unlocking
+ // Capture slow path
+ // disconnect fall-through projection from call and create a new one
+ // hook up users of fall-through projection to region
+ Node *slow_ctrl = _fallthroughproj->clone();
+ transform_later(slow_ctrl);
+ _igvn.hash_delete(_fallthroughproj);
+ _fallthroughproj->disconnect_inputs(NULL);
+ region->init_req(1, slow_ctrl);
+ // region inputs are now complete
+ transform_later(region);
+ _igvn.subsume_node(_fallthroughproj, region);
+
+ // create a Phi for the memory state
+ Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+ Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) );
+ mem_phi->init_req(1, memproj );
+ mem_phi->init_req(2, mem);
+ transform_later(mem_phi);
+ _igvn.hash_delete(_memproj_fallthrough);
+ _igvn.subsume_node(_memproj_fallthrough, mem_phi);
+
+
+}
+
+//------------------------------expand_macro_nodes----------------------
+// Returns true if a failure occurred.
+bool PhaseMacroExpand::expand_macro_nodes() {
+ if (C->macro_count() == 0)
+ return false;
+ // Make sure expansion will not cause the node limit to be exceeded. The worst case is that a
+ // macro node gets expanded into about 50 nodes; allow 50% more for optimization.
+ if (C->check_node_count(C->macro_count() * 75, "out of nodes before macro expansion" ) )
+ return true;
+ // expand "macro" nodes
+ // nodes are removed from the macro list as they are processed
+ while (C->macro_count() > 0) {
+ Node * n = C->macro_node(0);
+ assert(n->is_macro(), "only macro nodes expected here");
+ if (_igvn.type(n) == Type::TOP || n->in(0)->is_top() ) {
+ // node is unreachable, so don't try to expand it
+ C->remove_macro_node(n);
+ continue;
+ }
+ switch (n->class_id()) {
+ case Node::Class_Allocate:
+ expand_allocate(n->as_Allocate());
+ break;
+ case Node::Class_AllocateArray:
+ expand_allocate_array(n->as_AllocateArray());
+ break;
+ case Node::Class_Lock:
+ expand_lock_node(n->as_Lock());
+ break;
+ case Node::Class_Unlock:
+ expand_unlock_node(n->as_Unlock());
+ break;
+ default:
+ assert(false, "unknown node type in macro list");
+ }
+ if (C->failing()) return true;
+ }
+ _igvn.optimize();
+ return false;
+}
diff --git a/src/share/vm/opto/macro.hpp b/src/share/vm/opto/macro.hpp
new file mode 100644
index 000000000..20dd65c40
--- /dev/null
+++ b/src/share/vm/opto/macro.hpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class AllocateNode;
+class AllocateArrayNode;
+class CallNode;
+class Node;
+class PhaseIterGVN;
+
+class PhaseMacroExpand : public Phase {
+private:
+ PhaseIterGVN &_igvn;
+
+ // Helper methods roughly modelled after GraphKit:
+ Node* top() const { return C->top(); }
+ Node* intcon(jint con) const { return _igvn.intcon(con); }
+ Node* longcon(jlong con) const { return _igvn.longcon(con); }
+ Node* makecon(const Type *t) const { return _igvn.makecon(t); }
+ Node* basic_plus_adr(Node* base, int offset) {
+ return (offset == 0)? base: basic_plus_adr(base, MakeConX(offset));
+ }
+ Node* basic_plus_adr(Node* base, Node* ptr, int offset) {
+ return (offset == 0)? ptr: basic_plus_adr(base, ptr, MakeConX(offset));
+ }
+ Node* basic_plus_adr(Node* base, Node* offset) {
+ return basic_plus_adr(base, base, offset);
+ }
+ Node* basic_plus_adr(Node* base, Node* ptr, Node* offset) {
+ Node* adr = new (C, 4) AddPNode(base, ptr, offset);
+ return transform_later(adr);
+ }
+ Node* transform_later(Node* n) {
+ // equivalent to _gvn.transform in GraphKit, Ideal, etc.
+ _igvn.register_new_node_with_optimizer(n);
+ return n;
+ }
+ void set_eden_pointers(Node* &eden_top_adr, Node* &eden_end_adr);
+ Node* make_load( Node* ctl, Node* mem, Node* base, int offset,
+ const Type* value_type, BasicType bt);
+ Node* make_store(Node* ctl, Node* mem, Node* base, int offset,
+ Node* value, BasicType bt);
+
+ // projections extracted from a call node
+ ProjNode *_fallthroughproj;
+ ProjNode *_fallthroughcatchproj;
+ ProjNode *_ioproj_fallthrough;
+ ProjNode *_ioproj_catchall;
+ ProjNode *_catchallcatchproj;
+ ProjNode *_memproj_fallthrough;
+ ProjNode *_memproj_catchall;
+ ProjNode *_resproj;
+
+
+ void expand_allocate(AllocateNode *alloc);
+ void expand_allocate_array(AllocateArrayNode *alloc);
+ void expand_allocate_common(AllocateNode* alloc,
+ Node* length,
+ const TypeFunc* slow_call_type,
+ address slow_call_address);
+ void eliminate_locking_node(AbstractLockNode *alock);
+ void expand_lock_node(LockNode *lock);
+ void expand_unlock_node(UnlockNode *unlock);
+
+ int replace_input(Node *use, Node *oldref, Node *newref);
+ void copy_call_debug_info(CallNode *oldcall, CallNode * newcall);
+ Node* opt_iff(Node* region, Node* iff);
+ void copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call);
+ CallNode* make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call,
+ const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1);
+ void extract_call_projections(CallNode *call);
+
+ Node* initialize_object(AllocateNode* alloc,
+ Node* control, Node* rawmem, Node* object,
+ Node* klass_node, Node* length,
+ Node* size_in_bytes);
+
+ Node* prefetch_allocation(Node* i_o,
+ Node*& needgc_false, Node*& contended_phi_rawmem,
+ Node* old_eden_top, Node* new_eden_top,
+ Node* length);
+
+public:
+ PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn) {}
+ bool expand_macro_nodes();
+
+};
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
new file mode 100644
index 000000000..a8d673a7f
--- /dev/null
+++ b/src/share/vm/opto/matcher.cpp
@@ -0,0 +1,2123 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_matcher.cpp.incl"
+
+OptoReg::Name OptoReg::c_frame_pointer;
+
+
+
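+// base2reg is indexed by a Type's base() and yields the ideal register Opcode
+// (Op_RegI, Op_RegL, Op_RegP, ...) used to hold a value of that type; bases
+// that never live in a machine register map to 0 or Node::NotAMachineReg.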
+const int Matcher::base2reg[Type::lastype] = {
+ Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0,
+ Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+ Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+ 0, 0/*abio*/,
+ Op_RegP /* Return address */, 0, /* the memories */
+ Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+ 0 /*bottom*/
+};
+
+const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
+RegMask Matcher::mreg2regmask[_last_Mach_Reg];
+RegMask Matcher::STACK_ONLY_mask;
+RegMask Matcher::c_frame_ptr_mask;
+const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
+const uint Matcher::_end_rematerialize = _END_REMATERIALIZE;
+
+//---------------------------Matcher-------------------------------------------
+Matcher::Matcher( Node_List &proj_list ) :
+ PhaseTransform( Phase::Ins_Select ),
+#ifdef ASSERT
+ _old2new_map(C->comp_arena()),
+#endif
+ _shared_constants(C->comp_arena()),
+ _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
+ _swallowed(swallowed),
+ _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
+ _end_inst_chain_rule(_END_INST_CHAIN_RULE),
+ _must_clone(must_clone), _proj_list(proj_list),
+ _register_save_policy(register_save_policy),
+ _c_reg_save_policy(c_reg_save_policy),
+ _register_save_type(register_save_type),
+ _ruleName(ruleName),
+ _allocation_started(false),
+ _states_arena(Chunk::medium_size),
+ _visited(&_states_arena),
+ _shared(&_states_arena),
+ _dontcare(&_states_arena) {
+ C->set_matcher(this);
+
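+ // The spill and debug masks are not built here; they are filled in later
+ // by init_first_stack_mask().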
+ idealreg2spillmask[Op_RegI] = NULL;
+ idealreg2spillmask[Op_RegL] = NULL;
+ idealreg2spillmask[Op_RegF] = NULL;
+ idealreg2spillmask[Op_RegD] = NULL;
+ idealreg2spillmask[Op_RegP] = NULL;
+
+ idealreg2debugmask[Op_RegI] = NULL;
+ idealreg2debugmask[Op_RegL] = NULL;
+ idealreg2debugmask[Op_RegF] = NULL;
+ idealreg2debugmask[Op_RegD] = NULL;
+ idealreg2debugmask[Op_RegP] = NULL;
+}
+
+//------------------------------warp_incoming_stk_arg------------------------
+// This warps a VMReg into an OptoReg::Name
+OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
+ OptoReg::Name warped;
+ if( reg->is_stack() ) { // Stack slot argument?
+ warped = OptoReg::add(_old_SP, reg->reg2stack() );
+ warped = OptoReg::add(warped, C->out_preserve_stack_slots());
+ if( warped >= _in_arg_limit )
+ _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
+ if (!RegMask::can_represent(warped)) {
+ // the compiler cannot represent this method's calling sequence
+ C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
+ return OptoReg::Bad;
+ }
+ return warped;
+ }
+ return OptoReg::as_OptoReg(reg);
+}
+
+//---------------------------compute_old_SP------------------------------------
+OptoReg::Name Compile::compute_old_SP() {
+ int fixed = fixed_slots();
+ int preserve = in_preserve_stack_slots();
+ return OptoReg::stack2reg(round_to(fixed + preserve, Matcher::stack_alignment_in_slots()));
+}
+
+
+
+#ifdef ASSERT
+void Matcher::verify_new_nodes_only(Node* xroot) {
+ // Make sure that the new graph only references new nodes
+ ResourceMark rm;
+ Unique_Node_List worklist;
+ VectorSet visited(Thread::current()->resource_area());
+ worklist.push(xroot);
+ while (worklist.size() > 0) {
+ Node* n = worklist.pop();
+ visited <<= n->_idx;
+ assert(C->node_arena()->contains(n), "dead node");
+ for (uint j = 0; j < n->req(); j++) {
+ Node* in = n->in(j);
+ if (in != NULL) {
+ assert(C->node_arena()->contains(in), "dead node");
+ if (!visited.test(in->_idx)) {
+ worklist.push(in);
+ }
+ }
+ }
+ }
+}
+#endif
+
+
+//---------------------------match---------------------------------------------
+void Matcher::match( ) {
+ // One-time initialization of some register masks.
+ init_spill_mask( C->root()->in(1) );
+ _return_addr_mask = return_addr();
+#ifdef _LP64
+ // Pointers take 2 slots in 64-bit land
+ _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
+#endif
+
+ // Map a Java-signature return type into return register-value
+ // machine registers for 0, 1 and 2 returned values.
+ const TypeTuple *range = C->tf()->range();
+ if( range->cnt() > TypeFunc::Parms ) { // If not a void function
+ // Get ideal-register return type
+ int ireg = base2reg[range->field_at(TypeFunc::Parms)->base()];
+ // Get machine return register
+ uint sop = C->start()->Opcode();
+ OptoRegPair regs = return_value(ireg, false);
+
+ // And mask for same
+ _return_value_mask = RegMask(regs.first());
+ if( OptoReg::is_valid(regs.second()) )
+ _return_value_mask.Insert(regs.second());
+ }
+
+ // ---------------
+ // Frame Layout
+
+ // Need the method signature to determine the incoming argument types,
+ // because the types determine which registers the incoming arguments are
+ // in, and this affects the matched code.
+ const TypeTuple *domain = C->tf()->domain();
+ uint argcnt = domain->cnt() - TypeFunc::Parms;
+ BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt );
+ VMRegPair *vm_parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
+ _parm_regs = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
+ _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
+ uint i;
+ for( i = 0; i<argcnt; i++ ) {
+ sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
+ }
+
+ // Pass array of ideal registers and length to USER code (from the AD file)
+ // that will convert this to an array of register numbers.
+ const StartNode *start = C->start();
+ start->calling_convention( sig_bt, vm_parm_regs, argcnt );
+#ifdef ASSERT
+ // Sanity check users' calling convention. Real handy while trying to
+ // get the initial port correct.
+ { for (uint i = 0; i<argcnt; i++) {
+ if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
+ assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halves" );
+ _parm_regs[i].set_bad();
+ continue;
+ }
+ VMReg parm_reg = vm_parm_regs[i].first();
+ assert(parm_reg->is_valid(), "invalid arg?");
+ if (parm_reg->is_reg()) {
+ OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
+ assert(can_be_java_arg(opto_parm_reg) ||
+ C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
+ opto_parm_reg == inline_cache_reg(),
+ "parameters in register must be preserved by runtime stubs");
+ }
+ for (uint j = 0; j < i; j++) {
+ assert(parm_reg != vm_parm_regs[j].first(),
+ "calling conv. must produce distinct regs");
+ }
+ }
+ }
+#endif
+
+ // Do some initial frame layout.
+
+ // Compute the old incoming SP (may be called FP) as
+ // OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
+ _old_SP = C->compute_old_SP();
+ assert( is_even(_old_SP), "must be even" );
+
+ // Compute highest incoming stack argument as
+ // _old_SP + out_preserve_stack_slots + incoming argument size.
+ _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
+ assert( is_even(_in_arg_limit), "out_preserve must be even" );
+ for( i = 0; i < argcnt; i++ ) {
+ // Permit args to have no register
+ _calling_convention_mask[i].Clear();
+ if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
+ continue;
+ }
+ // calling_convention returns stack arguments as a count of
+ // slots beyond OptoReg::stack0()/VMRegImpl::stack0. We need to convert this to
+ // the allocator's point of view, taking into account all the
+ // preserve area, locks & pad2.
+
+ OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
+ if( OptoReg::is_valid(reg1))
+ _calling_convention_mask[i].Insert(reg1);
+
+ OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
+ if( OptoReg::is_valid(reg2))
+ _calling_convention_mask[i].Insert(reg2);
+
+ // Saved biased stack-slot register number
+ _parm_regs[i].set_pair(reg2, reg1);
+ }
+
+ // Finally, make sure the incoming arguments take up an even number of
+ // words, in case the arguments or locals need to contain doubleword stack
+ // slots. The rest of the system assumes that stack slot pairs (in
+ // particular, in the spill area) which look aligned will in fact be
+ // aligned relative to the stack pointer in the target machine. Double
+ // stack slots will always be allocated aligned.
+ _new_SP = OptoReg::Name(round_to(_in_arg_limit, RegMask::SlotsPerLong));
+
+ // Compute highest outgoing stack argument as
+ // _new_SP + out_preserve_stack_slots + max(outgoing argument size).
+ _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
+ assert( is_even(_out_arg_limit), "out_preserve must be even" );
+
+ if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
+ // the compiler cannot represent this method's calling sequence
+ C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
+ }
+
+ if (C->failing()) return; // bailed out on incoming arg failure
+
+ // ---------------
+ // Collect roots of matcher trees. Every node for which
+ // _shared[_idx] is cleared is guaranteed to not be shared, and thus
+ // can be a valid interior of some tree.
+ find_shared( C->root() );
+ find_shared( C->top() );
+
+ C->print_method("Before Matching", 2);
+
+ // Swap out to old-space; emptying new-space
+ Arena *old = C->node_arena()->move_contents(C->old_arena());
+
+ // Save debug and profile information for nodes in old space:
+ _old_node_note_array = C->node_note_array();
+ if (_old_node_note_array != NULL) {
+ C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
+ (C->comp_arena(), _old_node_note_array->length(),
+ 0, NULL));
+ }
+
+ // Pre-size the new_node table to avoid the need for range checks.
+ grow_new_node_array(C->unique());
+
+ // Reset node counter so MachNodes start with _idx at 0
+ int nodes = C->unique(); // save value
+ C->set_unique(0);
+
+ // Recursively match trees from old space into new space.
+ // Correct leaves of new-space Nodes; they point to old-space.
+ _visited.Clear(); // Clear visit bits for xform call
+ C->set_cached_top_node(xform( C->top(), nodes ));
+ if (!C->failing()) {
+ Node* xroot = xform( C->root(), 1 );
+ if (xroot == NULL) {
+ Matcher::soft_match_failure(); // recursive matching process failed
+ C->record_method_not_compilable("instruction match failed");
+ } else {
+ // During matching shared constants were attached to C->root()
+ // because xroot wasn't available yet, so transfer the uses to
+ // the xroot.
+ for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
+ Node* n = C->root()->fast_out(j);
+ if (C->node_arena()->contains(n)) {
+ assert(n->in(0) == C->root(), "should be control user");
+ n->set_req(0, xroot);
+ --j;
+ --jmax;
+ }
+ }
+
+ C->set_root(xroot->is_Root() ? xroot->as_Root() : NULL);
+#ifdef ASSERT
+ verify_new_nodes_only(xroot);
+#endif
+ }
+ }
+ if (C->top() == NULL || C->root() == NULL) {
+ C->record_method_not_compilable("graph lost"); // %%% cannot happen?
+ }
+ if (C->failing()) {
+ // delete old;
+ old->destruct_contents();
+ return;
+ }
+ assert( C->top(), "" );
+ assert( C->root(), "" );
+ validate_null_checks();
+
+ // Now smoke old-space
+ NOT_DEBUG( old->destruct_contents() );
+
+ // ------------------------
+ // Set up save-on-entry registers
+ Fixup_Save_On_Entry( );
+}
+
+
+//------------------------------Fixup_Save_On_Entry----------------------------
+// The stated purpose of this routine is to take care of save-on-entry
+// registers. However, the overall goal of the Match phase is to convert into
+// machine-specific instructions which have RegMasks to guide allocation.
+// So what this procedure really does is put a valid RegMask on each input
+// to the machine-specific variations of all Return, TailCall and Halt
+// instructions. It also adds edges to define the save-on-entry values (and of
+// course gives them a mask).
+
+static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
+ RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
+ // Do all the pre-defined register masks
+ rms[TypeFunc::Control ] = RegMask::Empty;
+ rms[TypeFunc::I_O ] = RegMask::Empty;
+ rms[TypeFunc::Memory ] = RegMask::Empty;
+ rms[TypeFunc::ReturnAdr] = ret_adr;
+ rms[TypeFunc::FramePtr ] = fp;
+ return rms;
+}
+
+//---------------------------init_first_stack_mask-----------------------------
+// Create the initial stack mask used by values spilling to the stack.
+// Disallow any debug info in outgoing argument areas by setting the
+// initial mask accordingly.
+void Matcher::init_first_stack_mask() {
+
+ // Allocate storage for spill masks as masks for the appropriate load type.
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask)*10);
+ idealreg2spillmask[Op_RegI] = &rms[0];
+ idealreg2spillmask[Op_RegL] = &rms[1];
+ idealreg2spillmask[Op_RegF] = &rms[2];
+ idealreg2spillmask[Op_RegD] = &rms[3];
+ idealreg2spillmask[Op_RegP] = &rms[4];
+ idealreg2debugmask[Op_RegI] = &rms[5];
+ idealreg2debugmask[Op_RegL] = &rms[6];
+ idealreg2debugmask[Op_RegF] = &rms[7];
+ idealreg2debugmask[Op_RegD] = &rms[8];
+ idealreg2debugmask[Op_RegP] = &rms[9];
+
+ OptoReg::Name i;
+
+ // At first, start with the empty mask
+ C->FIRST_STACK_mask().Clear();
+
+ // Add in the incoming argument area
+ OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
+ for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1))
+ C->FIRST_STACK_mask().Insert(i);
+
+ // Add in all bits past the outgoing argument area
+ guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
+ "must be able to represent all call arguments in reg mask");
+ init = _out_arg_limit;
+ for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
+ C->FIRST_STACK_mask().Insert(i);
+
+ // Finally, set the "infinite stack" bit.
+ C->FIRST_STACK_mask().set_AllStack();
+
+ // Make spill masks. Registers for their class, plus FIRST_STACK_mask.
+ *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
+ idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
+ *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
+ idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
+ *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
+ idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
+ *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
+ idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
+ *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
+ idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
+
+ // Make up debug masks. Any spill slot plus callee-save registers.
+ // Caller-save registers are assumed to be trashable by the various
+ // inline-cache fixup routines.
+ *idealreg2debugmask[Op_RegI]= *idealreg2spillmask[Op_RegI];
+ *idealreg2debugmask[Op_RegL]= *idealreg2spillmask[Op_RegL];
+ *idealreg2debugmask[Op_RegF]= *idealreg2spillmask[Op_RegF];
+ *idealreg2debugmask[Op_RegD]= *idealreg2spillmask[Op_RegD];
+ *idealreg2debugmask[Op_RegP]= *idealreg2spillmask[Op_RegP];
+
+ // Prevent stub compilations from attempting to reference
+ // callee-saved registers from debug info
+ bool exclude_soe = !Compile::current()->is_method_compilation();
+
+ for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
+ // registers the caller has to save do not work
+ if( _register_save_policy[i] == 'C' ||
+ _register_save_policy[i] == 'A' ||
+ (_register_save_policy[i] == 'E' && exclude_soe) ) {
+ idealreg2debugmask[Op_RegI]->Remove(i); // Exclude save-on-call
+ idealreg2debugmask[Op_RegL]->Remove(i); // registers from debug
+ idealreg2debugmask[Op_RegF]->Remove(i); // masks
+ idealreg2debugmask[Op_RegD]->Remove(i);
+ idealreg2debugmask[Op_RegP]->Remove(i);
+ }
+ }
+}
+
+//---------------------------is_save_on_entry----------------------------------
+bool Matcher::is_save_on_entry( int reg ) {
+ return
+ _register_save_policy[reg] == 'E' ||
+ _register_save_policy[reg] == 'A' || // Save-on-entry register?
+ // Also save argument registers in the trampolining stubs
+ (C->save_argument_registers() && is_spillable_arg(reg));
+}
+
+//---------------------------Fixup_Save_On_Entry-------------------------------
+void Matcher::Fixup_Save_On_Entry( ) {
+ init_first_stack_mask();
+
+ Node *root = C->root(); // Short name for root
+ // Count number of save-on-entry registers.
+ uint soe_cnt = number_of_saved_registers();
+ uint i;
+
+ // Find the procedure Start Node
+ StartNode *start = C->start();
+ assert( start, "Expect a start node" );
+
+ // Save argument registers in the trampolining stubs
+ if( C->save_argument_registers() )
+ for( i = 0; i < _last_Mach_Reg; i++ )
+ if( is_spillable_arg(i) )
+ soe_cnt++;
+
+ // Input RegMask array shared by all Returns.
+ // The type for doubles and longs has a count of 2, but
+ // there is only 1 returned value
+ uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
+ RegMask *ret_rms = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+ // Returns have 0 or 1 returned values depending on call signature.
+ // Return register is specified by return_value in the AD file.
+ if (ret_edge_cnt > TypeFunc::Parms)
+ ret_rms[TypeFunc::Parms+0] = _return_value_mask;
+
+ // Input RegMask array shared by all Rethrows.
+ uint reth_edge_cnt = TypeFunc::Parms+1;
+ RegMask *reth_rms = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+ // Rethrow takes exception oop only, but in the argument 0 slot.
+ reth_rms[TypeFunc::Parms] = mreg2regmask[find_receiver(false)];
+#ifdef _LP64
+ // Need two slots for ptrs in 64-bit land
+ reth_rms[TypeFunc::Parms].Insert(OptoReg::add(OptoReg::Name(find_receiver(false)),1));
+#endif
+
+ // Input RegMask array shared by all TailCalls
+ uint tail_call_edge_cnt = TypeFunc::Parms+2;
+ RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+
+ // Input RegMask array shared by all TailJumps
+ uint tail_jump_edge_cnt = TypeFunc::Parms+2;
+ RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+
+ // TailCalls have 2 returned values (target & moop), whose masks come
+ // from the usual MachNode/MachOper mechanism. Find a sample
+ // TailCall to extract these masks and put the correct masks into
+ // the tail_call_rms array.
+ for( i=1; i < root->req(); i++ ) {
+ MachReturnNode *m = root->in(i)->as_MachReturn();
+ if( m->ideal_Opcode() == Op_TailCall ) {
+ tail_call_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
+ tail_call_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
+ break;
+ }
+ }
+
+ // TailJumps have 2 returned values (target & ex_oop), whose masks come
+ // from the usual MachNode/MachOper mechanism. Find a sample
+ // TailJump to extract these masks and put the correct masks into
+ // the tail_jump_rms array.
+ for( i=1; i < root->req(); i++ ) {
+ MachReturnNode *m = root->in(i)->as_MachReturn();
+ if( m->ideal_Opcode() == Op_TailJump ) {
+ tail_jump_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
+ tail_jump_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
+ break;
+ }
+ }
+
+ // Input RegMask array shared by all Halts
+ uint halt_edge_cnt = TypeFunc::Parms;
+ RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+
+ // Capture the return input masks into each exit flavor
+ for( i=1; i < root->req(); i++ ) {
+ MachReturnNode *exit = root->in(i)->as_MachReturn();
+ switch( exit->ideal_Opcode() ) {
+ case Op_Return : exit->_in_rms = ret_rms; break;
+ case Op_Rethrow : exit->_in_rms = reth_rms; break;
+ case Op_TailCall : exit->_in_rms = tail_call_rms; break;
+ case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
+ case Op_Halt : exit->_in_rms = halt_rms; break;
+ default : ShouldNotReachHere();
+ }
+ }
+
+ // Next unused projection number from Start.
+ int proj_cnt = C->tf()->domain()->cnt();
+
+ // Do all the save-on-entry registers. Make projections from Start for
+ // them, and give them a use at the exit points. To the allocator, they
+ // look like incoming register arguments.
+ for( i = 0; i < _last_Mach_Reg; i++ ) {
+ if( is_save_on_entry(i) ) {
+
+ // Add the save-on-entry to the mask array
+ ret_rms [ ret_edge_cnt] = mreg2regmask[i];
+ reth_rms [ reth_edge_cnt] = mreg2regmask[i];
+ tail_call_rms[tail_call_edge_cnt] = mreg2regmask[i];
+ tail_jump_rms[tail_jump_edge_cnt] = mreg2regmask[i];
+ // Halts need the SOE registers, but only in the stack as debug info.
+ // A just-prior uncommon-trap or deoptimization will use the SOE regs.
+ halt_rms [ halt_edge_cnt] = *idealreg2spillmask[_register_save_type[i]];
+
+ Node *mproj;
+
+ // Is this a RegF low half of a RegD? Double up 2 adjacent RegF's
+ // into a single RegD.
+ if( (i&1) == 0 &&
+ _register_save_type[i ] == Op_RegF &&
+ _register_save_type[i+1] == Op_RegF &&
+ is_save_on_entry(i+1) ) {
+ // Add other bit for double
+ ret_rms [ ret_edge_cnt].Insert(OptoReg::Name(i+1));
+ reth_rms [ reth_edge_cnt].Insert(OptoReg::Name(i+1));
+ tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
+ tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
+ halt_rms [ halt_edge_cnt].Insert(OptoReg::Name(i+1));
+ mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
+ proj_cnt += 2; // Skip 2 for doubles
+ }
+ else if( (i&1) == 1 && // Else check for high half of double
+ _register_save_type[i-1] == Op_RegF &&
+ _register_save_type[i ] == Op_RegF &&
+ is_save_on_entry(i-1) ) {
+ ret_rms [ ret_edge_cnt] = RegMask::Empty;
+ reth_rms [ reth_edge_cnt] = RegMask::Empty;
+ tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
+ tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
+ halt_rms [ halt_edge_cnt] = RegMask::Empty;
+ mproj = C->top();
+ }
+ // Is this a RegI low half of a RegL? Double up 2 adjacent RegI's
+ // into a single RegL.
+ else if( (i&1) == 0 &&
+ _register_save_type[i ] == Op_RegI &&
+ _register_save_type[i+1] == Op_RegI &&
+ is_save_on_entry(i+1) ) {
+ // Add other bit for long
+ ret_rms [ ret_edge_cnt].Insert(OptoReg::Name(i+1));
+ reth_rms [ reth_edge_cnt].Insert(OptoReg::Name(i+1));
+ tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
+ tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
+ halt_rms [ halt_edge_cnt].Insert(OptoReg::Name(i+1));
+ mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
+ proj_cnt += 2; // Skip 2 for longs
+ }
+ else if( (i&1) == 1 && // Else check for high half of long
+ _register_save_type[i-1] == Op_RegI &&
+ _register_save_type[i ] == Op_RegI &&
+ is_save_on_entry(i-1) ) {
+ ret_rms [ ret_edge_cnt] = RegMask::Empty;
+ reth_rms [ reth_edge_cnt] = RegMask::Empty;
+ tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
+ tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
+ halt_rms [ halt_edge_cnt] = RegMask::Empty;
+ mproj = C->top();
+ } else {
+ // Make a projection for it off the Start
+ mproj = new (C, 1) MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
+ }
+
+ ret_edge_cnt ++;
+ reth_edge_cnt ++;
+ tail_call_edge_cnt ++;
+ tail_jump_edge_cnt ++;
+ halt_edge_cnt ++;
+
+ // Add a use of the SOE register to all exit paths
+ for( uint j=1; j < root->req(); j++ )
+ root->in(j)->add_req(mproj);
+ } // End of if a save-on-entry register
+ } // End of for all machine registers
+}
+
+//------------------------------init_spill_mask--------------------------------
+void Matcher::init_spill_mask( Node *ret ) {
+ if( idealreg2regmask[Op_RegI] ) return; // One time only init
+
+ OptoReg::c_frame_pointer = c_frame_pointer();
+ c_frame_ptr_mask = c_frame_pointer();
+#ifdef _LP64
+ // pointers are twice as big
+ c_frame_ptr_mask.Insert(OptoReg::add(c_frame_pointer(),1));
+#endif
+
+ // Start at OptoReg::stack0()
+ STACK_ONLY_mask.Clear();
+ OptoReg::Name init = OptoReg::stack2reg(0);
+ // STACK_ONLY_mask is all stack bits
+ OptoReg::Name i;
+ for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
+ STACK_ONLY_mask.Insert(i);
+ // Also set the "infinite stack" bit.
+ STACK_ONLY_mask.set_AllStack();
+
+ // Copy the register names over into the shared world
+ for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
+ // SharedInfo::regName[i] = regName[i];
+ // Handy RegMasks per machine register
+ mreg2regmask[i].Insert(i);
+ }
+
+ // Grab the Frame Pointer
+ Node *fp = ret->in(TypeFunc::FramePtr);
+ Node *mem = ret->in(TypeFunc::Memory);
+ const TypePtr* atp = TypePtr::BOTTOM;
+ // Share frame pointer while making spill ops
+ set_shared(fp);
+
+ // Compute generic short-offset Loads
+ MachNode *spillI = match_tree(new (C, 3) LoadINode(NULL,mem,fp,atp));
+ MachNode *spillL = match_tree(new (C, 3) LoadLNode(NULL,mem,fp,atp));
+ MachNode *spillF = match_tree(new (C, 3) LoadFNode(NULL,mem,fp,atp));
+ MachNode *spillD = match_tree(new (C, 3) LoadDNode(NULL,mem,fp,atp));
+ MachNode *spillP = match_tree(new (C, 3) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
+ assert(spillI != NULL && spillL != NULL && spillF != NULL &&
+ spillD != NULL && spillP != NULL, "");
+
+ // Get the ADLC notion of the right regmask, for each basic type.
+ idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
+ idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
+ idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
+ idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
+ idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
+}
+
+#ifdef ASSERT
+static void match_alias_type(Compile* C, Node* n, Node* m) {
+ if (!VerifyAliases) return; // do not go looking for trouble by default
+ const TypePtr* nat = n->adr_type();
+ const TypePtr* mat = m->adr_type();
+ int nidx = C->get_alias_index(nat);
+ int midx = C->get_alias_index(mat);
+ // Detune the assert for cases like (AndI 0xFF (LoadB p)).
+ if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
+ for (uint i = 1; i < n->req(); i++) {
+ Node* n1 = n->in(i);
+ const TypePtr* n1at = n1->adr_type();
+ if (n1at != NULL) {
+ nat = n1at;
+ nidx = C->get_alias_index(n1at);
+ }
+ }
+ }
+ // %%% Kludgery. Instead, fix ideal adr_type methods for all these cases:
+ if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
+ switch (n->Opcode()) {
+ case Op_PrefetchRead:
+ case Op_PrefetchWrite:
+ nidx = Compile::AliasIdxRaw;
+ nat = TypeRawPtr::BOTTOM;
+ break;
+ }
+ }
+ if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
+ switch (n->Opcode()) {
+ case Op_ClearArray:
+ midx = Compile::AliasIdxRaw;
+ mat = TypeRawPtr::BOTTOM;
+ break;
+ }
+ }
+ if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
+ switch (n->Opcode()) {
+ case Op_Return:
+ case Op_Rethrow:
+ case Op_Halt:
+ case Op_TailCall:
+ case Op_TailJump:
+ nidx = Compile::AliasIdxBot;
+ nat = TypePtr::BOTTOM;
+ break;
+ }
+ }
+ if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
+ switch (n->Opcode()) {
+ case Op_StrComp:
+ case Op_MemBarVolatile:
+ case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
+ nidx = Compile::AliasIdxTop;
+ nat = NULL;
+ break;
+ }
+ }
+ if (nidx != midx) {
+ if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
+ tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
+ n->dump();
+ m->dump();
+ }
+ assert(C->subsume_loads() && C->must_alias(nat, midx),
+ "must not lose alias info when matching");
+ }
+}
+#endif
+
+
+//------------------------------MStack-----------------------------------------
+// State and MStack class used in xform() and find_shared() iterative methods.
+enum Node_State { Pre_Visit, // node has to be pre-visited
+ Visit, // visit node
+ Post_Visit, // post-visit node
+ Alt_Post_Visit // alternative post-visit path
+ };
+
+class MStack: public Node_Stack {
+ public:
+ MStack(int size) : Node_Stack(size) { }
+
+ void push(Node *n, Node_State ns) {
+ Node_Stack::push(n, (uint)ns);
+ }
+ void push(Node *n, Node_State ns, Node *parent, int indx) {
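+ // Push the parent/index entry first and the child node/state on top of it;
+ // parent() pops the child entry to expose the parent again.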
+ ++_inode_top;
+ if ((_inode_top + 1) >= _inode_max) grow();
+ _inode_top->node = parent;
+ _inode_top->indx = (uint)indx;
+ ++_inode_top;
+ _inode_top->node = n;
+ _inode_top->indx = (uint)ns;
+ }
+ Node *parent() {
+ pop();
+ return node();
+ }
+ Node_State state() const {
+ return (Node_State)index();
+ }
+ void set_state(Node_State ns) {
+ set_index((uint)ns);
+ }
+};
+
+
+//------------------------------xform------------------------------------------
+// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
+// Node in new-space. Given a new-space Node, recursively walk his children.
+Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
+Node *Matcher::xform( Node *n, int max_stack ) {
+ // Use one stack to keep both: child's node/state and parent's node/index
+ MStack mstack(max_stack * 2 * 2); // C->unique() * 2 * 2
+ mstack.push(n, Visit, NULL, -1); // set NULL as parent to indicate root
+
+ while (mstack.is_nonempty()) {
+ n = mstack.node(); // Leave node on stack
+ Node_State nstate = mstack.state();
+ if (nstate == Visit) {
+ mstack.set_state(Post_Visit);
+ Node *oldn = n;
+ // Old-space or new-space check
+ if (!C->node_arena()->contains(n)) {
+ // Old space!
+ Node* m;
+ if (has_new_node(n)) { // Not yet Label/Reduced
+ m = new_node(n);
+ } else {
+ if (!is_dontcare(n)) { // Matcher can match this guy
+ // Calls match special. They match alone with no children.
+ // Their children, the incoming arguments, match normally.
+ m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
+ if (C->failing()) return NULL;
+ if (m == NULL) { Matcher::soft_match_failure(); return NULL; }
+ } else { // Nothing the matcher cares about
+ if( n->is_Proj() && n->in(0)->is_Multi()) { // Projections?
+ // Convert to machine-dependent projection
+ m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
+ if (m->in(0) != NULL) // m might be top
+ collect_null_checks(m);
+ } else { // Else just a regular 'ol guy
+ m = n->clone(); // So just clone into new-space
+ // Def-Use edges will be added incrementally as Uses
+ // of this node are matched.
+ assert(m->outcnt() == 0, "no Uses of this clone yet");
+ }
+ }
+
+ set_new_node(n, m); // Map old to new
+ if (_old_node_note_array != NULL) {
+ Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
+ n->_idx);
+ C->set_node_notes_at(m->_idx, nn);
+ }
+ debug_only(match_alias_type(C, n, m));
+ }
+ n = m; // n is now a new-space node
+ mstack.set_node(n);
+ }
+
+ // New space!
+ if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())
+
+ int i;
+ // Put precedence edges on stack first (match them last).
+ for (i = oldn->req(); (uint)i < oldn->len(); i++) {
+ Node *m = oldn->in(i);
+ if (m == NULL) break;
+ // set -1 to call add_prec() instead of set_req() during Step1
+ mstack.push(m, Visit, n, -1);
+ }
+
+ // For constant debug info, I'd rather have unmatched constants.
+ int cnt = n->req();
+ JVMState* jvms = n->jvms();
+ int debug_cnt = jvms ? jvms->debug_start() : cnt;
+
+ // Now do only debug info. Clone constants rather than matching.
+ // Constants are represented directly in the debug info without
+ // the need for executable machine instructions.
+ // Monitor boxes are also represented directly.
+ for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
+ Node *m = n->in(i); // Get input
+ int op = m->Opcode();
+ assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
+ if( op == Op_ConI || op == Op_ConP ||
+ op == Op_ConF || op == Op_ConD || op == Op_ConL
+ // || op == Op_BoxLock // %%%% enable this and remove (+++) in chaitin.cpp
+ ) {
+ m = m->clone();
+ mstack.push(m, Post_Visit, n, i); // Don't need to visit
+ mstack.push(m->in(0), Visit, m, 0);
+ } else {
+ mstack.push(m, Visit, n, i);
+ }
+ }
+
+ // And now walk his children, and convert his inputs to new-space.
+ for( ; i >= 0; --i ) { // For all normal inputs do
+ Node *m = n->in(i); // Get input
+ if(m != NULL)
+ mstack.push(m, Visit, n, i);
+ }
+
+ }
+ else if (nstate == Post_Visit) {
+ // Set xformed input
+ Node *p = mstack.parent();
+ if (p != NULL) { // root doesn't have parent
+ int i = (int)mstack.index();
+ if (i >= 0)
+ p->set_req(i, n); // required input
+ else if (i == -1)
+ p->add_prec(n); // precedence input
+ else
+ ShouldNotReachHere();
+ }
+ mstack.pop(); // remove processed node from stack
+ }
+ else {
+ ShouldNotReachHere();
+ }
+ } // while (mstack.is_nonempty())
+ return n; // Return new-space Node
+}
+
+//------------------------------warp_outgoing_stk_arg------------------------
+OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
+ // Convert outgoing argument location to a pre-biased stack offset
+ if (reg->is_stack()) {
+ OptoReg::Name warped = reg->reg2stack();
+ // Adjust the stack slot offset to be the register number used
+ // by the allocator.
+ warped = OptoReg::add(begin_out_arg_area, warped);
+ // Keep track of the largest numbered stack slot used for an arg.
+ // Largest used slot per call-site indicates the amount of stack
+ // that is killed by the call.
+ if( warped >= out_arg_limit_per_call )
+ out_arg_limit_per_call = OptoReg::add(warped,1);
+ if (!RegMask::can_represent(warped)) {
+ C->record_method_not_compilable_all_tiers("unsupported calling sequence");
+ return OptoReg::Bad;
+ }
+ return warped;
+ }
+ return OptoReg::as_OptoReg(reg);
+}
+
+
+//------------------------------match_sfpt-------------------------------------
+// Helper function to match call instructions. Calls match special.
+// They match alone with no children. Their children, the incoming
+// arguments, match normally.
+MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
+ MachSafePointNode *msfpt = NULL;
+ MachCallNode *mcall = NULL;
+ uint cnt;
+ // Split out case for SafePoint vs Call
+ CallNode *call;
+ const TypeTuple *domain;
+ ciMethod* method = NULL;
+ if( sfpt->is_Call() ) {
+ call = sfpt->as_Call();
+ domain = call->tf()->domain();
+ cnt = domain->cnt();
+
+ // Match just the call, nothing else
+ MachNode *m = match_tree(call);
+ if (C->failing()) return NULL;
+ if( m == NULL ) { Matcher::soft_match_failure(); return NULL; }
+
+ // Copy data from the Ideal SafePoint to the machine version
+ mcall = m->as_MachCall();
+
+ mcall->set_tf( call->tf());
+ mcall->set_entry_point(call->entry_point());
+ mcall->set_cnt( call->cnt());
+
+ if( mcall->is_MachCallJava() ) {
+ MachCallJavaNode *mcall_java = mcall->as_MachCallJava();
+ const CallJavaNode *call_java = call->as_CallJava();
+ method = call_java->method();
+ mcall_java->_method = method;
+ mcall_java->_bci = call_java->_bci;
+ mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
+ if( mcall_java->is_MachCallStaticJava() )
+ mcall_java->as_MachCallStaticJava()->_name =
+ call_java->as_CallStaticJava()->_name;
+ if( mcall_java->is_MachCallDynamicJava() )
+ mcall_java->as_MachCallDynamicJava()->_vtable_index =
+ call_java->as_CallDynamicJava()->_vtable_index;
+ }
+ else if( mcall->is_MachCallRuntime() ) {
+ mcall->as_MachCallRuntime()->_name = call->as_CallRuntime()->_name;
+ }
+ msfpt = mcall;
+ }
+ // This is a non-call safepoint
+ else {
+ call = NULL;
+ domain = NULL;
+ MachNode *mn = match_tree(sfpt);
+ if (C->failing()) return NULL;
+ msfpt = mn->as_MachSafePoint();
+ cnt = TypeFunc::Parms;
+ }
+
+ // Advertise the correct memory effects (for anti-dependence computation).
+ msfpt->set_adr_type(sfpt->adr_type());
+
+ // Allocate a private array of RegMasks. These RegMasks are not shared.
+ msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
+ // Empty them all.
+ memset( msfpt->_in_rms, 0, sizeof(RegMask)*cnt );
+
+ // Do all the pre-defined non-Empty register masks
+ msfpt->_in_rms[TypeFunc::ReturnAdr] = _return_addr_mask;
+ msfpt->_in_rms[TypeFunc::FramePtr ] = c_frame_ptr_mask;
+
+ // Place where the first outgoing argument can possibly be put.
+ OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
+ assert( is_even(begin_out_arg_area), "" );
+ // Compute max outgoing register number per call site.
+ OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
+ // Calls to C may hammer extra stack slots above and beyond any arguments.
+ // These are usually backing store for register arguments for varargs.
+ if( call != NULL && call->is_CallRuntime() )
+ out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());
+
+
+ // Do the normal argument list (parameters) register masks
+ int argcnt = cnt - TypeFunc::Parms;
+ if( argcnt > 0 ) { // Skip it all if we have no args
+ BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt );
+ VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
+ int i;
+ for( i = 0; i < argcnt; i++ ) {
+ sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
+ }
+ // V-call to pick proper calling convention
+ call->calling_convention( sig_bt, parm_regs, argcnt );
+
+#ifdef ASSERT
+ // Sanity check users' calling convention. Really handy during
+ // the initial porting effort. Fairly expensive otherwise.
+ { for (int i = 0; i<argcnt; i++) {
+ if( !parm_regs[i].first()->is_valid() &&
+ !parm_regs[i].second()->is_valid() ) continue;
+ VMReg reg1 = parm_regs[i].first();
+ VMReg reg2 = parm_regs[i].second();
+ for (int j = 0; j < i; j++) {
+ if( !parm_regs[j].first()->is_valid() &&
+ !parm_regs[j].second()->is_valid() ) continue;
+ VMReg reg3 = parm_regs[j].first();
+ VMReg reg4 = parm_regs[j].second();
+ if( !reg1->is_valid() ) {
+ assert( !reg2->is_valid(), "valid halvsies" );
+ } else if( !reg3->is_valid() ) {
+ assert( !reg4->is_valid(), "valid halvsies" );
+ } else {
+ assert( reg1 != reg2, "calling conv. must produce distinct regs");
+ assert( reg1 != reg3, "calling conv. must produce distinct regs");
+ assert( reg1 != reg4, "calling conv. must produce distinct regs");
+ assert( reg2 != reg3, "calling conv. must produce distinct regs");
+ assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
+ assert( reg3 != reg4, "calling conv. must produce distinct regs");
+ }
+ }
+ }
+ }
+#endif
+
+ // Visit each argument. Compute its outgoing register mask.
+ // Return results now can have 2 bits returned.
+ // Compute max over all outgoing arguments both per call-site
+ // and over the entire method.
+ for( i = 0; i < argcnt; i++ ) {
+ // Address of incoming argument mask to fill in
+ RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
+ if( !parm_regs[i].first()->is_valid() &&
+ !parm_regs[i].second()->is_valid() ) {
+ continue; // Avoid Halves
+ }
+ // Grab first register, adjust stack slots and insert in mask.
+ OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call );
+ if (OptoReg::is_valid(reg1))
+ rm->Insert( reg1 );
+ // Grab second register (if any), adjust stack slots and insert in mask.
+ OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call );
+ if (OptoReg::is_valid(reg2))
+ rm->Insert( reg2 );
+ } // End of for all arguments
+
+ // Compute number of stack slots needed to restore stack in case of
+ // Pascal-style argument popping.
+ mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area;
+ }
+
+ // Compute the max stack slot killed by any call. These will not be
+ // available for debug info, and will be used to adjust FIRST_STACK_mask
+ // after all call sites have been visited.
+ if( _out_arg_limit < out_arg_limit_per_call)
+ _out_arg_limit = out_arg_limit_per_call;
+
+ if (mcall) {
+ // Kill the outgoing argument area, including any non-argument holes and
+ // any legacy C-killed slots. Use Fat-Projections to do the killing.
+ // Since the max-per-method covers the max-per-call-site and debug info
+ // is excluded on the max-per-method basis, debug info cannot land in
+ // this killed area.
+ uint r_cnt = mcall->tf()->range()->cnt();
+ MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
+ if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
+ C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
+ } else {
+ for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
+ proj->_rout.Insert(OptoReg::Name(i));
+ }
+ if( proj->_rout.is_NotEmpty() )
+ _proj_list.push(proj);
+ }
+ // Transfer the safepoint information from the call to the mcall
+ // Move the JVMState list
+ msfpt->set_jvms(sfpt->jvms());
+ for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
+ jvms->set_map(sfpt);
+ }
+
+ // Debug inputs begin just after the last incoming parameter
+ assert( (mcall == NULL) || (mcall->jvms() == NULL) ||
+ (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "" );
+
+ // Move the OopMap
+ msfpt->_oop_map = sfpt->_oop_map;
+
+ // Registers killed by the call are set in the local scheduling pass
+ // of Global Code Motion.
+ return msfpt;
+}
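+// Illustrative sketch, not part of the original sources: the per-argument
+// outgoing masks built above are just bitsets of register/stack-slot names,
+// and the method-wide _out_arg_limit is a running maximum over call sites.
+// ToyRegPair and ToyRegMask are stand-ins for VMRegPair and RegMask.
+#if 0
+#include <bitset>
+#include <algorithm>
+struct ToyRegPair { int first, second; };        // -1 encodes an invalid half
+typedef std::bitset<128> ToyRegMask;
+static int fill_outgoing_masks(const ToyRegPair* regs, int argcnt,
+                               ToyRegMask* masks, int method_limit) {
+  int per_call_limit = 0;
+  for (int i = 0; i < argcnt; i++) {
+    if (regs[i].first >= 0) {
+      masks[i].set(regs[i].first);               // first register half
+      per_call_limit = std::max(per_call_limit, regs[i].first + 1);
+    }
+    if (regs[i].second >= 0) {
+      masks[i].set(regs[i].second);              // second half, if any
+      per_call_limit = std::max(per_call_limit, regs[i].second + 1);
+    }
+  }
+  return std::max(method_limit, per_call_limit); // new method-wide limit
+}
+#endif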
+
+//---------------------------match_tree----------------------------------------
+// Match an Ideal Node DAG - turn it into a tree; Label & Reduce. Used as part
+// of the wholesale conversion from Ideal to Mach Nodes. Also used for
+// making GotoNodes while building the CFG and in init_spill_mask() to identify
+// a Load's result RegMask for memoization in idealreg2regmask[]
+MachNode *Matcher::match_tree( const Node *n ) {
+ assert( n->Opcode() != Op_Phi, "cannot match" );
+ assert( !n->is_block_start(), "cannot match" );
+ // Set the mark for all locally allocated State objects.
+ // When this call returns, the _states_arena arena will be reset
+ // freeing all State objects.
+ ResourceMark rm( &_states_arena );
+
+ LabelRootDepth = 0;
+
+ // StoreNodes require their Memory input to match any LoadNodes
+ Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
+
+ // State object for root node of match tree
+ // Allocate it on _states_arena - stack allocation can cause stack overflow.
+ State *s = new (&_states_arena) State;
+ s->_kids[0] = NULL;
+ s->_kids[1] = NULL;
+ s->_leaf = (Node*)n;
+ // Label the input tree, allocating labels from top-level arena
+ Label_Root( n, s, n->in(0), mem );
+ if (C->failing()) return NULL;
+
+ // The minimum cost match for the whole tree is found at the root State
+ uint mincost = max_juint;
+ uint cost = max_juint;
+ uint i;
+ for( i = 0; i < NUM_OPERANDS; i++ ) {
+ if( s->valid(i) && // valid entry and
+ s->_cost[i] < cost && // low cost and
+ s->_rule[i] >= NUM_OPERANDS ) // not an operand
+ cost = s->_cost[mincost=i];
+ }
+ if (mincost == max_juint) {
+#ifndef PRODUCT
+ tty->print("No matching rule for:");
+ s->dump();
+#endif
+ Matcher::soft_match_failure();
+ return NULL;
+ }
+ // Reduce input tree based upon the state labels to machine Nodes
+ MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
+#ifdef ASSERT
+ _old2new_map.map(n->_idx, m);
+#endif
+
+ // Add any Matcher-ignored edges
+ uint cnt = n->req();
+ uint start = 1;
+ if( mem != (Node*)1 ) start = MemNode::Memory+1;
+ if( n->Opcode() == Op_AddP ) {
+ assert( mem == (Node*)1, "" );
+ start = AddPNode::Base+1;
+ }
+ for( i = start; i < cnt; i++ ) {
+ if( !n->match_edge(i) ) {
+ if( i < m->req() )
+ m->ins_req( i, n->in(i) );
+ else
+ m->add_req( n->in(i) );
+ }
+ }
+
+ return m;
+}
+
+
+//------------------------------match_into_reg---------------------------------
+// Choose to either match this Node in a register or part of the current
+// match tree. Return true for requiring a register and false for matching
+// as part of the current match tree.
+static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {
+
+ const Type *t = m->bottom_type();
+
+ if( t->singleton() ) {
+ // Never force constants into registers. Allow them to match as
+ // constants or registers. Copies of the same value will share
+ // the same register. See find_shared_constant.
+ return false;
+ } else { // Not a constant
+ // Stop recursion if they have different Controls.
+ // Slot 0 of constants is not really a Control.
+ if( control && m->in(0) && control != m->in(0) ) {
+
+ // Actually, we can live with the most conservative control we
+ // find, if it post-dominates the others. This allows us to
+ // pick up load/op/store trees where the load can float a little
+ // above the store.
+ Node *x = control;
+ const uint max_scan = 6; // Arbitrary scan cutoff
+ uint j;
+ for( j=0; j<max_scan; j++ ) {
+ if( x->is_Region() ) // Bail out at merge points
+ return true;
+ x = x->in(0);
+ if( x == m->in(0) ) // Does 'control' post-dominate
+ break; // m->in(0)? If so, we can use it
+ }
+ if( j == max_scan ) // No post-domination before scan end?
+ return true; // Then break the match tree up
+ }
+ }
+
+ // Not forcibly cloning. If shared, put it into a register.
+ return shared;
+}
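+// Illustrative sketch, not part of the original sources: the bounded scan in
+// match_into_reg() walks at most max_scan steps up a control chain and bails
+// out at merge points. ToyNode is a stand-in for the real Node class.
+#if 0
+struct ToyNode { ToyNode* ctrl; bool is_region; };
+static bool postdominates(const ToyNode* from, const ToyNode* target,
+                          unsigned max_scan = 6) {
+  for (unsigned j = 0; j < max_scan; j++) {
+    if (from->is_region) return false;   // give up at a merge point
+    from = from->ctrl;                   // step up the control chain
+    if (from == target) return true;     // reached 'target': OK to subsume
+  }
+  return false;                          // no answer within the scan budget
+}
+#endif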
+
+
+//------------------------------Instruction Selection--------------------------
+// Label method walks a "tree" of nodes, using the ADLC generated DFA to match
+// ideal nodes to machine instructions. Trees are delimited by shared Nodes,
+// things the Matcher does not match (e.g., Memory), and things with different
+// Controls (hence forced into different blocks). We pass in the Control
+// selected for this entire State tree.
+
+// The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
+// Store and the Load must have identical Memories (as well as identical
+// pointers). Since the Matcher does not have anything for Memory (and
+// does not handle DAGs), I have to match the Memory input myself. If the
+// Tree root is a Store, I require all Loads to have the identical memory.
+Node *Matcher::Label_Root( const Node *n, State *svec, Node *control, const Node *mem){
+ // Since Label_Root is a recursive function, it's possible that we might run
+ // out of stack space. See bugs 6272980 & 6227033 for more info.
+ LabelRootDepth++;
+ if (LabelRootDepth > MaxLabelRootDepth) {
+ C->record_method_not_compilable_all_tiers("Out of stack space, increase MaxLabelRootDepth");
+ return NULL;
+ }
+ uint care = 0; // Edges matcher cares about
+ uint cnt = n->req();
+ uint i = 0;
+
+ // Examine children for memory state
+ // Can only subsume a child into your match-tree if that child's memory state
+ // is not modified along the path to another input.
+ // It is unsafe even if the other inputs are separate roots.
+ Node *input_mem = NULL;
+ for( i = 1; i < cnt; i++ ) {
+ if( !n->match_edge(i) ) continue;
+ Node *m = n->in(i); // Get ith input
+ assert( m, "expect non-null children" );
+ if( m->is_Load() ) {
+ if( input_mem == NULL ) {
+ input_mem = m->in(MemNode::Memory);
+ } else if( input_mem != m->in(MemNode::Memory) ) {
+ input_mem = NodeSentinel;
+ }
+ }
+ }
+
+ for( i = 1; i < cnt; i++ ){// For my children
+ if( !n->match_edge(i) ) continue;
+ Node *m = n->in(i); // Get ith input
+ // Allocate states out of a private arena
+ State *s = new (&_states_arena) State;
+ svec->_kids[care++] = s;
+ assert( care <= 2, "binary only for now" );
+
+ // Recursively label the State tree.
+ s->_kids[0] = NULL;
+ s->_kids[1] = NULL;
+ s->_leaf = m;
+
+ // Check for leaves of the State Tree; things that cannot be a part of
+ // the current tree. If it finds any, that value is matched as a
+ // register operand. If not, then the normal matching is used.
+ if( match_into_reg(n, m, control, i, is_shared(m)) ||
+ //
+ // Stop recursion if this is a LoadNode and the root of this tree is a
+ // StoreNode and the load & store have different memories.
+ ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
+ // Can NOT include the match of a subtree when its memory state
+ // is used by any of the other subtrees
+ (input_mem == NodeSentinel) ) {
+#ifndef PRODUCT
+ // Print when we exclude matching due to different memory states at input-loads
+ if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
+ && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
+ tty->print_cr("invalid input_mem");
+ }
+#endif
+ // Switch to a register-only opcode; this value must be in a register
+ // and cannot be subsumed as part of a larger instruction.
+ s->DFA( m->ideal_reg(), m );
+
+ } else {
+ // If match tree has no control and we do, adopt it for entire tree
+ if( control == NULL && m->in(0) != NULL && m->req() > 1 )
+ control = m->in(0); // Pick up control
+ // Else match as a normal part of the match tree.
+ control = Label_Root(m,s,control,mem);
+ if (C->failing()) return NULL;
+ }
+ }
+
+
+ // Call DFA to match this node, and return
+ svec->DFA( n->Opcode(), n );
+
+#ifdef ASSERT
+ uint x;
+ for( x = 0; x < _LAST_MACH_OPER; x++ )
+ if( svec->valid(x) )
+ break;
+
+ if (x >= _LAST_MACH_OPER) {
+ n->dump();
+ svec->dump();
+ assert( false, "bad AD file" );
+ }
+#endif
+ return control;
+}
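+// Illustrative sketch, not part of the original sources: Label_Root is a
+// bottom-up (BURS-style) labeling pass. The toy below labels a binary tree
+// with one cheapest cost per node; the real pass instead consults the
+// ADLC-generated DFA and records a cost per (operand, rule) entry.
+#if 0
+#include <cstddef>
+struct ToyState { int rule; unsigned cost; };
+struct ToyTree  { int opcode; ToyTree* kid[2]; ToyState label; };
+static void toy_label(ToyTree* t) {
+  unsigned cost = 1;                        // charge one unit for this node
+  for (int i = 0; i < 2; i++) {
+    if (t->kid[i] == NULL) continue;
+    toy_label(t->kid[i]);                   // label children first
+    cost += t->kid[i]->label.cost;          // accumulate their cost
+  }
+  t->label.rule = t->opcode;                // stand-in for a selected rule
+  t->label.cost = cost;
+}
+#endif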
+
+
+// Con nodes reduced using the same rule can share their MachNode
+// which reduces the number of copies of a constant in the final
+// program. The register allocator is free to split uses later to
+// split live ranges.
+MachNode* Matcher::find_shared_constant(Node* leaf, uint rule) {
+ if (!leaf->is_Con()) return NULL;
+
+ // See if this Con has already been reduced using this rule.
+ if (_shared_constants.Size() <= leaf->_idx) return NULL;
+ MachNode* last = (MachNode*)_shared_constants.at(leaf->_idx);
+ if (last != NULL && rule == last->rule()) {
+ // Get the new space root.
+ Node* xroot = new_node(C->root());
+ if (xroot == NULL) {
+ // This shouldn't happen given the order of matching.
+ return NULL;
+ }
+
+ // Shared constants need to have their control be root so they
+ // can be scheduled properly.
+ Node* control = last->in(0);
+ if (control != xroot) {
+ if (control == NULL || control == C->root()) {
+ last->set_req(0, xroot);
+ } else {
+ assert(false, "unexpected control");
+ return NULL;
+ }
+ }
+ return last;
+ }
+ return NULL;
+}
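+// Illustrative sketch, not part of the original sources: constant sharing is
+// a memo keyed by the constant's node index, hit only when the same reduction
+// rule is requested again. A toy memo over a plain array:
+#if 0
+#include <cstddef>
+struct ToyMach { int rule; };
+static ToyMach* toy_find_shared_con(ToyMach** memo, size_t memo_len,
+                                    size_t idx, int rule) {
+  if (idx >= memo_len) return NULL;                     // never reduced before
+  ToyMach* last = memo[idx];
+  if (last != NULL && last->rule == rule) return last;  // reuse prior node
+  return NULL;                                          // different rule: no sharing
+}
+#endif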
+
+
+//------------------------------ReduceInst-------------------------------------
+// Reduce a State tree (with given Control) into a tree of MachNodes.
+// This routine (and its cohort ReduceOper) converts Ideal Nodes into
+// complicated machine Nodes. Each MachNode covers some tree of Ideal Nodes.
+// Each MachNode has a number of complicated MachOper operands; each
+// MachOper also covers a further tree of Ideal Nodes.
+
+// The root of the Ideal match tree is always an instruction, so we enter
+// the recursion here. After building the MachNode, we need to recurse
+// the tree checking for these cases:
+// (1) Child is an instruction -
+// Build the instruction (recursively), add it as an edge.
+// Build a simple operand (register) to hold the result of the instruction.
+// (2) Child is an interior part of an instruction -
+// Skip over it (do nothing)
+// (3) Child is the start of an operand -
+// Build the operand, place it inside the instruction
+// Call ReduceOper.
+MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
+ assert( rule >= NUM_OPERANDS, "called with operand rule" );
+
+ MachNode* shared_con = find_shared_constant(s->_leaf, rule);
+ if (shared_con != NULL) {
+ return shared_con;
+ }
+
+ // Build the object to represent this state & prepare for recursive calls
+ MachNode *mach = s->MachNodeGenerator( rule, C );
+ mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C );
+ assert( mach->_opnds[0] != NULL, "Missing result operand" );
+ Node *leaf = s->_leaf;
+ // Check for instruction or instruction chain rule
+ if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
+ // Instruction
+ mach->add_req( leaf->in(0) ); // Set initial control
+ // Reduce interior of complex instruction
+ ReduceInst_Interior( s, rule, mem, mach, 1 );
+ } else {
+ // Instruction chain rules are data-dependent on their inputs
+ mach->add_req(0); // Set initial control to none
+ ReduceInst_Chain_Rule( s, rule, mem, mach );
+ }
+
+ // If a Memory was used, insert a Memory edge
+ if( mem != (Node*)1 )
+ mach->ins_req(MemNode::Memory,mem);
+
+ // If the _leaf is an AddP, insert the base edge
+ if( leaf->Opcode() == Op_AddP )
+ mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
+
+ uint num_proj = _proj_list.size();
+
+ // Perform any 1-to-many expansions required
+ MachNode *ex = mach->Expand(s,_proj_list);
+ if( ex != mach ) {
+ assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
+ if( ex->in(1)->is_Con() )
+ ex->in(1)->set_req(0, C->root());
+ // Remove old node from the graph
+ for( uint i=0; i<mach->req(); i++ ) {
+ mach->set_req(i,NULL);
+ }
+ }
+
+ // PhaseChaitin::fixup_spills will sometimes generate spill code
+ // via the matcher. By that time, nodes have been wired into the CFG,
+ // and any further nodes generated by expand rules will be left hanging
+ // in space, and will not get emitted as output code. Catch this.
+ // Also, catch any new register allocation constraints ("projections")
+ // generated belatedly during spill code generation.
+ if (_allocation_started) {
+ guarantee(ex == mach, "no expand rules during spill generation");
+ guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
+ }
+
+ if (leaf->is_Con()) {
+ // Record the con for sharing
+ _shared_constants.map(leaf->_idx, ex);
+ }
+
+ return ex;
+}
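+// Illustrative sketch, not part of the original sources: after labeling,
+// reduction revisits the tree top-down and emits one machine node per covered
+// subtree, recursing into children (compare cases (1)-(3) in the comment
+// before ReduceInst). ToyLabeled mirrors the toy labeling sketch above.
+#if 0
+#include <cstdio>
+#include <cstddef>
+struct ToyLabeled { int rule; ToyLabeled* kid[2]; };
+static void toy_reduce(const ToyLabeled* s, int depth) {
+  std::printf("%*semit rule %d\n", depth * 2, "", s->rule);
+  for (int i = 0; i < 2; i++)
+    if (s->kid[i] != NULL)
+      toy_reduce(s->kid[i], depth + 1);   // children become operands/inputs
+}
+#endif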
+
+void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
+ // 'op' is what I am expecting to receive
+ int op = _leftOp[rule];
+ // Operand type to catch child's result
+ // This is what my child will give me.
+ int opnd_class_instance = s->_rule[op];
+ // Choose between operand class or not.
+ // This is what I will receive.
+ int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
+ // New rule for child. Chase operand classes to get the actual rule.
+ int newrule = s->_rule[catch_op];
+
+ if( newrule < NUM_OPERANDS ) {
+ // Chain from operand or operand class, may be output of shared node
+ assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
+ "Bad AD file: Instruction chain rule must chain from operand");
+ // Insert operand into array of operands for this instruction
+ mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C );
+
+ ReduceOper( s, newrule, mem, mach );
+ } else {
+ // Chain from the result of an instruction
+ assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
+ mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
+ Node *mem1 = (Node*)1;
+ mach->add_req( ReduceInst(s, newrule, mem1) );
+ }
+ return;
+}
+
+
+uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
+ if( s->_leaf->is_Load() ) {
+ Node *mem2 = s->_leaf->in(MemNode::Memory);
+ assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
+ mem = mem2;
+ }
+ if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) {
+ if( mach->in(0) == NULL )
+ mach->set_req(0, s->_leaf->in(0));
+ }
+
+ // Now recursively walk the state tree & add operand list.
+ for( uint i=0; i<2; i++ ) { // binary tree
+ State *newstate = s->_kids[i];
+ if( newstate == NULL ) break; // Might only have 1 child
+ // 'op' is what I am expecting to receive
+ int op;
+ if( i == 0 ) {
+ op = _leftOp[rule];
+ } else {
+ op = _rightOp[rule];
+ }
+ // Operand type to catch child's result
+ // This is what my child will give me.
+ int opnd_class_instance = newstate->_rule[op];
+ // Choose between operand class or not.
+ // This is what I will receive.
+ int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
+ // New rule for child. Chase operand classes to get the actual rule.
+ int newrule = newstate->_rule[catch_op];
+
+ if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
+ // Operand/operandClass
+ // Insert operand into array of operands for this instruction
+ mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C );
+ ReduceOper( newstate, newrule, mem, mach );
+
+ } else { // Child is internal operand or new instruction
+ if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction?
+ // internal operand --> call ReduceInst_Interior
+ // Interior of complex instruction. Do nothing but recurse.
+ num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds );
+ } else {
+ // instruction --> call build operand( ) to catch result
+ // --> ReduceInst( newrule )
+ mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
+ Node *mem1 = (Node*)1;
+ mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
+ }
+ }
+ assert( mach->_opnds[num_opnds-1], "" );
+ }
+ return num_opnds;
+}
+
+// This routine walks the interior of possible complex operands.
+// At each point we check our children in the match tree:
+// (1) No children -
+// We are a leaf; add _leaf field as an input to the MachNode
+// (2) Child is an internal operand -
+// Skip over it ( do nothing )
+// (3) Child is an instruction -
+// Call ReduceInst recursively and add the
+// instruction as an input to the MachNode
+void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
+ assert( rule < _LAST_MACH_OPER, "called with operand rule" );
+ State *kid = s->_kids[0];
+ assert( kid == NULL || s->_leaf->in(0) == NULL, "internal operands have no control" );
+
+ // Leaf? And not subsumed?
+ if( kid == NULL && !_swallowed[rule] ) {
+ mach->add_req( s->_leaf ); // Add leaf pointer
+ return; // Bail out
+ }
+
+ if( s->_leaf->is_Load() ) {
+ assert( mem == (Node*)1, "multiple Memories being matched at once?" );
+ mem = s->_leaf->in(MemNode::Memory);
+ }
+ if( s->_leaf->in(0) && s->_leaf->req() > 1) {
+ if( !mach->in(0) )
+ mach->set_req(0,s->_leaf->in(0));
+ else {
+ assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
+ }
+ }
+
+ for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) { // binary tree
+ int newrule;
+ if( i == 0 )
+ newrule = kid->_rule[_leftOp[rule]];
+ else
+ newrule = kid->_rule[_rightOp[rule]];
+
+ if( newrule < _LAST_MACH_OPER ) { // Operand or instruction?
+ // Internal operand; recurse but do nothing else
+ ReduceOper( kid, newrule, mem, mach );
+
+ } else { // Child is a new instruction
+ // Reduce the instruction, and add a direct pointer from this
+ // machine instruction to the newly reduced one.
+ Node *mem1 = (Node*)1;
+ mach->add_req( ReduceInst( kid, newrule, mem1 ) );
+ }
+ }
+}
+
+
+// -------------------------------------------------------------------------
+// Java-Java calling convention
+// (what you use when Java calls Java)
+
+//------------------------------find_receiver----------------------------------
+// For a given signature, return the OptoReg for parameter 0.
+OptoReg::Name Matcher::find_receiver( bool is_outgoing ) {
+ VMRegPair regs;
+ BasicType sig_bt = T_OBJECT;
+ calling_convention(&sig_bt, &regs, 1, is_outgoing);
+ // Return argument 0 register. In the LP64 build pointers
+ // take 2 registers, but the VM wants only the 'main' name.
+ return OptoReg::as_OptoReg(regs.first());
+}
+
+// A method-klass-holder may be passed in the inline_cache_reg
+// and then expanded into the inline_cache_reg and a method_oop register
+// defined in ad_<arch>.cpp
+
+
+//------------------------------find_shared------------------------------------
+// Set bits if Node is shared or otherwise a root
+void Matcher::find_shared( Node *n ) {
+ // Allocate stack of size C->unique() * 2 to avoid frequent realloc
+ MStack mstack(C->unique() * 2);
+ mstack.push(n, Visit); // Don't need to pre-visit root node
+ while (mstack.is_nonempty()) {
+ n = mstack.node(); // Leave node on stack
+ Node_State nstate = mstack.state();
+ if (nstate == Pre_Visit) {
+ if (is_visited(n)) { // Visited already?
+ // Node is shared and has no reason to clone. Flag it as shared.
+ // This causes it to match into a register for the sharing.
+ set_shared(n); // Flag as shared and
+ mstack.pop(); // remove node from stack
+ continue;
+ }
+ nstate = Visit; // Not already visited; so visit now
+ }
+ if (nstate == Visit) {
+ mstack.set_state(Post_Visit);
+ set_visited(n); // Flag as visited now
+ bool mem_op = false;
+
+ switch( n->Opcode() ) { // Handle some opcodes special
+ case Op_Phi: // Treat Phis as shared roots
+ case Op_Parm:
+ case Op_Proj: // All handled specially during matching
+ set_shared(n);
+ set_dontcare(n);
+ break;
+ case Op_If:
+ case Op_CountedLoopEnd:
+ mstack.set_state(Alt_Post_Visit); // Alternative way
+ // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)). Helps
+ // with matching cmp/branch in 1 instruction. The Matcher needs the
+ // Bool and CmpX side-by-side, because it can only get at constants
+ // that are at the leaves of Match trees, and the Bool's condition acts
+ // as a constant here.
+ mstack.push(n->in(1), Visit); // Clone the Bool
+ mstack.push(n->in(0), Pre_Visit); // Visit control input
+ continue; // while (mstack.is_nonempty())
+ case Op_ConvI2D: // These forms efficiently match with a prior
+ case Op_ConvI2F: // Load but not a following Store
+ if( n->in(1)->is_Load() && // Prior load
+ n->outcnt() == 1 && // Not already shared
+ n->unique_out()->is_Store() ) // Following store
+ set_shared(n); // Force it to be a root
+ break;
+ case Op_ReverseBytesI:
+ case Op_ReverseBytesL:
+ if( n->in(1)->is_Load() && // Prior load
+ n->outcnt() == 1 ) // Not already shared
+ set_shared(n); // Force it to be a root
+ break;
+ case Op_BoxLock: // Can't match until we get stack-regs in ADLC
+ case Op_IfFalse:
+ case Op_IfTrue:
+ case Op_MachProj:
+ case Op_MergeMem:
+ case Op_Catch:
+ case Op_CatchProj:
+ case Op_CProj:
+ case Op_JumpProj:
+ case Op_JProj:
+ case Op_NeverBranch:
+ set_dontcare(n);
+ break;
+ case Op_Jump:
+ mstack.push(n->in(1), Visit); // Switch Value
+ mstack.push(n->in(0), Pre_Visit); // Visit Control input
+ continue; // while (mstack.is_nonempty())
+ case Op_StrComp:
+ set_shared(n); // Force result into register (it will be anyways)
+ break;
+ case Op_ConP: { // Convert pointers above the centerline to NULL
+ TypeNode *tn = n->as_Type(); // Constants derive from type nodes
+ const TypePtr* tp = tn->type()->is_ptr();
+ if (tp->_ptr == TypePtr::AnyNull) {
+ tn->set_type(TypePtr::NULL_PTR);
+ }
+ break;
+ }
+ case Op_Binary: // These are introduced in the Post_Visit state.
+ ShouldNotReachHere();
+ break;
+ case Op_StoreB: // Do match these, despite no ideal reg
+ case Op_StoreC:
+ case Op_StoreCM:
+ case Op_StoreD:
+ case Op_StoreF:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreP:
+ case Op_Store16B:
+ case Op_Store8B:
+ case Op_Store4B:
+ case Op_Store8C:
+ case Op_Store4C:
+ case Op_Store2C:
+ case Op_Store4I:
+ case Op_Store2I:
+ case Op_Store2L:
+ case Op_Store4F:
+ case Op_Store2F:
+ case Op_Store2D:
+ case Op_ClearArray:
+ case Op_SafePoint:
+ mem_op = true;
+ break;
+ case Op_LoadB:
+ case Op_LoadC:
+ case Op_LoadD:
+ case Op_LoadF:
+ case Op_LoadI:
+ case Op_LoadKlass:
+ case Op_LoadL:
+ case Op_LoadS:
+ case Op_LoadP:
+ case Op_LoadRange:
+ case Op_LoadD_unaligned:
+ case Op_LoadL_unaligned:
+ case Op_Load16B:
+ case Op_Load8B:
+ case Op_Load4B:
+ case Op_Load4C:
+ case Op_Load2C:
+ case Op_Load8C:
+ case Op_Load8S:
+ case Op_Load4S:
+ case Op_Load2S:
+ case Op_Load4I:
+ case Op_Load2I:
+ case Op_Load2L:
+ case Op_Load4F:
+ case Op_Load2F:
+ case Op_Load2D:
+ mem_op = true;
+ // Must be root of match tree due to prior load conflict
+ if( C->subsume_loads() == false ) {
+ set_shared(n);
+ }
+ // Fall into default case
+ default:
+ if( !n->ideal_reg() )
+ set_dontcare(n); // Unmatchable Nodes
+ } // end_switch
+
+ for(int i = n->req() - 1; i >= 0; --i) { // For my children
+ Node *m = n->in(i); // Get ith input
+ if (m == NULL) continue; // Ignore NULLs
+ uint mop = m->Opcode();
+
+ // Must clone all producers of flags, or we will not match correctly.
+ // Suppose a compare setting int-flags is shared (e.g., a switch-tree)
+ // then it will match into an ideal Op_RegFlags. Alas, the fp-flags
+ // are also there, so we may match a float-branch to int-flags and
+ // expect the allocator to haul the flags from the int-side to the
+ // fp-side. No can do.
+ if( _must_clone[mop] ) {
+ mstack.push(m, Visit);
+ continue; // for(int i = ...)
+ }
+
+ // Clone addressing expressions as they are "free" in most instructions
+ if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
+ Node *off = m->in(AddPNode::Offset);
+ if( off->is_Con() ) {
+ set_visited(m); // Flag as visited now
+ Node *adr = m->in(AddPNode::Address);
+
+ // Intel, ARM and friends can handle 2 adds in addressing mode
+ if( clone_shift_expressions && adr->Opcode() == Op_AddP &&
+ // AtomicAdd is not an addressing expression.
+ // Cheap to find it by looking for screwy base.
+ !adr->in(AddPNode::Base)->is_top() ) {
+ set_visited(adr); // Flag as visited now
+ Node *shift = adr->in(AddPNode::Offset);
+ // Check for shift by small constant as well
+ if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
+ shift->in(2)->get_int() <= 3 ) {
+ set_visited(shift); // Flag as visited now
+ mstack.push(shift->in(2), Visit);
+#ifdef _LP64
+ // Allow the Matcher to match the rule which bypasses the
+ // ConvI2L operation for an array index on LP64
+ // if the index value is positive.
+ if( shift->in(1)->Opcode() == Op_ConvI2L &&
+ shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) {
+ set_visited(shift->in(1)); // Flag as visited now
+ mstack.push(shift->in(1)->in(1), Pre_Visit);
+ } else
+#endif
+ mstack.push(shift->in(1), Pre_Visit);
+ } else {
+ mstack.push(shift, Pre_Visit);
+ }
+ mstack.push(adr->in(AddPNode::Address), Pre_Visit);
+ mstack.push(adr->in(AddPNode::Base), Pre_Visit);
+ } else { // Sparc, Alpha, PPC and friends
+ mstack.push(adr, Pre_Visit);
+ }
+
+ // Clone X+offset as it also folds into most addressing expressions
+ mstack.push(off, Visit);
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
+ continue; // for(int i = ...)
+ } // if( off->is_Con() )
+ } // if( mem_op &&
+ mstack.push(m, Pre_Visit);
+ } // for(int i = ...)
+ }
+ else if (nstate == Alt_Post_Visit) {
+ mstack.pop(); // Remove node from stack
+ // We cannot remove the Cmp input from the Bool here, as the Bool may be
+ // shared and all users of the Bool need to move the Cmp in parallel.
+ // This leaves both the Bool and the If pointing at the Cmp. To
+ // prevent the Matcher from trying to Match the Cmp along both paths
+ // BoolNode::match_edge always returns a zero.
+
+ // We reorder the Op_If in a pre-order manner, so we can visit without
+ // accidentally sharing the Cmp (the Bool and the If make 2 users).
+ n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
+ }
+ else if (nstate == Post_Visit) {
+ mstack.pop(); // Remove node from stack
+
+ // Now hack a few special opcodes
+ switch( n->Opcode() ) { // Handle some opcodes special
+ case Op_StorePConditional:
+ case Op_StoreLConditional:
+ case Op_CompareAndSwapI:
+ case Op_CompareAndSwapL:
+ case Op_CompareAndSwapP: { // Convert trinary to binary-tree
+ Node *newval = n->in(MemNode::ValueIn );
+ Node *oldval = n->in(LoadStoreNode::ExpectedIn);
+ Node *pair = new (C, 3) BinaryNode( oldval, newval );
+ n->set_req(MemNode::ValueIn,pair);
+ n->del_req(LoadStoreNode::ExpectedIn);
+ break;
+ }
+ case Op_CMoveD: // Convert trinary to binary-tree
+ case Op_CMoveF:
+ case Op_CMoveI:
+ case Op_CMoveL:
+ case Op_CMoveP: {
+ // Restructure into a binary tree for Matching. It's possible that
+ // we could move this code up next to the graph reshaping for IfNodes
+ // or vice-versa, but I do not want to debug this for Ladybird.
+ // 10/2/2000 CNC.
+ Node *pair1 = new (C, 3) BinaryNode(n->in(1),n->in(1)->in(1));
+ n->set_req(1,pair1);
+ Node *pair2 = new (C, 3) BinaryNode(n->in(2),n->in(3));
+ n->set_req(2,pair2);
+ n->del_req(3);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ else {
+ ShouldNotReachHere();
+ }
+ } // end of while (mstack.is_nonempty())
+}
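+// Illustrative sketch, not part of the original sources: a node is "shared"
+// exactly when the depth-first walk above reaches it a second time, which is
+// why Pre_Visit only needs the _visited and _shared bits. Toy version:
+#if 0
+#include <vector>
+#include <cstddef>
+struct ToyNode { std::vector<ToyNode*> in; bool visited, shared; };
+static void toy_find_shared(ToyNode* root) {
+  std::vector<ToyNode*> stack(1, root);
+  while (!stack.empty()) {
+    ToyNode* n = stack.back(); stack.pop_back();
+    if (n->visited) { n->shared = true; continue; }     // second visit => shared
+    n->visited = true;
+    for (size_t i = 0; i < n->in.size(); i++)
+      if (n->in[i] != NULL) stack.push_back(n->in[i]);  // visit inputs
+  }
+}
+#endif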
+
+#ifdef ASSERT
+// machine-independent root to machine-dependent root
+void Matcher::dump_old2new_map() {
+ _old2new_map.dump();
+}
+#endif
+
+//---------------------------collect_null_checks-------------------------------
+// Find null checks in the ideal graph; write a machine-specific node for
+// it. Used by later implicit-null-check handling. Actually collects
+// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
+// value being tested.
+void Matcher::collect_null_checks( Node *proj ) {
+ Node *iff = proj->in(0);
+ if( iff->Opcode() == Op_If ) {
+ // During matching If's have Bool & Cmp side-by-side
+ BoolNode *b = iff->in(1)->as_Bool();
+ Node *cmp = iff->in(2);
+ if( cmp->Opcode() == Op_CmpP ) {
+ if( cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
+
+ if( proj->Opcode() == Op_IfTrue ) {
+ extern int all_null_checks_found;
+ all_null_checks_found++;
+ if( b->_test._test == BoolTest::ne ) {
+ _null_check_tests.push(proj);
+ _null_check_tests.push(cmp->in(1));
+ }
+ } else {
+ assert( proj->Opcode() == Op_IfFalse, "" );
+ if( b->_test._test == BoolTest::eq ) {
+ _null_check_tests.push(proj);
+ _null_check_tests.push(cmp->in(1));
+ }
+ }
+ }
+ }
+ }
+}
+
+//---------------------------validate_null_checks------------------------------
+// It's possible that the value being NULL-checked is not the root of a match
+// tree. If so, I cannot use the value in an implicit null check.
+void Matcher::validate_null_checks( ) {
+ uint cnt = _null_check_tests.size();
+ for( uint i=0; i < cnt; i+=2 ) {
+ Node *test = _null_check_tests[i];
+ Node *val = _null_check_tests[i+1];
+ if (has_new_node(val)) {
+ // Is a match-tree root, so replace with the matched value
+ _null_check_tests.map(i+1, new_node(val));
+ } else {
+ // Yank from candidate list
+ _null_check_tests.map(i+1,_null_check_tests[--cnt]);
+ _null_check_tests.map(i,_null_check_tests[--cnt]);
+ _null_check_tests.pop();
+ _null_check_tests.pop();
+ i-=2;
+ }
+ }
+}
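+// Illustrative sketch, not part of the original sources: validate_null_checks
+// deletes a (proj, value) pair by copying the last pair into its slot and
+// shrinking the list, then re-examining the same slot. With std::vector:
+#if 0
+#include <vector>
+static void remove_pair_at(std::vector<int>& v, int i) {
+  int cnt = (int)v.size();
+  v[i]     = v[cnt - 2];   // move last pair's first element down
+  v[i + 1] = v[cnt - 1];   // move last pair's second element down
+  v.pop_back();
+  v.pop_back();
+  // The caller must look at index i again; a different pair now lives there.
+}
+#endif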
+
+
+// Used by the DFA in dfa_sparc.cpp. Check for a prior FastLock
+// acting as an Acquire and thus we don't need an Acquire here. We
+// retain the Node to act as a compiler ordering barrier.
+bool Matcher::prior_fast_lock( const Node *acq ) {
+ Node *r = acq->in(0);
+ if( !r->is_Region() || r->req() <= 1 ) return false;
+ Node *proj = r->in(1);
+ if( !proj->is_Proj() ) return false;
+ Node *call = proj->in(0);
+ if( !call->is_Call() || call->as_Call()->entry_point() != OptoRuntime::complete_monitor_locking_Java() )
+ return false;
+
+ return true;
+}
+
+// Used by the DFA in dfa_sparc.cpp. Check for a following FastUnLock
+// acting as a Release and thus we don't need a Release here. We
+// retain the Node to act as a compiler ordering barrier.
+bool Matcher::post_fast_unlock( const Node *rel ) {
+ Compile *C = Compile::current();
+ assert( rel->Opcode() == Op_MemBarRelease, "" );
+ const MemBarReleaseNode *mem = (const MemBarReleaseNode*)rel;
+ DUIterator_Fast imax, i = mem->fast_outs(imax);
+ Node *ctrl = NULL;
+ while( true ) {
+ ctrl = mem->fast_out(i); // Throw out-of-bounds if proj not found
+ assert( ctrl->is_Proj(), "only projections here" );
+ ProjNode *proj = (ProjNode*)ctrl;
+ if( proj->_con == TypeFunc::Control &&
+ !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
+ break;
+ i++;
+ }
+ Node *iff = NULL;
+ for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
+ Node *x = ctrl->fast_out(j);
+ if( x->is_If() && x->req() > 1 &&
+ !C->node_arena()->contains(x) ) { // Unmatched old-space only
+ iff = x;
+ break;
+ }
+ }
+ if( !iff ) return false;
+ Node *bol = iff->in(1);
+ // The iff might be some random subclass of If or bol might be Con-Top
+ if (!bol->is_Bool()) return false;
+ assert( bol->req() > 1, "" );
+ return (bol->in(1)->Opcode() == Op_FastUnlock);
+}
+
+// Used by the DFA in dfa_xxx.cpp. Check for a following barrier or
+// atomic instruction acting as a store_load barrier without any
+// intervening volatile load, and thus we don't need a barrier here.
+// We retain the Node to act as a compiler ordering barrier.
+bool Matcher::post_store_load_barrier(const Node *vmb) {
+ Compile *C = Compile::current();
+ assert( vmb->is_MemBar(), "" );
+ assert( vmb->Opcode() != Op_MemBarAcquire, "" );
+ const MemBarNode *mem = (const MemBarNode*)vmb;
+
+ // Get the Proj node, ctrl, that can be used to iterate forward
+ Node *ctrl = NULL;
+ DUIterator_Fast imax, i = mem->fast_outs(imax);
+ while( true ) {
+ ctrl = mem->fast_out(i); // Throw out-of-bounds if proj not found
+ assert( ctrl->is_Proj(), "only projections here" );
+ ProjNode *proj = (ProjNode*)ctrl;
+ if( proj->_con == TypeFunc::Control &&
+ !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
+ break;
+ i++;
+ }
+
+ for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
+ Node *x = ctrl->fast_out(j);
+ int xop = x->Opcode();
+
+ // We don't need current barrier if we see another or a lock
+ // before seeing volatile load.
+ //
+ // Op_Fastunlock previously appeared in the Op_* list below.
+ // With the advent of 1-0 lock operations we're no longer guaranteed
+ // that a monitor exit operation contains a serializing instruction.
+
+ if (xop == Op_MemBarVolatile ||
+ xop == Op_FastLock ||
+ xop == Op_CompareAndSwapL ||
+ xop == Op_CompareAndSwapP ||
+ xop == Op_CompareAndSwapI)
+ return true;
+
+ if (x->is_MemBar()) {
+ // We must retain this membar if there is an upcoming volatile
+ // load, which will be preceded by acquire membar.
+ if (xop == Op_MemBarAcquire)
+ return false;
+ // For other kinds of barriers, check by pretending we
+ // are them, and seeing if we can be removed.
+ else
+ return post_store_load_barrier((const MemBarNode*)x);
+ }
+
+ // Delicate code to detect case of an upcoming fastlock block
+ if( x->is_If() && x->req() > 1 &&
+ !C->node_arena()->contains(x) ) { // Unmatched old-space only
+ Node *iff = x;
+ Node *bol = iff->in(1);
+ // The iff might be some random subclass of If or bol might be Con-Top
+ if (!bol->is_Bool()) return false;
+ assert( bol->req() > 1, "" );
+ return (bol->in(1)->Opcode() == Op_FastUnlock);
+ }
+ // probably not necessary to check for these
+ if (x->is_Call() || x->is_SafePoint() || x->is_block_proj())
+ return false;
+ }
+ return false;
+}
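+// Illustrative sketch, not part of the original sources: the scan above looks
+// at users of the barrier's control projection and declares the barrier
+// redundant only if another serializing node appears before any acquire.
+// Toy version over a flat list of successor opcodes:
+#if 0
+enum ToyOp { TOY_VOLATILE_MEMBAR, TOY_ATOMIC_RMW, TOY_ACQUIRE_MEMBAR, TOY_OTHER };
+static bool toy_barrier_is_redundant(const ToyOp* succ, int n) {
+  for (int i = 0; i < n; i++) {
+    if (succ[i] == TOY_VOLATILE_MEMBAR || succ[i] == TOY_ATOMIC_RMW)
+      return true;    // another store-load barrier follows anyway
+    if (succ[i] == TOY_ACQUIRE_MEMBAR)
+      return false;   // an upcoming volatile load still needs this barrier
+  }
+  return false;       // be conservative when nothing is found
+}
+#endif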
+
+//=============================================================================
+//---------------------------State---------------------------------------------
+State::State(void) {
+#ifdef ASSERT
+ _id = 0;
+ _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
+ _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
+ //memset(_cost, -1, sizeof(_cost));
+ //memset(_rule, -1, sizeof(_rule));
+#endif
+ memset(_valid, 0, sizeof(_valid));
+}
+
+#ifdef ASSERT
+State::~State() {
+ _id = 99;
+ _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
+ _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
+ memset(_cost, -3, sizeof(_cost));
+ memset(_rule, -3, sizeof(_rule));
+}
+#endif
+
+#ifndef PRODUCT
+//---------------------------dump----------------------------------------------
+void State::dump() {
+ tty->print("\n");
+ dump(0);
+}
+
+void State::dump(int depth) {
+ for( int j = 0; j < depth; j++ )
+ tty->print(" ");
+ tty->print("--N: ");
+ _leaf->dump();
+ uint i;
+ for( i = 0; i < _LAST_MACH_OPER; i++ )
+ // Check for valid entry
+ if( valid(i) ) {
+ for( int j = 0; j < depth; j++ )
+ tty->print(" ");
+ assert(_cost[i] != max_juint, "cost must be a valid value");
+ assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
+ tty->print_cr("%s %d %s",
+ ruleName[i], _cost[i], ruleName[_rule[i]] );
+ }
+ tty->print_cr("");
+
+ for( i=0; i<2; i++ )
+ if( _kids[i] )
+ _kids[i]->dump(depth+1);
+}
+#endif
diff --git a/src/share/vm/opto/matcher.hpp b/src/share/vm/opto/matcher.hpp
new file mode 100644
index 000000000..a33c4e92d
--- /dev/null
+++ b/src/share/vm/opto/matcher.hpp
@@ -0,0 +1,392 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Compile;
+class Node;
+class MachNode;
+class MachTypeNode;
+class MachOper;
+
+//---------------------------Matcher-------------------------------------------
+class Matcher : public PhaseTransform {
+ friend class VMStructs;
+ // Private arena of State objects
+ ResourceArea _states_arena;
+
+ VectorSet _visited; // Visit bits
+
+ // Used to control the Label pass
+ VectorSet _shared; // Shared Ideal Node
+ VectorSet _dontcare; // Nothing the matcher cares about
+
+ // Private methods which perform the actual matching and reduction
+ // Walks the label tree, generating machine nodes
+ MachNode *ReduceInst( State *s, int rule, Node *&mem);
+ void ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach);
+ uint ReduceInst_Interior(State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds);
+ void ReduceOper( State *s, int newrule, Node *&mem, MachNode *mach );
+
+ // If this node already matched using "rule", return the MachNode for it.
+ MachNode* find_shared_constant(Node* con, uint rule);
+
+ // Convert a dense opcode number to an expanded rule number
+ const int *_reduceOp;
+ const int *_leftOp;
+ const int *_rightOp;
+
+ // Map dense opcode number to info on when rule is swallowed constant.
+ const bool *_swallowed;
+
+ // Map dense rule number to determine if this is an instruction chain rule
+ const uint _begin_inst_chain_rule;
+ const uint _end_inst_chain_rule;
+
+ // We want to clone constants and possible CmpI-variants.
+ // If we do not clone CmpI, then we can have many instances of
+ // condition codes alive at once. This is OK on some chips and
+ // bad on others. Hence the machine-dependent table lookup.
+ const char *_must_clone;
+
+ // Find shared Nodes, or Nodes that otherwise are Matcher roots
+ void find_shared( Node *n );
+
+ // Debug and profile information for nodes in old space:
+ GrowableArray<Node_Notes*>* _old_node_note_array;
+
+ // Node labeling iterator for instruction selection
+ Node *Label_Root( const Node *n, State *svec, Node *control, const Node *mem );
+
+ Node *transform( Node *dummy );
+
+ Node_List &_proj_list; // For Machine nodes killing many values
+
+ Node_Array _shared_constants;
+
+ debug_only(Node_Array _old2new_map;) // Map roots of ideal-trees to machine-roots
+
+ // Accessors for the inherited field PhaseTransform::_nodes:
+ void grow_new_node_array(uint idx_limit) {
+ _nodes.map(idx_limit-1, NULL);
+ }
+ bool has_new_node(const Node* n) const {
+ return _nodes.at(n->_idx) != NULL;
+ }
+ Node* new_node(const Node* n) const {
+ assert(has_new_node(n), "set before get");
+ return _nodes.at(n->_idx);
+ }
+ void set_new_node(const Node* n, Node *nn) {
+ assert(!has_new_node(n), "set only once");
+ _nodes.map(n->_idx, nn);
+ }
+
+#ifdef ASSERT
+ // Make sure only new nodes are reachable from this node
+ void verify_new_nodes_only(Node* root);
+#endif
+
+public:
+ int LabelRootDepth;
+ static const int base2reg[]; // Map Types to machine register types
+ // Convert ideal machine register to a register mask for spill-loads
+ static const RegMask *idealreg2regmask[];
+ RegMask *idealreg2spillmask[_last_machine_leaf];
+ RegMask *idealreg2debugmask[_last_machine_leaf];
+ void init_spill_mask( Node *ret );
+ // Convert machine register number to register mask
+ static uint mreg2regmask_max;
+ static RegMask mreg2regmask[];
+ static RegMask STACK_ONLY_mask;
+
+ bool is_shared( Node *n ) { return _shared.test(n->_idx) != 0; }
+ void set_shared( Node *n ) { _shared.set(n->_idx); }
+ bool is_visited( Node *n ) { return _visited.test(n->_idx) != 0; }
+ void set_visited( Node *n ) { _visited.set(n->_idx); }
+ bool is_dontcare( Node *n ) { return _dontcare.test(n->_idx) != 0; }
+ void set_dontcare( Node *n ) { _dontcare.set(n->_idx); }
+
+ // Mode bit to tell DFA and expand rules whether we are running after
+ // (or during) register selection. Usually, the matcher runs before,
+ // but it will also get called to generate post-allocation spill code.
+ // In this situation, it is a deadly error to attempt to allocate more
+ // temporary registers.
+ bool _allocation_started;
+
+ // Machine register names
+ static const char *regName[];
+ // Machine register encodings
+ static const unsigned char _regEncode[];
+ // Machine Node names
+ const char **_ruleName;
+ // Rules that are cheaper to rematerialize than to spill
+ static const uint _begin_rematerialize;
+ static const uint _end_rematerialize;
+
+ // An array of chars, from 0 to _last_Mach_Reg.
+ // No Save = 'N' (for register windows)
+ // Save on Entry = 'E'
+ // Save on Call = 'C'
+ // Always Save = 'A' (same as SOE + SOC)
+ const char *_register_save_policy;
+ const char *_c_reg_save_policy;
+ // Convert a machine register to a machine register type, so-as to
+ // properly match spill code.
+ const int *_register_save_type;
+ // Maps from machine register to boolean; true if machine register can
+ // be holding a call argument in some signature.
+ static bool can_be_java_arg( int reg );
+ // Maps from machine register to boolean; true if machine register holds
+ // a spillable argument.
+ static bool is_spillable_arg( int reg );
+
+ // List of IfFalse or IfTrue Nodes that indicate a taken null test.
+ // List is valid in the post-matching space.
+ Node_List _null_check_tests;
+ void collect_null_checks( Node *proj );
+ void validate_null_checks( );
+
+ Matcher( Node_List &proj_list );
+
+ // Select instructions for entire method
+ void match( );
+ // Helper for match
+ OptoReg::Name warp_incoming_stk_arg( VMReg reg );
+
+ // Transform, then walk. Does implicit DCE while walking.
+ // Name changed from "transform" to avoid it being virtual.
+ Node *xform( Node *old_space_node, int Nodes );
+
+ // Match a single Ideal Node - turn it into a 1-Node tree; Label & Reduce.
+ MachNode *match_tree( const Node *n );
+ MachNode *match_sfpt( SafePointNode *sfpt );
+ // Helper for match_sfpt
+ OptoReg::Name warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call );
+
+ // Initialize first stack mask and related masks.
+ void init_first_stack_mask();
+
+ // If we should save-on-entry this register
+ bool is_save_on_entry( int reg );
+
+ // Fixup the save-on-entry registers
+ void Fixup_Save_On_Entry( );
+
+ // --- Frame handling ---
+
+ // Register number of the stack slot corresponding to the incoming SP.
+ // Per the Big Picture in the AD file, it is:
+ // SharedInfo::stack0 + locks + in_preserve_stack_slots + pad2.
+ OptoReg::Name _old_SP;
+
+ // Register number of the stack slot corresponding to the highest incoming
+ // argument on the stack. Per the Big Picture in the AD file, it is:
+ // _old_SP + out_preserve_stack_slots + incoming argument size.
+ OptoReg::Name _in_arg_limit;
+
+ // Register number of the stack slot corresponding to the new SP.
+ // Per the Big Picture in the AD file, it is:
+ // _in_arg_limit + pad0
+ OptoReg::Name _new_SP;
+
+ // Register number of the stack slot corresponding to the highest outgoing
+ // argument on the stack. Per the Big Picture in the AD file, it is:
+ // _new_SP + max outgoing arguments of all calls
+ OptoReg::Name _out_arg_limit;
+
+ OptoRegPair *_parm_regs; // Array of machine registers per argument
+ RegMask *_calling_convention_mask; // Array of RegMasks per argument
+
+ // Does matcher support this ideal node?
+ static const bool has_match_rule(int opcode);
+ static const bool _hasMatchRule[_last_opcode];
+
+ // Used to determine if we have fast l2f conversion
+ // USII has it, USIII doesn't
+ static const bool convL2FSupported(void);
+
+ // Vector width in bytes
+ static const uint vector_width_in_bytes(void);
+
+ // Vector ideal reg
+ static const uint vector_ideal_reg(void);
+
+ // Used to determine a "low complexity" 64-bit constant. (Zero is simple.)
+ // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
+ // Depends on the details of 64-bit constant generation on the CPU.
+ static const bool isSimpleConstant64(jlong con);
+
+ // These calls are all generated by the ADLC
+
+ // TRUE - grows up, FALSE - grows down (Intel)
+ virtual bool stack_direction() const;
+
+ // Java-Java calling convention
+ // (what you use when Java calls Java)
+
+ // Alignment of stack in bytes, standard Intel word alignment is 4.
+ // Sparc probably wants at least double-word (8).
+ static uint stack_alignment_in_bytes();
+ // Alignment of stack, measured in stack slots.
+ // The size of stack slots is defined by VMRegImpl::stack_slot_size.
+ static uint stack_alignment_in_slots() {
+ return stack_alignment_in_bytes() / (VMRegImpl::stack_slot_size);
+ }
+
+ // Array mapping arguments to registers. Argument 0 is usually the 'this'
+ // pointer. Registers can include stack-slots and regular registers.
+ static void calling_convention( BasicType *, VMRegPair *, uint len, bool is_outgoing );
+
+ // Convert a sig into a calling convention register layout
+ // and find interesting things about it.
+ static OptoReg::Name find_receiver( bool is_outgoing );
+ // Return address register. On Intel it is a stack-slot. On PowerPC
+ // it is the Link register. On Sparc it is r31?
+ virtual OptoReg::Name return_addr() const;
+ RegMask _return_addr_mask;
+ // Return value register. On Intel it is EAX. On Sparc i0/o0.
+ static OptoRegPair return_value(int ideal_reg, bool is_outgoing);
+ static OptoRegPair c_return_value(int ideal_reg, bool is_outgoing);
+ RegMask _return_value_mask;
+ // Inline Cache Register
+ static OptoReg::Name inline_cache_reg();
+ static const RegMask &inline_cache_reg_mask();
+ static int inline_cache_reg_encode();
+
+ // Register for DIVI projection of divmodI
+ static RegMask divI_proj_mask();
+ // Register for MODI projection of divmodI
+ static RegMask modI_proj_mask();
+
+ // Register for DIVL projection of divmodL
+ static RegMask divL_proj_mask();
+ // Register for MODL projection of divmodL
+ static RegMask modL_proj_mask();
+
+ // Java-Interpreter calling convention
+ // (what you use when calling between compiled-Java and Interpreted-Java)
+
+ // Number of callee-save + always-save registers
+ // Ignores frame pointer and "special" registers
+ static int number_of_saved_registers();
+
+ // The Method-klass-holder may be passed in the inline_cache_reg
+ // and then expanded into the inline_cache_reg and a method_oop register
+
+ static OptoReg::Name interpreter_method_oop_reg();
+ static const RegMask &interpreter_method_oop_reg_mask();
+ static int interpreter_method_oop_reg_encode();
+
+ static OptoReg::Name compiler_method_oop_reg();
+ static const RegMask &compiler_method_oop_reg_mask();
+ static int compiler_method_oop_reg_encode();
+
+ // Interpreter's Frame Pointer Register
+ static OptoReg::Name interpreter_frame_pointer_reg();
+ static const RegMask &interpreter_frame_pointer_reg_mask();
+
+ // Java-Native calling convention
+ // (what you use when intercalling between Java and C++ code)
+
+ // Array mapping arguments to registers. Argument 0 is usually the 'this'
+ // pointer. Registers can include stack-slots and regular registers.
+ static void c_calling_convention( BasicType*, VMRegPair *, uint );
+ // Frame pointer. The frame pointer is kept at the base of the stack
+ // and so is probably the stack pointer for most machines. On Intel
+ // it is ESP. On the PowerPC it is R1. On Sparc it is SP.
+ OptoReg::Name c_frame_pointer() const;
+ static RegMask c_frame_ptr_mask;
+
+ // !!!!! Special stuff for building ScopeDescs
+ virtual int regnum_to_fpu_offset(int regnum);
+
+ // Is this branch offset small enough to be addressed by a short branch?
+ bool is_short_branch_offset(int offset);
+
+ // Optional scaling for the parameter to the ClearArray/CopyArray node.
+ static const bool init_array_count_is_in_bytes;
+
+ // Threshold small size (in bytes) for a ClearArray/CopyArray node.
+ // Anything this size or smaller may get converted to discrete scalar stores.
+ static const int init_array_short_size;
+
+ // Should the Matcher clone shifts on addressing modes, expecting them to
+ // be subsumed into complex addressing expressions or compute them into
+ // registers? True for Intel but false for most RISCs
+ static const bool clone_shift_expressions;
+
+ // Is it better to copy float constants, or load them directly from memory?
+ // Intel can load a float constant from a direct address, requiring no
+ // extra registers. Most RISCs will have to materialize an address into a
+ // register first, so they may as well materialize the constant immediately.
+ static const bool rematerialize_float_constants;
+
+ // If CPU can load and store mis-aligned doubles directly then no fixup is
+ // needed. Else we split the double into 2 integer pieces and move it
+ // piece-by-piece. Only happens when passing doubles into C code or when
+ // calling i2c adapters as the Java calling convention forces doubles to be
+ // aligned.
+ static const bool misaligned_doubles_ok;
+
+ // Perform a platform dependent implicit null fixup. This is needed
+ // on windows95 to take care of some unusual register constraints.
+ void pd_implicit_null_fixup(MachNode *load, uint idx);
+
+ // Advertise here if the CPU requires explicit rounding operations
+ // to implement the UseStrictFP mode.
+ static const bool strict_fp_requires_explicit_rounding;
+
+ // Do floats take an entire double register or just half?
+ static const bool float_in_double;
+ // Do ints take an entire long register or just half?
+ static const bool int_in_long;
+
+ // This routine is run whenever a graph fails to match.
+ // If it returns, the compiler should bailout to interpreter without error.
+ // In non-product mode, SoftMatchFailure is false to detect non-canonical
+ // graphs. Print a message and exit.
+ static void soft_match_failure() {
+ if( SoftMatchFailure ) return;
+ else { fatal("SoftMatchFailure is not allowed except in product"); }
+ }
+
+ // Used by the DFA in dfa_sparc.cpp. Check for a prior FastLock
+ // acting as an Acquire and thus we don't need an Acquire here. We
+ // retain the Node to act as a compiler ordering barrier.
+ static bool prior_fast_lock( const Node *acq );
+
+ // Used by the DFA in dfa_sparc.cpp. Check for a following
+ // FastUnLock acting as a Release and thus we don't need a Release
+ // here. We retain the Node to act as a compiler ordering barrier.
+ static bool post_fast_unlock( const Node *rel );
+
+ // Check for a following volatile memory barrier without an
+ // intervening load and thus we don't need a barrier here. We
+ // retain the Node to act as a compiler ordering barrier.
+ static bool post_store_load_barrier(const Node* mb);
+
+
+#ifdef ASSERT
+ void dump_old2new_map(); // machine-independent to machine-dependent
+#endif
+};
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
new file mode 100644
index 000000000..26904be58
--- /dev/null
+++ b/src/share/vm/opto/memnode.cpp
@@ -0,0 +1,3222 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_memnode.cpp.incl"
+
+//=============================================================================
+uint MemNode::size_of() const { return sizeof(*this); }
+
+const TypePtr *MemNode::adr_type() const {
+ Node* adr = in(Address);
+ const TypePtr* cross_check = NULL;
+ DEBUG_ONLY(cross_check = _adr_type);
+ return calculate_adr_type(adr->bottom_type(), cross_check);
+}
+
+#ifndef PRODUCT
+void MemNode::dump_spec(outputStream *st) const {
+ if (in(Address) == NULL) return; // node is dead
+#ifndef ASSERT
+ // fake the missing field
+ const TypePtr* _adr_type = NULL;
+ if (in(Address) != NULL)
+ _adr_type = in(Address)->bottom_type()->isa_ptr();
+#endif
+ dump_adr_type(this, _adr_type, st);
+
+ Compile* C = Compile::current();
+ if( C->alias_type(_adr_type)->is_volatile() )
+ st->print(" Volatile!");
+}
+
+void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) {
+ st->print(" @");
+ if (adr_type == NULL) {
+ st->print("NULL");
+ } else {
+ adr_type->dump_on(st);
+ Compile* C = Compile::current();
+ Compile::AliasType* atp = NULL;
+ if (C->have_alias_type(adr_type)) atp = C->alias_type(adr_type);
+ if (atp == NULL)
+ st->print(", idx=?\?;");
+ else if (atp->index() == Compile::AliasIdxBot)
+ st->print(", idx=Bot;");
+ else if (atp->index() == Compile::AliasIdxTop)
+ st->print(", idx=Top;");
+ else if (atp->index() == Compile::AliasIdxRaw)
+ st->print(", idx=Raw;");
+ else {
+ ciField* field = atp->field();
+ if (field) {
+ st->print(", name=");
+ field->print_name_on(st);
+ }
+ st->print(", idx=%d;", atp->index());
+ }
+ }
+}
+
+extern void print_alias_types();
+
+#endif
+
+//--------------------------Ideal_common---------------------------------------
+// Look for degenerate control and memory inputs. Bypass MergeMem inputs.
+// Unhook non-raw memories from complete (macro-expanded) initializations.
+Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) {
+ // If our control input is a dead region, kill all below the region
+ Node *ctl = in(MemNode::Control);
+ if (ctl && remove_dead_region(phase, can_reshape))
+ return this;
+
+ // Ignore if memory is dead, or self-loop
+ Node *mem = in(MemNode::Memory);
+ if( phase->type( mem ) == Type::TOP ) return NodeSentinel; // caller will return NULL
+ assert( mem != this, "dead loop in MemNode::Ideal" );
+
+ Node *address = in(MemNode::Address);
+ const Type *t_adr = phase->type( address );
+ if( t_adr == Type::TOP ) return NodeSentinel; // caller will return NULL
+
+ // Avoid independent memory operations
+ Node* old_mem = mem;
+
+ if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
+ InitializeNode* init = mem->in(0)->as_Initialize();
+ if (init->is_complete()) { // i.e., after macro expansion
+ const TypePtr* tp = t_adr->is_ptr();
+ uint alias_idx = phase->C->get_alias_index(tp);
+ // Free this slice from the init. It was hooked, temporarily,
+ // by GraphKit::set_output_for_allocation.
+ if (alias_idx > Compile::AliasIdxRaw) {
+ mem = init->memory(alias_idx);
+ // ...but not with the raw-pointer slice.
+ }
+ }
+ }
+
+ if (mem->is_MergeMem()) {
+ MergeMemNode* mmem = mem->as_MergeMem();
+ const TypePtr *tp = t_adr->is_ptr();
+ uint alias_idx = phase->C->get_alias_index(tp);
+#ifdef ASSERT
+ {
+ // Check that current type is consistent with the alias index used during graph construction
+ assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx");
+ const TypePtr *adr_t = adr_type();
+ bool consistent = adr_t == NULL || adr_t->empty() || phase->C->must_alias(adr_t, alias_idx );
+ // Sometimes dead array references collapse to a[-1], a[-2], or a[-3]
+ if( !consistent && adr_t != NULL && !adr_t->empty() &&
+ tp->isa_aryptr() && tp->offset() == Type::OffsetBot &&
+ adr_t->isa_aryptr() && adr_t->offset() != Type::OffsetBot &&
+ ( adr_t->offset() == arrayOopDesc::length_offset_in_bytes() ||
+ adr_t->offset() == oopDesc::klass_offset_in_bytes() ||
+ adr_t->offset() == oopDesc::mark_offset_in_bytes() ) ) {
+ // don't assert if it is dead code.
+ consistent = true;
+ }
+ if( !consistent ) {
+ tty->print("alias_idx==%d, adr_type()==", alias_idx); if( adr_t == NULL ) { tty->print("NULL"); } else { adr_t->dump(); }
+ tty->cr();
+ print_alias_types();
+ assert(consistent, "adr_type must match alias idx");
+ }
+ }
+#endif
+ // TypeInstPtr::NOTNULL+any is an OOP with unknown offset - generally
+ // means an array I have not precisely typed yet. Do not do any
+ // alias stuff with it any time soon.
+ const TypeInstPtr *tinst = tp->isa_instptr();
+ if( tp->base() != Type::AnyPtr &&
+ !(tinst &&
+ tinst->klass()->is_java_lang_Object() &&
+ tinst->offset() == Type::OffsetBot) ) {
+ // compress paths and change unreachable cycles to TOP
+ // If not, we can update the input infinitely along a MergeMem cycle
+ // Equivalent code in PhiNode::Ideal
+ Node* m = phase->transform(mmem);
+ // If transformed to a MergeMem, get the desired slice
+ // Otherwise the returned node represents memory for every slice
+ mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m;
+ // Update input if it is progress over what we have now
+ }
+ }
+
+ if (mem != old_mem) {
+ set_req(MemNode::Memory, mem);
+ return this;
+ }
+
+ // let the subclass continue analyzing...
+ return NULL;
+}
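+
+// Roughly speaking (illustration only), the MergeMem bypass above replaces a
+// memory input of the form
+//   mem = MergeMem(<base_memory>, ..., slice_k, ...)
+// with slice_k, the single slice selected by this operation's alias index, so
+// the memory op no longer depends on unrelated memory slices.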
+
+// Helper function for proving some simple control dominations.
+// Attempt to prove that control input 'dom' dominates (or equals) 'sub'.
+// Already assumes that 'dom' is available at 'sub', and that 'sub'
+// is not a constant (dominated by the method's StartNode).
+// Used by MemNode::find_previous_store to prove that the
+// control input of a memory operation predates (dominates)
+// an allocation it wants to look past.
+bool MemNode::detect_dominating_control(Node* dom, Node* sub) {
+ if (dom == NULL) return false;
+ if (dom->is_Proj()) dom = dom->in(0);
+ if (dom->is_Start()) return true; // anything inside the method
+ if (dom->is_Root()) return true; // dom 'controls' a constant
+ int cnt = 20; // detect cycle or too much effort
+ while (sub != NULL) { // walk 'sub' up the chain to 'dom'
+ if (--cnt < 0) return false; // in a cycle or too complex
+ if (sub == dom) return true;
+ if (sub->is_Start()) return false;
+ if (sub->is_Root()) return false;
+ Node* up = sub->in(0);
+ if (sub == up && sub->is_Region()) {
+ for (uint i = 1; i < sub->req(); i++) {
+ Node* in = sub->in(i);
+ if (in != NULL && !in->is_top() && in != sub) {
+ up = in; break; // take any path on the way up to 'dom'
+ }
+ }
+ }
+ if (sub == up) return false; // some kind of tight cycle
+ sub = up;
+ }
+ return false;
+}
+
+//---------------------detect_ptr_independence---------------------------------
+// Used by MemNode::find_previous_store to prove that two base
+// pointers are never equal.
+// The pointers are accompanied by their associated allocations,
+// if any, which have been previously discovered by the caller.
+bool MemNode::detect_ptr_independence(Node* p1, AllocateNode* a1,
+ Node* p2, AllocateNode* a2,
+ PhaseTransform* phase) {
+ // Attempt to prove that these two pointers cannot be aliased.
+ // They may both manifestly be allocations, and they should differ.
+ // Or, if they are not both allocations, they can be distinct constants.
+ // Otherwise, one is an allocation and the other a pre-existing value.
+ if (a1 == NULL && a2 == NULL) { // neither an allocation
+ return (p1 != p2) && p1->is_Con() && p2->is_Con();
+ } else if (a1 != NULL && a2 != NULL) { // both allocations
+ return (a1 != a2);
+ } else if (a1 != NULL) { // one allocation a1
+ // (Note: p2->is_Con implies p2->in(0)->is_Root, which dominates.)
+ return detect_dominating_control(p2->in(0), a1->in(0));
+ } else { //(a2 != NULL) // one allocation a2
+ return detect_dominating_control(p1->in(0), a2->in(0));
+ }
+ return false;
+}
+
+
+// The logic for reordering loads and stores uses four steps:
+// (a) Walk carefully past stores and initializations which we
+// can prove are independent of this load.
+// (b) Observe that the next memory state makes an exact match
+// with self (load or store), and locate the relevant store.
+// (c) Ensure that, if we were to wire self directly to the store,
+// the optimizer would fold it up somehow.
+// (d) Do the rewiring, and return, depending on some other part of
+// the optimizer to fold up the load.
+// This routine handles steps (a) and (b). Steps (c) and (d) are
+// specific to loads and stores, so they are handled by the callers.
+// (Currently, only LoadNode::Ideal has steps (c), (d). More later.)
+//
+Node* MemNode::find_previous_store(PhaseTransform* phase) {
+ Node* ctrl = in(MemNode::Control);
+ Node* adr = in(MemNode::Address);
+ intptr_t offset = 0;
+ Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
+
+ if (offset == Type::OffsetBot)
+ return NULL; // cannot unalias unless there are precise offsets
+
+ intptr_t size_in_bytes = memory_size();
+
+ Node* mem = in(MemNode::Memory); // start searching here...
+
+ int cnt = 50; // Cycle limiter
+ for (;;) { // While we can dance past unrelated stores...
+ if (--cnt < 0) break; // Caught in cycle or a complicated dance?
+
+ if (mem->is_Store()) {
+ Node* st_adr = mem->in(MemNode::Address);
+ intptr_t st_offset = 0;
+ Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
+ if (st_base == NULL)
+ break; // inscrutable pointer
+ if (st_offset != offset && st_offset != Type::OffsetBot) {
+ const int MAX_STORE = BytesPerLong;
+ if (st_offset >= offset + size_in_bytes ||
+ st_offset <= offset - MAX_STORE ||
+ st_offset <= offset - mem->as_Store()->memory_size()) {
+ // Success: The offsets are provably independent.
+ // (You may ask, why not just test st_offset != offset and be done?
+ // The answer is that stores of different sizes can co-exist
+ // in the same sequence of RawMem effects. We sometimes initialize
+ // a whole 'tile' of array elements with a single jint or jlong.)
+ mem = mem->in(MemNode::Memory);
+ continue; // (a) advance through independent store memory
+ }
+ }
+ if (st_base != base &&
+ detect_ptr_independence(base, alloc,
+ st_base,
+ AllocateNode::Ideal_allocation(st_base, phase),
+ phase)) {
+ // Success: The bases are provably independent.
+ mem = mem->in(MemNode::Memory);
+ continue; // (a) advance through independent store memory
+ }
+
+ // (b) At this point, if the bases or offsets do not agree, we lose,
+ // since we have not managed to prove 'this' and 'mem' independent.
+ if (st_base == base && st_offset == offset) {
+ return mem; // let caller handle steps (c), (d)
+ }
+
+ } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
+ InitializeNode* st_init = mem->in(0)->as_Initialize();
+ AllocateNode* st_alloc = st_init->allocation();
+ if (st_alloc == NULL)
+ break; // something degenerated
+ bool known_identical = false;
+ bool known_independent = false;
+ if (alloc == st_alloc)
+ known_identical = true;
+ else if (alloc != NULL)
+ known_independent = true;
+ else if (ctrl != NULL &&
+ detect_dominating_control(ctrl, st_alloc->in(0)))
+ known_independent = true;
+
+ if (known_independent) {
+ // The bases are provably independent: Either they are
+ // manifestly distinct allocations, or else the control
+ // of this load dominates the store's allocation.
+ int alias_idx = phase->C->get_alias_index(adr_type());
+ if (alias_idx == Compile::AliasIdxRaw) {
+ mem = st_alloc->in(TypeFunc::Memory);
+ } else {
+ mem = st_init->memory(alias_idx);
+ }
+ continue; // (a) advance through independent store memory
+ }
+
+ // (b) at this point, if we are not looking at a store initializing
+ // the same allocation we are loading from, we lose.
+ if (known_identical) {
+ // From caller, can_see_stored_value will consult find_captured_store.
+ return mem; // let caller handle steps (c), (d)
+ }
+
+ }
+
+ // Unless there is an explicit 'continue', we must bail out here,
+ // because 'mem' is an inscrutable memory state (e.g., a call).
+ break;
+ }
+
+ return NULL; // bail out
+}
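+
+// Illustrative usage sketch of steps (c) and (d), mirroring LoadNode::Ideal below:
+//
+//   Node* prev_mem = find_previous_store(phase);           // steps (a), (b)
+//   if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) {
+//     if (can_see_stored_value(prev_mem, phase)) {         // step (c): will fold
+//       set_req(MemNode::Memory, prev_mem);                // step (d): rewire
+//     }
+//   }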
+
+//----------------------calculate_adr_type-------------------------------------
+// Helper function. Notices when the given type of address hits top or bottom.
+// Also, asserts a cross-check of the type against the expected address type.
+const TypePtr* MemNode::calculate_adr_type(const Type* t, const TypePtr* cross_check) {
+ if (t == Type::TOP) return NULL; // does not touch memory any more?
+ #ifdef PRODUCT
+ cross_check = NULL;
+ #else
+ if (!VerifyAliases || is_error_reported() || Node::in_dump()) cross_check = NULL;
+ #endif
+ const TypePtr* tp = t->isa_ptr();
+ if (tp == NULL) {
+ assert(cross_check == NULL || cross_check == TypePtr::BOTTOM, "expected memory type must be wide");
+ return TypePtr::BOTTOM; // touches lots of memory
+ } else {
+ #ifdef ASSERT
+ // %%%% [phh] We don't check the alias index if cross_check is
+ // TypeRawPtr::BOTTOM. Needs to be investigated.
+ if (cross_check != NULL &&
+ cross_check != TypePtr::BOTTOM &&
+ cross_check != TypeRawPtr::BOTTOM) {
+ // Recheck the alias index, to see if it has changed (due to a bug).
+ Compile* C = Compile::current();
+ assert(C->get_alias_index(cross_check) == C->get_alias_index(tp),
+ "must stay in the original alias category");
+ // The type of the address must be contained in the adr_type,
+ // disregarding "null"-ness.
+ // (We make an exception for TypeRawPtr::BOTTOM, which is a bit bucket.)
+ const TypePtr* tp_notnull = tp->join(TypePtr::NOTNULL)->is_ptr();
+ assert(cross_check->meet(tp_notnull) == cross_check,
+ "real address must not escape from expected memory type");
+ }
+ #endif
+ return tp;
+ }
+}
+
+//------------------------adr_phi_is_loop_invariant----------------------------
+// A helper function for Ideal_DU_postCCP to check if a Phi in a counted
+// loop is loop invariant. Make a quick traversal of Phi and associated
+// CastPP nodes, looking to see if they are a closed group within the loop.
+bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) {
+ // The idea is that the phi-nest must boil down to only CastPP nodes
+ // with the same data. This implies that any path into the loop already
+ // includes such a CastPP, and so the original cast, whatever its input,
+ // must be covered by an equivalent cast, with an earlier control input.
+ ResourceMark rm;
+
+ // The loop entry input of the phi should be the unique dominating
+ // node for every Phi/CastPP in the loop.
+ Unique_Node_List closure;
+ closure.push(adr_phi->in(LoopNode::EntryControl));
+
+ // Add the phi node and the cast to the worklist.
+ Unique_Node_List worklist;
+ worklist.push(adr_phi);
+ if( cast != NULL ){
+ if( !cast->is_ConstraintCast() ) return false;
+ worklist.push(cast);
+ }
+
+ // Begin recursive walk of phi nodes.
+ while( worklist.size() ){
+ // Take a node off the worklist
+ Node *n = worklist.pop();
+ if( !closure.member(n) ){
+ // Add it to the closure.
+ closure.push(n);
+ // Make a sanity check to ensure we don't waste too much time here.
+ if( closure.size() > 20) return false;
+ // This node is OK if:
+ // - it is a cast of an identical value
+ // - or it is a phi node (then we add its inputs to the worklist)
+ // Otherwise, the node is not OK, and we presume the cast is not invariant
+ if( n->is_ConstraintCast() ){
+ worklist.push(n->in(1));
+ } else if( n->is_Phi() ) {
+ for( uint i = 1; i < n->req(); i++ ) {
+ worklist.push(n->in(i));
+ }
+ } else {
+ return false;
+ }
+ }
+ }
+
+ // Quit when the worklist is empty, and we've found no offending nodes.
+ return true;
+}
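+
+// A shape the walk above accepts (rough illustration):
+//   adr_phi = Phi(loop, entry_adr, CastPP(ctrl, adr_phi))
+// The in-loop path reaches the phi only through a CastPP of the same value,
+// so the address boils down to the loop-invariant entry_adr.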
+
+//------------------------------Ideal_DU_postCCP-------------------------------
+// Find any cast-away of null-ness and keep its control. Null cast-aways are
+// going away in this pass and we need to make this memory op depend on the
+// gating null check.
+
+// I tried to leave the CastPP's in. This makes the graph more accurate in
+// some sense; we get to keep around the knowledge that an oop is not-null
+// after some test. Alas, the CastPP's interfere with GVN (some values are
+// the regular oop, some are the CastPP of the oop, all merge at Phi's which
+// cannot collapse, etc). This cost us 10% on SpecJVM, even when I removed
+// some of the more trivial cases in the optimizer. Removing more useless
+// Phi's started allowing Loads to illegally float above null checks. I gave
+// up on this approach. CNC 10/20/2000
+Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
+ Node *ctr = in(MemNode::Control);
+ Node *mem = in(MemNode::Memory);
+ Node *adr = in(MemNode::Address);
+ Node *skipped_cast = NULL;
+ // Need a null check? Regular static accesses do not because they are
+ // from constant addresses. Array ops are gated by the range check (which
+ // always includes a NULL check). Just check field ops.
+ if( !ctr ) {
+ // Scan upwards for the highest location we can place this memory op.
+ while( true ) {
+ switch( adr->Opcode() ) {
+
+ case Op_AddP: // No change to NULL-ness, so peek thru AddP's
+ adr = adr->in(AddPNode::Base);
+ continue;
+
+ case Op_CastPP:
+ // If the CastPP is useless, just peek on through it.
+ if( ccp->type(adr) == ccp->type(adr->in(1)) ) {
+ // Remember the cast that we've peeked through. If we peek
+ // through more than one, then we end up remembering the highest
+ // one, that is, if in a loop, the one closest to the top.
+ skipped_cast = adr;
+ adr = adr->in(1);
+ continue;
+ }
+ // CastPP is going away in this pass! We need this memory op to be
+ // control-dependent on the test that is guarding the CastPP.
+ ccp->hash_delete(this);
+ set_req(MemNode::Control, adr->in(0));
+ ccp->hash_insert(this);
+ return this;
+
+ case Op_Phi:
+ // Attempt to float above a Phi to some dominating point.
+ if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) {
+ // If we've already peeked through a Cast (which could have set the
+ // control), we can't float above a Phi, because the skipped Cast
+ // may not be loop invariant.
+ if (adr_phi_is_loop_invariant(adr, skipped_cast)) {
+ adr = adr->in(1);
+ continue;
+ }
+ }
+
+ // Intentional fallthrough!
+
+ // No obvious dominating point. The mem op is pinned below the Phi
+ // by the Phi itself. If the Phi goes away (no true value is merged)
+ // then the mem op can float, but not indefinitely. It must be pinned
+ // behind the controls leading to the Phi.
+ case Op_CheckCastPP:
+ // These usually stick around to change address type; however, a
+ // useless one can be elided, and we still need to pick up a control edge.
+ if (adr->in(0) == NULL) {
+ // This CheckCastPP node has NO control and is likely useless. But we
+ // need to check further up the ancestor chain for a control input to keep
+ // the node in place. 4959717.
+ skipped_cast = adr;
+ adr = adr->in(1);
+ continue;
+ }
+ ccp->hash_delete(this);
+ set_req(MemNode::Control, adr->in(0));
+ ccp->hash_insert(this);
+ return this;
+
+ // List of "safe" opcodes; those that implicitly block the memory
+ // op below any null check.
+ case Op_CastX2P: // no null checks on native pointers
+ case Op_Parm: // 'this' pointer is not null
+ case Op_LoadP: // Loading from within a klass
+ case Op_LoadKlass: // Loading from within a klass
+ case Op_ConP: // Loading from a klass
+ case Op_CreateEx: // Sucking up the guts of an exception oop
+ case Op_Con: // Reading from TLS
+ case Op_CMoveP: // CMoveP is pinned
+ break; // No progress
+
+ case Op_Proj: // Direct call to an allocation routine
+ case Op_SCMemProj: // Memory state from store conditional ops
+#ifdef ASSERT
+ {
+ assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value");
+ const Node* call = adr->in(0);
+ if (call->is_CallStaticJava()) {
+ const CallStaticJavaNode* call_java = call->as_CallStaticJava();
+ assert(call_java && call_java->method() == NULL, "must be runtime call");
+ // We further presume that this is one of
+ // new_instance_Java, new_array_Java, or
+ // the like, but do not assert for this.
+ } else if (call->is_Allocate()) {
+ // similar case to new_instance_Java, etc.
+ } else if (!call->is_CallLeaf()) {
+ // Projections from fetch_oop (OSR) are allowed as well.
+ ShouldNotReachHere();
+ }
+ }
+#endif
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ break;
+ }
+ }
+
+ return NULL; // No progress
+}
+
+
+//=============================================================================
+uint LoadNode::size_of() const { return sizeof(*this); }
+uint LoadNode::cmp( const Node &n ) const
+{ return !Type::cmp( _type, ((LoadNode&)n)._type ); }
+const Type *LoadNode::bottom_type() const { return _type; }
+uint LoadNode::ideal_reg() const {
+ return Matcher::base2reg[_type->base()];
+}
+
+#ifndef PRODUCT
+void LoadNode::dump_spec(outputStream *st) const {
+ MemNode::dump_spec(st);
+ if( !Verbose && !WizardMode ) {
+ // standard dump does this in Verbose and WizardMode
+ st->print(" #"); _type->dump_on(st);
+ }
+}
+#endif
+
+
+//----------------------------LoadNode::make-----------------------------------
+// Polymorphic factory method:
+LoadNode *LoadNode::make( Compile *C, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt ) {
+ // sanity check the alias category against the created node type
+ assert(!(adr_type->isa_oopptr() &&
+ adr_type->offset() == oopDesc::klass_offset_in_bytes()),
+ "use LoadKlassNode instead");
+ assert(!(adr_type->isa_aryptr() &&
+ adr_type->offset() == arrayOopDesc::length_offset_in_bytes()),
+ "use LoadRangeNode instead");
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return new (C, 3) LoadBNode(ctl, mem, adr, adr_type, rt->is_int() );
+ case T_INT: return new (C, 3) LoadINode(ctl, mem, adr, adr_type, rt->is_int() );
+ case T_CHAR: return new (C, 3) LoadCNode(ctl, mem, adr, adr_type, rt->is_int() );
+ case T_SHORT: return new (C, 3) LoadSNode(ctl, mem, adr, adr_type, rt->is_int() );
+ case T_LONG: return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long() );
+ case T_FLOAT: return new (C, 3) LoadFNode(ctl, mem, adr, adr_type, rt );
+ case T_DOUBLE: return new (C, 3) LoadDNode(ctl, mem, adr, adr_type, rt );
+ case T_ADDRESS: return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_ptr() );
+ case T_OBJECT: return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr());
+ }
+ ShouldNotReachHere();
+ return (LoadNode*)NULL;
+}
+
+LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt) {
+ bool require_atomic = true;
+ return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), require_atomic);
+}
+
+//------------------------------hash-------------------------------------------
+uint LoadNode::hash() const {
+ // unroll addition of interesting fields
+ return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address);
+}
+
+//---------------------------can_see_stored_value------------------------------
+// This routine exists to make sure this set of tests is done the same
+// everywhere. We need to make a coordinated change: first LoadNode::Ideal
+// will change the graph shape in a way which makes memory alive twice at the
+// same time (uses the Oracle model of aliasing), then some
+// LoadXNode::Identity will fold things back to the equivalence-class model
+// of aliasing.
+Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
+ Node* ld_adr = in(MemNode::Address);
+
+ // Loop around twice in the case Load -> Initialize -> Store.
+ // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.)
+ for (int trip = 0; trip <= 1; trip++) {
+
+ if (st->is_Store()) {
+ Node* st_adr = st->in(MemNode::Address);
+ if (!phase->eqv(st_adr, ld_adr)) {
+ // Try harder before giving up... Match raw and non-raw pointers.
+ intptr_t st_off = 0;
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(st_adr, phase, st_off);
+ if (alloc == NULL) return NULL;
+ intptr_t ld_off = 0;
+ AllocateNode* allo2 = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
+ if (alloc != allo2) return NULL;
+ if (ld_off != st_off) return NULL;
+ // At this point we have proven something like this setup:
+ // A = Allocate(...)
+ // L = LoadQ(, AddP(CastPP(, A.Parm),, #Off))
+ // S = StoreQ(, AddP(, A.Parm , #Off), V)
+ // (Actually, we haven't yet proven the Q's are the same.)
+ // In other words, we are loading from a casted version of
+ // the same pointer-and-offset that we stored to.
+ // Thus, we are able to replace L by V.
+ }
+ // Now prove that we have a LoadQ matched to a StoreQ, for some Q.
+ if (store_Opcode() != st->Opcode())
+ return NULL;
+ return st->in(MemNode::ValueIn);
+ }
+
+ intptr_t offset = 0; // scratch
+
+ // A load from a freshly-created object always returns zero.
+ // (This can happen after LoadNode::Ideal resets the load's memory input
+ // to the result of find_captured_store, which returned InitializeNode::zero_memory.)
+ if (st->is_Proj() && st->in(0)->is_Allocate() &&
+ st->in(0) == AllocateNode::Ideal_allocation(ld_adr, phase, offset) &&
+ offset >= st->in(0)->as_Allocate()->minimum_header_size()) {
+ // return a zero value for the load's basic type
+ // (This is one of the few places where a generic PhaseTransform
+ // can create new nodes. Think of it as lazily manifesting
+ // virtually pre-existing constants.)
+ return phase->zerocon(memory_type());
+ }
+
+ // A load from an initialization barrier can match a captured store.
+ if (st->is_Proj() && st->in(0)->is_Initialize()) {
+ InitializeNode* init = st->in(0)->as_Initialize();
+ AllocateNode* alloc = init->allocation();
+ if (alloc != NULL &&
+ alloc == AllocateNode::Ideal_allocation(ld_adr, phase, offset)) {
+ // examine a captured store value
+ st = init->find_captured_store(offset, memory_size(), phase);
+ if (st != NULL)
+ continue; // take one more trip around
+ }
+ }
+
+ break;
+ }
+
+ return NULL;
+}
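+
+// The fold this enables, sketched (illustration only):
+//   mem = StoreI(ctl, mem0, adr, v)
+//   ld  = LoadI (ctl, mem,  adr)
+// can_see_stored_value(mem) returns v, and LoadNode::Identity (below) then
+// replaces ld by v.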
+
+//------------------------------Identity---------------------------------------
+// Loads are identity if previous store is to same address
+Node *LoadNode::Identity( PhaseTransform *phase ) {
+ // If the previous store-maker is the right kind of Store, and the store is
+ // to the same address, then we are equal to the value stored.
+ Node* mem = in(MemNode::Memory);
+ Node* value = can_see_stored_value(mem, phase);
+ if( value ) {
+ // byte, short & char stores truncate naturally.
+ // A load has to load the truncated value which requires
+ // some sort of masking operation and that requires an
+ // Ideal call instead of an Identity call.
+ if (memory_size() < BytesPerInt) {
+ // If the input to the store does not fit with the load's result type,
+ // it must be truncated via an Ideal call.
+ if (!phase->type(value)->higher_equal(phase->type(this)))
+ return this;
+ }
+ // (This works even when value is a Con, but LoadNode::Value
+ // usually runs first, producing the singleton type of the Con.)
+ return value;
+ }
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If the load is from Field memory and the pointer is non-null, we can
+// zero out the control input.
+// If the offset is constant and the base is an object allocation,
+// try to hook me up to the exact initializing store.
+Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* p = MemNode::Ideal_common(phase, can_reshape);
+ if (p) return (p == NodeSentinel) ? NULL : p;
+
+ Node* ctrl = in(MemNode::Control);
+ Node* address = in(MemNode::Address);
+
+ // Skip up past a SafePoint control. Cannot do this for Stores because
+ // pointer stores & cardmarks must stay on the same side of a SafePoint.
+ if( ctrl != NULL && ctrl->Opcode() == Op_SafePoint &&
+ phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw ) {
+ ctrl = ctrl->in(0);
+ set_req(MemNode::Control,ctrl);
+ }
+
+ // Check for useless control edge in some common special cases
+ if (in(MemNode::Control) != NULL) {
+ intptr_t ignore = 0;
+ Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore);
+ if (base != NULL
+ && phase->type(base)->higher_equal(TypePtr::NOTNULL)
+ && detect_dominating_control(base->in(0), phase->C->start())) {
+ // A method-invariant, non-null address (constant or 'this' argument).
+ set_req(MemNode::Control, NULL);
+ }
+ }
+
+ // Check for prior store with a different base or offset; make Load
+ // independent. Skip through any number of them. Bail out if the stores
+ // are in an endless dead cycle and report no progress. This is a key
+ // transform for Reflection. However, if, after skipping through the Stores,
+ // we can't then fold up against a prior store, do NOT do the transform, as
+ // this amounts to using the 'Oracle' model of aliasing. It leaves the same
+ // array memory alive twice: once for the hoisted Load and again after the
+ // bypassed Store. This situation only works if EVERYBODY who does
+ // anti-dependence work knows how to bypass. I.e. we need all
+ // anti-dependence checks to ask the same Oracle. Right now, that Oracle is
+ // the alias index stuff. So instead, peek through Stores and IFF we can
+ // fold up, do so.
+ Node* prev_mem = find_previous_store(phase);
+ // Steps (a), (b): Walk past independent stores to find an exact match.
+ if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) {
+ // (c) See if we can fold up on the spot, but don't fold up here.
+ // Fold-up might require truncation (for LoadB/LoadS/LoadC) or
+ // just return a prior value, which is done by Identity calls.
+ if (can_see_stored_value(prev_mem, phase)) {
+ // Make ready for step (d):
+ set_req(MemNode::Memory, prev_mem);
+ return this;
+ }
+ }
+
+ return NULL; // No further progress
+}
+
+// Helper to recognize certain Klass fields which are invariant across
+// some group of array types (e.g., int[] or all T[] where T < Object).
+const Type*
+LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
+ ciKlass* klass) const {
+ if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_modifier_flags. Return its (constant) value.
+ // (Folds up the 2nd indirection in aClassConstant.getModifiers().)
+ assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags");
+ return TypeInt::make(klass->modifier_flags());
+ }
+ if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_access_flags. Return its (constant) value.
+ // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).)
+ assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags");
+ return TypeInt::make(klass->access_flags());
+ }
+ if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_layout_helper. Return its constant value if known.
+ assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper");
+ return TypeInt::make(klass->layout_helper());
+ }
+
+ // No match.
+ return NULL;
+}
+
+//------------------------------Value-----------------------------------------
+const Type *LoadNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ Node* mem = in(MemNode::Memory);
+ const Type *t1 = phase->type(mem);
+ if (t1 == Type::TOP) return Type::TOP;
+ Node* adr = in(MemNode::Address);
+ const TypePtr* tp = phase->type(adr)->isa_ptr();
+ if (tp == NULL || tp->empty()) return Type::TOP;
+ int off = tp->offset();
+ assert(off != Type::OffsetTop, "case covered by TypePtr::empty");
+
+ // Try to guess loaded type from pointer type
+ if (tp->base() == Type::AryPtr) {
+ const Type *t = tp->is_aryptr()->elem();
+ // Don't do this for integer types. There is only potential profit if
+ // the element type t is lower than _type; that is, for int types, if _type is
+ // more restrictive than t. This only happens here if one is short and the other
+ // char (both 16 bits), and in those cases we've made an intentional decision
+ // to use one kind of load over the other. See AndINode::Ideal and 4965907.
+ // Also, do not try to narrow the type for a LoadKlass, regardless of offset.
+ //
+ // Yes, it is possible to encounter an expression like (LoadKlass p1:(AddP x x 8))
+ // where the _gvn.type of the AddP is wider than 8. This occurs when an earlier
+ // copy p0 of (AddP x x 8) has been proven equal to p1, and the p0 has been
+ // subsumed by p1. If p1 is on the worklist but has not yet been re-transformed,
+ // it is possible that p1 will have a type like Foo*[int+]:NotNull*+any.
+ // In fact, that could have been the original type of p1, and p1 could have
+ // had an original form like p1:(AddP x x (LShiftL quux 3)), where the
+ // expression (LShiftL quux 3) independently optimized to the constant 8.
+ if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
+ && Opcode() != Op_LoadKlass) {
+ // t might actually be lower than _type, if _type is a unique
+ // concrete subclass of abstract class t.
+ // Make sure the reference is not into the header, by comparing
+ // the offset against the offset of the start of the array's data.
+ // Different array types begin at slightly different offsets (12 vs. 16).
+ // We choose T_BYTE as an example base type that is least restrictive
+ // as to alignment, which will therefore produce the smallest
+ // possible base offset.
+ const int min_base_off = arrayOopDesc::base_offset_in_bytes(T_BYTE);
+ if ((uint)off >= (uint)min_base_off) { // is the offset beyond the header?
+ const Type* jt = t->join(_type);
+ // In any case, do not allow the join, per se, to empty out the type.
+ if (jt->empty() && !t->empty()) {
+ // This can happen if an interface-typed array narrows to a class type.
+ jt = _type;
+ }
+ return jt;
+ }
+ }
+ } else if (tp->base() == Type::InstPtr) {
+ assert( off != Type::OffsetBot ||
+ // arrays can be cast to Objects
+ tp->is_oopptr()->klass()->is_java_lang_Object() ||
+ // unsafe field access may not have a constant offset
+ phase->C->has_unsafe_access(),
+ "Field accesses must be precise" );
+ // For oop loads, we expect the _type to be precise
+ } else if (tp->base() == Type::KlassPtr) {
+ assert( off != Type::OffsetBot ||
+ // arrays can be cast to Objects
+ tp->is_klassptr()->klass()->is_java_lang_Object() ||
+ // also allow array-loading from the primary supertype
+ // array during subtype checks
+ Opcode() == Op_LoadKlass,
+ "Field accesses must be precise" );
+ // For klass/static loads, we expect the _type to be precise
+ }
+
+ const TypeKlassPtr *tkls = tp->isa_klassptr();
+ if (tkls != NULL && !StressReflectiveCode) {
+ ciKlass* klass = tkls->klass();
+ if (klass->is_loaded() && tkls->klass_is_exact()) {
+ // We are loading a field from a Klass metaobject whose identity
+ // is known at compile time (the type is "exact" or "precise").
+ // Check for fields we know are maintained as constants by the VM.
+ if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_super_check_offset. Return its (constant) value.
+ // (Folds up type checking code.)
+ assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset");
+ return TypeInt::make(klass->super_check_offset());
+ }
+ // Compute index into primary_supers array
+ juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+ // Check for overflowing; use unsigned compare to handle the negative case.
+ if( depth < ciKlass::primary_super_limit() ) {
+ // The field is an element of Klass::_primary_supers. Return its (constant) value.
+ // (Folds up type checking code.)
+ assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers");
+ ciKlass *ss = klass->super_of_depth(depth);
+ return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR;
+ }
+ const Type* aift = load_array_final_field(tkls, klass);
+ if (aift != NULL) return aift;
+ if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc)
+ && klass->is_array_klass()) {
+ // The field is arrayKlass::_component_mirror. Return its (constant) value.
+ // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.)
+ assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror");
+ return TypeInstPtr::make(klass->as_array_klass()->component_mirror());
+ }
+ if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_java_mirror. Return its (constant) value.
+ // (Folds up the 2nd indirection in anObjConstant.getClass().)
+ assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
+ return TypeInstPtr::make(klass->java_mirror());
+ }
+ }
+
+ // We can still check if we are loading from the primary_supers array at a
+ // shallow enough depth. Even though the klass is not exact, entries less
+ // than or equal to its super depth are correct.
+ if (klass->is_loaded() ) {
+ ciType *inner = klass->klass();
+ while( inner->is_obj_array_klass() )
+ inner = inner->as_obj_array_klass()->base_element_type();
+ if( inner->is_instance_klass() &&
+ !inner->as_instance_klass()->flags().is_interface() ) {
+ // Compute index into primary_supers array
+ juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+ // Check for overflowing; use unsigned compare to handle the negative case.
+ if( depth < ciKlass::primary_super_limit() &&
+ depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case
+ // The field is an element of Klass::_primary_supers. Return its (constant) value.
+ // (Folds up type checking code.)
+ assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers");
+ ciKlass *ss = klass->super_of_depth(depth);
+ return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR;
+ }
+ }
+ }
+
+ // If the type is enough to determine that the thing is not an array,
+ // we can give the layout_helper a positive interval type.
+ // This will help short-circuit some reflective code.
+ if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)
+ && !klass->is_array_klass() // not directly typed as an array
+ && !klass->is_interface() // specifically not Serializable & Cloneable
+ && !klass->is_java_lang_Object() // not the supertype of all T[]
+ ) {
+ // Note: When interfaces are reliable, we can narrow the interface
+ // test to (klass != Serializable && klass != Cloneable).
+ assert(Opcode() == Op_LoadI, "must load an int from _layout_helper");
+ jint min_size = Klass::instance_layout_helper(oopDesc::header_size(), false);
+ // The key property of this type is that it folds up tests
+ // for array-ness, since it proves that the layout_helper is positive.
+ // Thus, a generic value like the basic object layout helper works fine.
+ return TypeInt::make(min_size, max_jint, Type::WidenMin);
+ }
+ }
+
+ // If we are loading from a freshly-allocated object, produce a zero,
+ // if the load is provably beyond the header of the object.
+ // (Also allow a variable load from a fresh array to produce zero.)
+ if (ReduceFieldZeroing) {
+ Node* value = can_see_stored_value(mem,phase);
+ if (value != NULL && value->is_Con())
+ return value->bottom_type();
+ }
+
+ return _type;
+}
+
+//------------------------------match_edge-------------------------------------
+// Do we Match on this edge index or not? Match only the address.
+uint LoadNode::match_edge(uint idx) const {
+ return idx == MemNode::Address;
+}
+
+//--------------------------LoadBNode::Ideal--------------------------------------
+//
+// If the previous store is to the same address as this load,
+// and the value stored was larger than a byte, replace this load
+// with the value stored truncated to a byte. If no truncation is
+// needed, the replacement is done in LoadNode::Identity().
+//
+Node *LoadBNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* mem = in(MemNode::Memory);
+ Node* value = can_see_stored_value(mem,phase);
+ if( value && !phase->type(value)->higher_equal( _type ) ) {
+ Node *result = phase->transform( new (phase->C, 3) LShiftINode(value, phase->intcon(24)) );
+ return new (phase->C, 3) RShiftINode(result, phase->intcon(24));
+ }
+ // Identity call will handle the case where truncation is not needed.
+ return LoadNode::Ideal(phase, can_reshape);
+}
+
+//--------------------------LoadCNode::Ideal--------------------------------------
+//
+// If the previous store is to the same address as this load,
+// and the value stored was larger than a char, replace this load
+// with the value stored truncated to a char. If no truncation is
+// needed, the replacement is done in LoadNode::Identity().
+//
+Node *LoadCNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* mem = in(MemNode::Memory);
+ Node* value = can_see_stored_value(mem,phase);
+ if( value && !phase->type(value)->higher_equal( _type ) )
+ return new (phase->C, 3) AndINode(value,phase->intcon(0xFFFF));
+ // Identity call will handle the case where truncation is not needed.
+ return LoadNode::Ideal(phase, can_reshape);
+}
+
+//--------------------------LoadSNode::Ideal--------------------------------------
+//
+// If the previous store is to the same address as this load,
+// and the value stored was larger than a short, replace this load
+// with the value stored truncated to a short. If no truncation is
+// needed, the replacement is done in LoadNode::Identity().
+//
+Node *LoadSNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* mem = in(MemNode::Memory);
+ Node* value = can_see_stored_value(mem,phase);
+ if( value && !phase->type(value)->higher_equal( _type ) ) {
+ Node *result = phase->transform( new (phase->C, 3) LShiftINode(value, phase->intcon(16)) );
+ return new (phase->C, 3) RShiftINode(result, phase->intcon(16));
+ }
+ // Identity call will handle the case where truncation is not needed.
+ return LoadNode::Ideal(phase, can_reshape);
+}
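+
+// The three Ideal routines above differ only in how they re-narrow the stored
+// value (illustration only):
+//   LoadB: (v << 24) >> 24    sign-extend the low 8 bits
+//   LoadC:  v & 0xFFFF        zero-extend the low 16 bits
+//   LoadS: (v << 16) >> 16    sign-extend the low 16 bits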
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *LoadKlassNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(MemNode::Memory) );
+ if (t1 == Type::TOP) return Type::TOP;
+ Node *adr = in(MemNode::Address);
+ const Type *t2 = phase->type( adr );
+ if (t2 == Type::TOP) return Type::TOP;
+ const TypePtr *tp = t2->is_ptr();
+ if (TypePtr::above_centerline(tp->ptr()) ||
+ tp->ptr() == TypePtr::Null) return Type::TOP;
+
+ // Return a more precise klass, if possible
+ const TypeInstPtr *tinst = tp->isa_instptr();
+ if (tinst != NULL) {
+ ciInstanceKlass* ik = tinst->klass()->as_instance_klass();
+ int offset = tinst->offset();
+ if (ik == phase->C->env()->Class_klass()
+ && (offset == java_lang_Class::klass_offset_in_bytes() ||
+ offset == java_lang_Class::array_klass_offset_in_bytes())) {
+ // We are loading a special hidden field from a Class mirror object,
+ // the field which points to the VM's Klass metaobject.
+ ciType* t = tinst->java_mirror_type();
+ // java_mirror_type returns non-null for compile-time Class constants.
+ if (t != NULL) {
+ // constant oop => constant klass
+ if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
+ return TypeKlassPtr::make(ciArrayKlass::make(t));
+ }
+ if (!t->is_klass()) {
+ // a primitive Class (e.g., int.class) has NULL for a klass field
+ return TypePtr::NULL_PTR;
+ }
+ // (Folds up the 1st indirection in aClassConstant.getModifiers().)
+ return TypeKlassPtr::make(t->as_klass());
+ }
+ // non-constant mirror, so we can't tell what's going on
+ }
+ if( !ik->is_loaded() )
+ return _type; // Bail out if not loaded
+ if (offset == oopDesc::klass_offset_in_bytes()) {
+ if (tinst->klass_is_exact()) {
+ return TypeKlassPtr::make(ik);
+ }
+ // See if we can become precise: no subklasses and no interface
+ // (Note: We need to support verified interfaces.)
+ if (!ik->is_interface() && !ik->has_subklass()) {
+ //assert(!UseExactTypes, "this code should be useless with exact types");
+ // Add a dependence; if any subclass added we need to recompile
+ if (!ik->is_final()) {
+ // %%% should use stronger assert_unique_concrete_subtype instead
+ phase->C->dependencies()->assert_leaf_type(ik);
+ }
+ // Return precise klass
+ return TypeKlassPtr::make(ik);
+ }
+
+ // Return root of possible klass
+ return TypeKlassPtr::make(TypePtr::NotNull, ik, 0/*offset*/);
+ }
+ }
+
+ // Check for loading klass from an array
+ const TypeAryPtr *tary = tp->isa_aryptr();
+ if( tary != NULL ) {
+ ciKlass *tary_klass = tary->klass();
+ if (tary_klass != NULL // can be NULL when at BOTTOM or TOP
+ && tary->offset() == oopDesc::klass_offset_in_bytes()) {
+ if (tary->klass_is_exact()) {
+ return TypeKlassPtr::make(tary_klass);
+ }
+ ciArrayKlass *ak = tary->klass()->as_array_klass();
+ // If the klass is an object array, we defer the question to the
+ // array component klass.
+ if( ak->is_obj_array_klass() ) {
+ assert( ak->is_loaded(), "" );
+ ciKlass *base_k = ak->as_obj_array_klass()->base_element_klass();
+ if( base_k->is_loaded() && base_k->is_instance_klass() ) {
+ ciInstanceKlass* ik = base_k->as_instance_klass();
+ // See if we can become precise: no subklasses and no interface
+ if (!ik->is_interface() && !ik->has_subklass()) {
+ //assert(!UseExactTypes, "this code should be useless with exact types");
+ // Add a dependence; if any subclass added we need to recompile
+ if (!ik->is_final()) {
+ phase->C->dependencies()->assert_leaf_type(ik);
+ }
+ // Return precise array klass
+ return TypeKlassPtr::make(ak);
+ }
+ }
+ return TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
+ } else { // Found a type-array?
+ //assert(!UseExactTypes, "this code should be useless with exact types");
+ assert( ak->is_type_array_klass(), "" );
+ return TypeKlassPtr::make(ak); // These are always precise
+ }
+ }
+ }
+
+ // Check for loading klass from an array klass
+ const TypeKlassPtr *tkls = tp->isa_klassptr();
+ if (tkls != NULL && !StressReflectiveCode) {
+ ciKlass* klass = tkls->klass();
+ if( !klass->is_loaded() )
+ return _type; // Bail out if not loaded
+ if( klass->is_obj_array_klass() &&
+ (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) {
+ ciKlass* elem = klass->as_obj_array_klass()->element_klass();
+ // // Always returning precise element type is incorrect,
+ // // e.g., element type could be object and array may contain strings
+ // return TypeKlassPtr::make(TypePtr::Constant, elem, 0);
+
+ // The array's TypeKlassPtr was declared 'precise' or 'not precise'
+ // according to the element type's subclassing.
+ return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
+ }
+ if( klass->is_instance_klass() && tkls->klass_is_exact() &&
+ (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) {
+ ciKlass* sup = klass->as_instance_klass()->super();
+ // The field is Klass::_super. Return its (constant) value.
+ // (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
+ return sup ? TypeKlassPtr::make(sup) : TypePtr::NULL_PTR;
+ }
+ }
+
+ // Bailout case
+ return LoadNode::Value(phase);
+}
+
+//------------------------------Identity---------------------------------------
+// To clean up reflective code, simplify k.java_mirror.as_klass to plain k.
+// Also feed through the klass in Allocate(...klass...)._klass.
+Node* LoadKlassNode::Identity( PhaseTransform *phase ) {
+ Node* x = LoadNode::Identity(phase);
+ if (x != this) return x;
+
+ // Take apart the address into an oop and an offset.
+ // Return 'this' if we cannot.
+ Node* adr = in(MemNode::Address);
+ intptr_t offset = 0;
+ Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
+ if (base == NULL) return this;
+ const TypeOopPtr* toop = phase->type(adr)->isa_oopptr();
+ if (toop == NULL) return this;
+
+ // We can fetch the klass directly through an AllocateNode.
+ // This works even if the klass is not constant (clone or newArray).
+ if (offset == oopDesc::klass_offset_in_bytes()) {
+ Node* allocated_klass = AllocateNode::Ideal_klass(base, phase);
+ if (allocated_klass != NULL) {
+ return allocated_klass;
+ }
+ }
+
+ // Simplify k.java_mirror.as_klass to plain k, where k is a klassOop.
+ // Simplify ak.component_mirror.array_klass to plain ak, ak an arrayKlass.
+ // See inline_native_Class_query for occurrences of these patterns.
+ // Java Example: x.getClass().isAssignableFrom(y)
+ // Java Example: Array.newInstance(x.getClass().getComponentType(), n)
+ //
+ // This improves reflective code, often making the Class
+ // mirror go completely dead. (Current exception: Class
+ // mirrors may appear in debug info, but we could clean them out by
+ // introducing a new debug info operator for klassOop.java_mirror).
+ if (toop->isa_instptr() && toop->klass() == phase->C->env()->Class_klass()
+ && (offset == java_lang_Class::klass_offset_in_bytes() ||
+ offset == java_lang_Class::array_klass_offset_in_bytes())) {
+ // We are loading a special hidden field from a Class mirror,
+ // the field which points to its Klass or arrayKlass metaobject.
+ if (base->is_Load()) {
+ Node* adr2 = base->in(MemNode::Address);
+ const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
+ if (tkls != NULL && !tkls->empty()
+ && (tkls->klass()->is_instance_klass() ||
+ tkls->klass()->is_array_klass())
+ && adr2->is_AddP()
+ ) {
+ int mirror_field = Klass::java_mirror_offset_in_bytes();
+ if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
+ mirror_field = in_bytes(arrayKlass::component_mirror_offset());
+ }
+ if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) {
+ return adr2->in(AddPNode::Base);
+ }
+ }
+ }
+ }
+
+ return this;
+}
+
+//------------------------------Value-----------------------------------------
+const Type *LoadRangeNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(MemNode::Memory) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ Node *adr = in(MemNode::Address);
+ const Type *t2 = phase->type( adr );
+ if( t2 == Type::TOP ) return Type::TOP;
+ const TypePtr *tp = t2->is_ptr();
+ if (TypePtr::above_centerline(tp->ptr())) return Type::TOP;
+ const TypeAryPtr *tap = tp->isa_aryptr();
+ if( !tap ) return _type;
+ return tap->size();
+}
+
+//------------------------------Identity---------------------------------------
+// Feed through the length in AllocateArray(...length...)._length.
+Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
+ Node* x = LoadINode::Identity(phase);
+ if (x != this) return x;
+
+ // Take apart the address into an oop and an offset.
+ // Return 'this' if we cannot.
+ Node* adr = in(MemNode::Address);
+ intptr_t offset = 0;
+ Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
+ if (base == NULL) return this;
+ const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
+ if (tary == NULL) return this;
+
+ // We can fetch the length directly through an AllocateArrayNode.
+ // This works even if the length is not constant (clone or newArray).
+ if (offset == arrayOopDesc::length_offset_in_bytes()) {
+ Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase);
+ if (allocated_length != NULL) {
+ return allocated_length;
+ }
+ }
+
+ return this;
+
+}
+//=============================================================================
+//---------------------------StoreNode::make-----------------------------------
+// Polymorphic factory method:
+StoreNode* StoreNode::make( Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt ) {
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return new (C, 4) StoreBNode(ctl, mem, adr, adr_type, val);
+ case T_INT: return new (C, 4) StoreINode(ctl, mem, adr, adr_type, val);
+ case T_CHAR:
+ case T_SHORT: return new (C, 4) StoreCNode(ctl, mem, adr, adr_type, val);
+ case T_LONG: return new (C, 4) StoreLNode(ctl, mem, adr, adr_type, val);
+ case T_FLOAT: return new (C, 4) StoreFNode(ctl, mem, adr, adr_type, val);
+ case T_DOUBLE: return new (C, 4) StoreDNode(ctl, mem, adr, adr_type, val);
+ case T_ADDRESS:
+ case T_OBJECT: return new (C, 4) StorePNode(ctl, mem, adr, adr_type, val);
+ }
+ ShouldNotReachHere();
+ return (StoreNode*)NULL;
+}
+
+StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val) {
+ bool require_atomic = true;
+ return new (C, 4) StoreLNode(ctl, mem, adr, adr_type, val, require_atomic);
+}
+
+
+//--------------------------bottom_type----------------------------------------
+const Type *StoreNode::bottom_type() const {
+ return Type::MEMORY;
+}
+
+//------------------------------hash-------------------------------------------
+uint StoreNode::hash() const {
+ // unroll addition of interesting fields
+ //return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address) + (uintptr_t)in(ValueIn);
+
+ // Since they are not commoned, do not hash them:
+ return NO_HASH;
+}
+
+//------------------------------Ideal------------------------------------------
+// Change back-to-back Store(, p, x) -> Store(m, p, y) to Store(m, p, x).
+// When a store immediately follows a relevant allocation/initialization,
+// try to capture it into the initialization, or hoist it above.
+Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* p = MemNode::Ideal_common(phase, can_reshape);
+ if (p) return (p == NodeSentinel) ? NULL : p;
+
+ Node* mem = in(MemNode::Memory);
+ Node* address = in(MemNode::Address);
+
+ // Back-to-back stores to same address? Fold em up.
+ // Generally unsafe if I have intervening uses...
+ if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address)) {
+ // Looking at a dead closed cycle of memory?
+ assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
+
+ assert(Opcode() == mem->Opcode() ||
+ phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw,
+ "no mismatched stores, except on raw memory");
+
+ if (mem->outcnt() == 1 && // check for intervening uses
+ mem->as_Store()->memory_size() <= this->memory_size()) {
+ // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away.
+ // For example, 'mem' might be the final state at a conditional return.
+ // Or, 'mem' might be used by some node which is live at the same time
+ // 'this' is live, which might be unschedulable. So, require exactly
+ // ONE user, the 'this' store, until such time as we clone 'mem' for
+ // each of 'mem's uses (thus making the exactly-1-user-rule hold true).
+ if (can_reshape) { // (%%% is this an anachronism?)
+ set_req_X(MemNode::Memory, mem->in(MemNode::Memory),
+ phase->is_IterGVN());
+ } else {
+ // It's OK to do this in the parser, since DU info is always accurate,
+ // and the parser always refers to nodes via SafePointNode maps.
+ set_req(MemNode::Memory, mem->in(MemNode::Memory));
+ }
+ return this;
+ }
+ }
+
+ // Capture an unaliased, unconditional, simple store into an initializer.
+ // Or, if it is independent of the allocation, hoist it above the allocation.
+ if (ReduceFieldZeroing && /*can_reshape &&*/
+ mem->is_Proj() && mem->in(0)->is_Initialize()) {
+ InitializeNode* init = mem->in(0)->as_Initialize();
+ intptr_t offset = init->can_capture_store(this, phase);
+ if (offset > 0) {
+ Node* moved = init->capture_store(this, offset, phase);
+ // If the InitializeNode captured me, it made a raw copy of me,
+ // and I need to disappear.
+ if (moved != NULL) {
+ // %%% hack to ensure that Ideal returns a new node:
+ mem = MergeMemNode::make(phase->C, mem);
+ return mem; // fold me away
+ }
+ }
+ }
+
+ return NULL; // No further progress
+}
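+
+// The back-to-back store fold, sketched (illustration only):
+//   st1 = StoreI(ctl, mem, p, y)      // st1's only user is st2
+//   st2 = StoreI(ctl, st1, p, x)
+// st2 takes mem directly as its memory input, leaving st1 dead.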
+
+//------------------------------Value-----------------------------------------
+const Type *StoreNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(MemNode::Memory) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type *t2 = phase->type( in(MemNode::Address) );
+ if( t2 == Type::TOP ) return Type::TOP;
+ const Type *t3 = phase->type( in(MemNode::ValueIn) );
+ if( t3 == Type::TOP ) return Type::TOP;
+ return Type::MEMORY;
+}
+
+//------------------------------Identity---------------------------------------
+// Remove redundant stores:
+// Store(m, p, Load(m, p)) changes to m.
+// Store(, p, x) -> Store(m, p, x) changes to Store(m, p, x).
+Node *StoreNode::Identity( PhaseTransform *phase ) {
+ Node* mem = in(MemNode::Memory);
+ Node* adr = in(MemNode::Address);
+ Node* val = in(MemNode::ValueIn);
+
+ // Load then Store? Then the Store is useless
+ if (val->is_Load() &&
+ phase->eqv_uncast( val->in(MemNode::Address), adr ) &&
+ phase->eqv_uncast( val->in(MemNode::Memory ), mem ) &&
+ val->as_Load()->store_Opcode() == Opcode()) {
+ return mem;
+ }
+
+ // Two stores in a row of the same value?
+ if (mem->is_Store() &&
+ phase->eqv_uncast( mem->in(MemNode::Address), adr ) &&
+ phase->eqv_uncast( mem->in(MemNode::ValueIn), val ) &&
+ mem->Opcode() == Opcode()) {
+ return mem;
+ }
+
+ // Store of zero anywhere into a freshly-allocated object?
+ // Then the store is useless.
+ // (It must already have been captured by the InitializeNode.)
+ if (ReduceFieldZeroing && phase->type(val)->is_zero_type()) {
+ // a newly allocated object is already all-zeroes everywhere
+ if (mem->is_Proj() && mem->in(0)->is_Allocate()) {
+ return mem;
+ }
+
+ // the store may also apply to zero-bits in an earlier object
+ Node* prev_mem = find_previous_store(phase);
+ // Steps (a), (b): Walk past independent stores to find an exact match.
+ if (prev_mem != NULL) {
+ Node* prev_val = can_see_stored_value(prev_mem, phase);
+ if (prev_val != NULL && phase->eqv(prev_val, val)) {
+ // prev_val and val might differ by a cast; it would be good
+ // to keep the more informative of the two.
+ return mem;
+ }
+ }
+ }
+
+ return this;
+}
+
+//------------------------------match_edge-------------------------------------
+ // Do we Match on this edge index or not? Match only address & value
+uint StoreNode::match_edge(uint idx) const {
+ return idx == MemNode::Address || idx == MemNode::ValueIn;
+}
+
+//------------------------------cmp--------------------------------------------
+// Do not common stores up together. They generally have to be split
+// back up anyways, so do not bother.
+uint StoreNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//------------------------------Ideal_masked_input-----------------------------
+// Check for a useless mask before a partial-word store
+// (StoreB ... (AndI valIn conIa) )
+// If (conIa & mask == mask) this simplifies to
+// (StoreB ... (valIn) )
+Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) {
+ Node *val = in(MemNode::ValueIn);
+ if( val->Opcode() == Op_AndI ) {
+ const TypeInt *t = phase->type( val->in(2) )->isa_int();
+ if( t && t->is_con() && (t->get_con() & mask) == mask ) {
+ set_req(MemNode::ValueIn, val->in(1));
+ return this;
+ }
+ }
+ return NULL;
+}
+
+
+//------------------------------Ideal_sign_extended_input----------------------
+// Check for useless sign-extension before a partial-word store
+// (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) )
+// If (conIL == conIR && conIR <= num_bits) this simplifies to
+// (StoreB ... (valIn) )
+Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) {
+ Node *val = in(MemNode::ValueIn);
+ if( val->Opcode() == Op_RShiftI ) {
+ const TypeInt *t = phase->type( val->in(2) )->isa_int();
+ if( t && t->is_con() && (t->get_con() <= num_bits) ) {
+ Node *shl = val->in(1);
+ if( shl->Opcode() == Op_LShiftI ) {
+ const TypeInt *t2 = phase->type( shl->in(2) )->isa_int();
+ if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) {
+ set_req(MemNode::ValueIn, shl->in(1));
+ return this;
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+//------------------------------value_never_loaded-----------------------------------
+// Determine whether there are any possible loads of the value stored.
+// For simplicity, we actually check if there are any loads from the
+// address stored to, not just for loads of the value stored by this node.
+//
+bool StoreNode::value_never_loaded( PhaseTransform *phase) const {
+ Node *adr = in(Address);
+ const TypeOopPtr *adr_oop = phase->type(adr)->isa_oopptr();
+ if (adr_oop == NULL)
+ return false;
+ if (!adr_oop->is_instance())
+ return false; // if not a distinct instance, there may be aliases of the address
+ for (DUIterator_Fast imax, i = adr->fast_outs(imax); i < imax; i++) {
+ Node *use = adr->fast_out(i);
+ if (use->is_Load() || use->is_LoadStore()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// If the store is from an AND mask that leaves the low bits untouched, then
+// we can skip the AND operation. If the store is from a sign-extension
+// (a left shift, then right shift) we can skip both.
+Node *StoreBNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ Node *progress = StoreNode::Ideal_masked_input(phase, 0xFF);
+ if( progress != NULL ) return progress;
+
+ progress = StoreNode::Ideal_sign_extended_input(phase, 24);
+ if( progress != NULL ) return progress;
+
+ // Finally check the default case
+ return StoreNode::Ideal(phase, can_reshape);
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// If the store is from an AND mask that leaves the low bits untouched, then
+// we can skip the AND operation
+Node *StoreCNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ Node *progress = StoreNode::Ideal_masked_input(phase, 0xFFFF);
+ if( progress != NULL ) return progress;
+
+ progress = StoreNode::Ideal_sign_extended_input(phase, 16);
+ if( progress != NULL ) return progress;
+
+ // Finally check the default case
+ return StoreNode::Ideal(phase, can_reshape);
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *StoreCMNode::Identity( PhaseTransform *phase ) {
+ // No need to card mark when storing a null ptr
+ Node* my_store = in(MemNode::OopStore);
+ if (my_store->is_Store()) {
+ const Type *t1 = phase->type( my_store->in(MemNode::ValueIn) );
+ if( t1 == TypePtr::NULL_PTR ) {
+ return in(MemNode::Memory);
+ }
+ }
+ return this;
+}
+
+//------------------------------Value-----------------------------------------
+const Type *StoreCMNode::Value( PhaseTransform *phase ) const {
+ // If extra input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(MemNode::OopStore) );
+ if( t1 == Type::TOP ) return Type::TOP;
+
+ return StoreNode::Value( phase );
+}
+
+
+//=============================================================================
+//----------------------------------SCMemProjNode------------------------------
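+// SCMemProjNode is the memory projection of a LoadStore (atomic
+// read-modify-write) node; its Value is simply the node's bottom type.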
+const Type * SCMemProjNode::Value( PhaseTransform *phase ) const {
+ return bottom_type();
+}
+
+//=============================================================================
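+// LoadStoreNode is the base class for atomic read-modify-write operations
+// (e.g., compare-and-swap).  Control, Memory, Address and ValueIn follow the
+// usual MemNode edge layout, and ExpectedIn carries the expected old value.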
+LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : Node(5) {
+ init_req(MemNode::Control, c );
+ init_req(MemNode::Memory , mem);
+ init_req(MemNode::Address, adr);
+ init_req(MemNode::ValueIn, val);
+ init_req( ExpectedIn, ex );
+ init_class_id(Class_LoadStore);
+}
+
+//=============================================================================
+//-------------------------------adr_type--------------------------------------
+ // Compute the memory address type from the destination address (in(3)).
+const TypePtr* ClearArrayNode::adr_type() const {
+ Node *adr = in(3);
+ return MemNode::calculate_adr_type(adr->bottom_type());
+}
+
+//------------------------------match_edge-------------------------------------
+// Do we Match on this edge index or not? Do not match memory
+uint ClearArrayNode::match_edge(uint idx) const {
+ return idx > 1;
+}
+
+//------------------------------Identity---------------------------------------
+// Clearing a zero length array does nothing
+Node *ClearArrayNode::Identity( PhaseTransform *phase ) {
+ return phase->type(in(2))->higher_equal(TypeInt::ZERO) ? in(1) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+// Clearing a short array is faster with stores
+Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ const int unit = BytesPerLong;
+ const TypeX* t = phase->type(in(2))->isa_intptr_t();
+ if (!t) return NULL;
+ if (!t->is_con()) return NULL;
+ intptr_t raw_count = t->get_con();
+ intptr_t size = raw_count;
+ if (!Matcher::init_array_count_is_in_bytes) size *= unit;
+ // Clearing nothing uses the Identity call.
+ // Negative clears are possible on dead ClearArrays
+ // (see jck test stmt114.stmt11402.val).
+ if (size <= 0 || size % unit != 0) return NULL;
+ intptr_t count = size / unit;
+ // Length too long; use fast hardware clear
+ if (size > Matcher::init_array_short_size) return NULL;
+ Node *mem = in(1);
+ if( phase->type(mem)==Type::TOP ) return NULL;
+ Node *adr = in(3);
+ const Type* at = phase->type(adr);
+ if( at==Type::TOP ) return NULL;
+ const TypePtr* atp = at->isa_ptr();
+ // adjust atp to be the correct array element address type
+ if (atp == NULL) atp = TypePtr::BOTTOM;
+ else atp = atp->add_offset(Type::OffsetBot);
+ // Get base for derived pointer purposes
+ if( adr->Opcode() != Op_AddP ) Unimplemented();
+ Node *base = adr->in(1);
+
+ Node *zero = phase->makecon(TypeLong::ZERO);
+ Node *off = phase->MakeConX(BytesPerLong);
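+ // Replace the ClearArray with a chain of 'count' zeroing long-stores,
+ // advancing the address by BytesPerLong for each store after the first.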
+ mem = new (phase->C, 4) StoreLNode(in(0),mem,adr,atp,zero);
+ count--;
+ while( count-- ) {
+ mem = phase->transform(mem);
+ adr = phase->transform(new (phase->C, 4) AddPNode(base,adr,off));
+ mem = new (phase->C, 4) StoreLNode(in(0),mem,adr,atp,zero);
+ }
+ return mem;
+}
+
+//----------------------------clear_memory-------------------------------------
+// Generate code to initialize object storage to zero.
+Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+ intptr_t start_offset,
+ Node* end_offset,
+ PhaseGVN* phase) {
+ Compile* C = phase->C;
+ intptr_t offset = start_offset;
+
+ int unit = BytesPerLong;
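+ // If the start offset is not long-aligned, emit one 32-bit zero store
+ // to round it up to a long boundary before the bulk clear below.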
+ if ((offset % unit) != 0) {
+ Node* adr = new (C, 4) AddPNode(dest, dest, phase->MakeConX(offset));
+ adr = phase->transform(adr);
+ const TypePtr* atp = TypeRawPtr::BOTTOM;
+ mem = StoreNode::make(C, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
+ mem = phase->transform(mem);
+ offset += BytesPerInt;
+ }
+ assert((offset % unit) == 0, "");
+
+ // Initialize the remaining stuff, if any, with a ClearArray.
+ return clear_memory(ctl, mem, dest, phase->MakeConX(offset), end_offset, phase);
+}
+
+Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+ Node* start_offset,
+ Node* end_offset,
+ PhaseGVN* phase) {
+ Compile* C = phase->C;
+ int unit = BytesPerLong;
+ Node* zbase = start_offset;
+ Node* zend = end_offset;
+
+ // Scale to the unit required by the CPU:
+ if (!Matcher::init_array_count_is_in_bytes) {
+ Node* shift = phase->intcon(exact_log2(unit));
+ zbase = phase->transform( new(C,3) URShiftXNode(zbase, shift) );
+ zend = phase->transform( new(C,3) URShiftXNode(zend, shift) );
+ }
+
+ Node* zsize = phase->transform( new(C,3) SubXNode(zend, zbase) );
+ Node* zinit = phase->zerocon((unit == BytesPerLong) ? T_LONG : T_INT);
+
+ // Bulk clear double-words
+ Node* adr = phase->transform( new(C,4) AddPNode(dest, dest, start_offset) );
+ mem = new (C, 4) ClearArrayNode(ctl, mem, zsize, adr);
+ return phase->transform(mem);
+}
+
+Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+ intptr_t start_offset,
+ intptr_t end_offset,
+ PhaseGVN* phase) {
+ Compile* C = phase->C;
+ assert((end_offset % BytesPerInt) == 0, "odd end offset");
+ intptr_t done_offset = end_offset;
+ if ((done_offset % BytesPerLong) != 0) {
+ done_offset -= BytesPerInt;
+ }
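+ // 'done_offset' is end_offset rounded down to a long boundary; the
+ // long-aligned middle is cleared in bulk, and any trailing int is
+ // zeroed with a single 32-bit store below.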
+ if (done_offset > start_offset) {
+ mem = clear_memory(ctl, mem, dest,
+ start_offset, phase->MakeConX(done_offset), phase);
+ }
+ if (done_offset < end_offset) { // emit the final 32-bit store
+ Node* adr = new (C, 4) AddPNode(dest, dest, phase->MakeConX(done_offset));
+ adr = phase->transform(adr);
+ const TypePtr* atp = TypeRawPtr::BOTTOM;
+ mem = StoreNode::make(C, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
+ mem = phase->transform(mem);
+ done_offset += BytesPerInt;
+ }
+ assert(done_offset == end_offset, "");
+ return mem;
+}
+
+//=============================================================================
+// Do we match on this edge? No memory edges
+uint StrCompNode::match_edge(uint idx) const {
+ return idx == 5 || idx == 6;
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *StrCompNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+
+//=============================================================================
+MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
+ : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
+ _adr_type(C->get_adr_type(alias_idx))
+{
+ init_class_id(Class_MemBar);
+ Node* top = C->top();
+ init_req(TypeFunc::I_O,top);
+ init_req(TypeFunc::FramePtr,top);
+ init_req(TypeFunc::ReturnAdr,top);
+ if (precedent != NULL)
+ init_req(TypeFunc::Parms, precedent);
+}
+
+//------------------------------cmp--------------------------------------------
+uint MemBarNode::hash() const { return NO_HASH; }
+uint MemBarNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//------------------------------make-------------------------------------------
+MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) {
+ int len = Precedent + (pn == NULL? 0: 1);
+ switch (opcode) {
+ case Op_MemBarAcquire: return new(C, len) MemBarAcquireNode(C, atp, pn);
+ case Op_MemBarRelease: return new(C, len) MemBarReleaseNode(C, atp, pn);
+ case Op_MemBarVolatile: return new(C, len) MemBarVolatileNode(C, atp, pn);
+ case Op_MemBarCPUOrder: return new(C, len) MemBarCPUOrderNode(C, atp, pn);
+ case Op_Initialize: return new(C, len) InitializeNode(C, atp, pn);
+ default: ShouldNotReachHere(); return NULL;
+ }
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (remove_dead_region(phase, can_reshape)) return this;
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+const Type *MemBarNode::Value( PhaseTransform *phase ) const {
+ if( !in(0) ) return Type::TOP;
+ if( phase->type(in(0)) == Type::TOP )
+ return Type::TOP;
+ return TypeTuple::MEMBAR;
+}
+
+//------------------------------match------------------------------------------
+// Construct projections for memory.
+Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) {
+ switch (proj->_con) {
+ case TypeFunc::Control:
+ case TypeFunc::Memory:
+ return new (m->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//===========================InitializeNode====================================
+// SUMMARY:
+// This node acts as a memory barrier on raw memory, after some raw stores.
+// The 'cooked' oop value feeds from the Initialize, not the Allocation.
+// The Initialize can 'capture' suitably constrained stores as raw inits.
+// It can coalesce related raw stores into larger units (called 'tiles').
+// It can avoid zeroing new storage for memory units which have raw inits.
+// At macro-expansion, it is marked 'complete', and does not optimize further.
+//
+// EXAMPLE:
+// The object 'new short[2]' occupies 16 bytes in a 32-bit machine.
+// ctl = incoming control; mem* = incoming memory
+// (Note: A star * on a memory edge denotes I/O and other standard edges.)
+// First allocate uninitialized memory and fill in the header:
+// alloc = (Allocate ctl mem* 16 #short[].klass ...)
+// ctl := alloc.Control; mem* := alloc.Memory*
+// rawmem = alloc.Memory; rawoop = alloc.RawAddress
+// Then initialize to zero the non-header parts of the raw memory block:
+// init = (Initialize alloc.Control alloc.Memory* alloc.RawAddress)
+// ctl := init.Control; mem.SLICE(#short[*]) := init.Memory
+// After the initialize node executes, the object is ready for service:
+// oop := (CheckCastPP init.Control alloc.RawAddress #short[])
+// Suppose its body is immediately initialized as {1,2}:
+// store1 = (StoreC init.Control init.Memory (+ oop 12) 1)
+// store2 = (StoreC init.Control store1 (+ oop 14) 2)
+// mem.SLICE(#short[*]) := store2
+//
+// DETAILS:
+// An InitializeNode collects and isolates object initialization after
+// an AllocateNode and before the next possible safepoint. As a
+// memory barrier (MemBarNode), it keeps critical stores from drifting
+// down past any safepoint or any publication of the allocation.
+// Before this barrier, a newly-allocated object may have uninitialized bits.
+// After this barrier, it may be treated as a real oop, and GC is allowed.
+//
+// The semantics of the InitializeNode include an implicit zeroing of
+// the new object from object header to the end of the object.
+// (The object header and end are determined by the AllocateNode.)
+//
+// Certain stores may be added as direct inputs to the InitializeNode.
+// These stores must update raw memory, and they must be to addresses
+// derived from the raw address produced by AllocateNode, and with
+// a constant offset. They must be ordered by increasing offset.
+// The first one is at in(RawStores), the last at in(req()-1).
+// Unlike most memory operations, they are not linked in a chain,
+// but are displayed in parallel as users of the rawmem output of
+// the allocation.
+//
+// (See comments in InitializeNode::capture_store, which continue
+// the example given above.)
+//
+// When the associated Allocate is macro-expanded, the InitializeNode
+// may be rewritten to optimize collected stores. A ClearArrayNode
+// may also be created at that point to represent any required zeroing.
+// The InitializeNode is then marked 'complete', prohibiting further
+// capturing of nearby memory operations.
+//
+// During macro-expansion, all captured initializations which store
+ // constant values of 32 bits or smaller are coalesced (if advantageous)
+ // into larger 'tiles' of 32 or 64 bits.  This allows an object to be
+// initialized in fewer memory operations. Memory words which are
+// covered by neither tiles nor non-constant stores are pre-zeroed
+// by explicit stores of zero. (The code shape happens to do all
+// zeroing first, then all other stores, with both sequences occurring
+// in order of ascending offsets.)
+//
+// Alternatively, code may be inserted between an AllocateNode and its
+// InitializeNode, to perform arbitrary initialization of the new object.
+// E.g., the object copying intrinsics insert complex data transfers here.
+ // The initialization must then be marked as 'complete' to disable the
+// built-in zeroing semantics and the collection of initializing stores.
+//
+// While an InitializeNode is incomplete, reads from the memory state
+// produced by it are optimizable if they match the control edge and
+// new oop address associated with the allocation/initialization.
+// They return a stored value (if the offset matches) or else zero.
+// A write to the memory state, if it matches control and address,
+// and if it is to a constant offset, may be 'captured' by the
+// InitializeNode. It is cloned as a raw memory operation and rewired
+// inside the initialization, to the raw oop produced by the allocation.
+// Operations on addresses which are provably distinct (e.g., to
+// other AllocateNodes) are allowed to bypass the initialization.
+//
+// The effect of all this is to consolidate object initialization
+// (both arrays and non-arrays, both piecewise and bulk) into a
+// single location, where it can be optimized as a unit.
+//
+// Only stores with an offset less than TrackedInitializationLimit words
+// will be considered for capture by an InitializeNode. This puts a
+// reasonable limit on the complexity of optimized initializations.
+
+//---------------------------InitializeNode------------------------------------
+InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
+ : MemBarNode(C, adr_type, rawoop),
+   _is_complete(false)
+{
+ init_class_id(Class_Initialize);
+
+ assert(adr_type == Compile::AliasIdxRaw, "only valid atp");
+ assert(in(RawAddress) == rawoop, "proper init");
+ // Note: allocation() can be NULL, for secondary initialization barriers
+}
+
+// Since this node is not matched, it will be processed by the
+// register allocator. Declare that there are no constraints
+// on the allocation of the RawAddress edge.
+const RegMask &InitializeNode::in_RegMask(uint idx) const {
+ // This edge should have been set to top by set_complete(), but be conservative.
+ if (idx == InitializeNode::RawAddress)
+ return *(Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()]);
+ return RegMask::Empty;
+}
+
+Node* InitializeNode::memory(uint alias_idx) {
+ Node* mem = in(Memory);
+ if (mem->is_MergeMem()) {
+ return mem->as_MergeMem()->memory_at(alias_idx);
+ } else {
+ // incoming raw memory is not split
+ return mem;
+ }
+}
+
+bool InitializeNode::is_non_zero() {
+ if (is_complete()) return false;
+ remove_extra_zeroes();
+ return (req() > RawStores);
+}
+
+void InitializeNode::set_complete(PhaseGVN* phase) {
+ assert(!is_complete(), "caller responsibility");
+ _is_complete = true;
+
+ // After this node is complete, it contains a bunch of
+ // raw-memory initializations. There is no need for
+ // it to have anything to do with non-raw memory effects.
+ // Therefore, tell all non-raw users to re-optimize themselves,
+ // after skipping the memory effects of this initialization.
+ PhaseIterGVN* igvn = phase->is_IterGVN();
+ if (igvn) igvn->add_users_to_worklist(this);
+}
+
+// convenience function
+// return false if the init contains any stores already
+bool AllocateNode::maybe_set_complete(PhaseGVN* phase) {
+ InitializeNode* init = initialization();
+ if (init == NULL || init->is_complete()) return false;
+ init->remove_extra_zeroes();
+ // for now, if this allocation has already collected any inits, bail:
+ if (init->is_non_zero()) return false;
+ init->set_complete(phase);
+ return true;
+}
+
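+// Compact the list of captured stores: drop edges that are top or the
+// trivial zero memory, so that in(RawStores)..in(req()-1) holds only
+// real captured stores.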
+void InitializeNode::remove_extra_zeroes() {
+ if (req() == RawStores) return;
+ Node* zmem = zero_memory();
+ uint fill = RawStores;
+ for (uint i = fill; i < req(); i++) {
+ Node* n = in(i);
+ if (n->is_top() || n == zmem) continue; // skip
+ if (fill < i) set_req(fill, n); // compact
+ ++fill;
+ }
+ // delete any empty spaces created:
+ while (fill < req()) {
+ del_req(fill);
+ }
+}
+
+// Helper for remembering which stores go with which offsets.
+intptr_t InitializeNode::get_store_offset(Node* st, PhaseTransform* phase) {
+ if (!st->is_Store()) return -1; // can happen to dead code via subsume_node
+ intptr_t offset = -1;
+ Node* base = AddPNode::Ideal_base_and_offset(st->in(MemNode::Address),
+ phase, offset);
+ if (base == NULL) return -1; // something is dead,
+ if (offset < 0) return -1; // dead, dead
+ return offset;
+}
+
+// Helper for proving that an initialization expression is
+// "simple enough" to be folded into an object initialization.
+// Attempts to prove that a store's initial value 'n' can be captured
+// within the initialization without creating a vicious cycle, such as:
+// { Foo p = new Foo(); p.next = p; }
+// True for constants and parameters and small combinations thereof.
+bool InitializeNode::detect_init_independence(Node* n,
+ bool st_is_pinned,
+ int& count) {
+ if (n == NULL) return true; // (can this really happen?)
+ if (n->is_Proj()) n = n->in(0);
+ if (n == this) return false; // found a cycle
+ if (n->is_Con()) return true;
+ if (n->is_Start()) return true; // params, etc., are OK
+ if (n->is_Root()) return true; // even better
+
+ Node* ctl = n->in(0);
+ if (ctl != NULL && !ctl->is_top()) {
+ if (ctl->is_Proj()) ctl = ctl->in(0);
+ if (ctl == this) return false;
+
+ // If we already know that the enclosing memory op is pinned right after
+ // the init, then any control flow that the store has picked up
+ // must have preceded the init, or else be equal to the init.
+ // Even after loop optimizations (which might change control edges)
+ // a store is never pinned *before* the availability of its inputs.
+ if (!MemNode::detect_dominating_control(ctl, this->in(0)))
+ return false; // failed to prove a good control
+
+ }
+
+ // Check data edges for possible dependencies on 'this'.
+ if ((count += 1) > 20) return false; // complexity limit
+ for (uint i = 1; i < n->req(); i++) {
+ Node* m = n->in(i);
+ if (m == NULL || m == n || m->is_top()) continue;
+ uint first_i = n->find_edge(m);
+ if (i != first_i) continue; // process duplicate edge just once
+ if (!detect_init_independence(m, st_is_pinned, count)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Here are all the checks a Store must pass before it can be moved into
+// an initialization. Returns zero if a check fails.
+// On success, returns the (constant) offset to which the store applies,
+// within the initialized memory.
+intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase) {
+ const int FAIL = 0;
+ if (st->req() != MemNode::ValueIn + 1)
+ return FAIL; // an inscrutable StoreNode (card mark?)
+ Node* ctl = st->in(MemNode::Control);
+ if (!(ctl != NULL && ctl->is_Proj() && ctl->in(0) == this))
+ return FAIL; // must be unconditional after the initialization
+ Node* mem = st->in(MemNode::Memory);
+ if (!(mem->is_Proj() && mem->in(0) == this))
+ return FAIL; // must not be preceded by other stores
+ Node* adr = st->in(MemNode::Address);
+ intptr_t offset;
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset);
+ if (alloc == NULL)
+ return FAIL; // inscrutable address
+ if (alloc != allocation())
+ return FAIL; // wrong allocation! (store needs to float up)
+ Node* val = st->in(MemNode::ValueIn);
+ int complexity_count = 0;
+ if (!detect_init_independence(val, true, complexity_count))
+ return FAIL; // stored value must be 'simple enough'
+
+ return offset; // success
+}
+
+// Find the captured store in(i) which corresponds to the range
+// [start..start+size) in the initialized object.
+// If there is one, return its index i. If there isn't, return the
+// negative of the index where it should be inserted.
+// Return 0 if the queried range overlaps an initialization boundary
+// or if dead code is encountered.
+// If size_in_bytes is zero, do not bother with overlap checks.
+int InitializeNode::captured_store_insertion_point(intptr_t start,
+ int size_in_bytes,
+ PhaseTransform* phase) {
+ const int FAIL = 0, MAX_STORE = BytesPerLong;
+
+ if (is_complete())
+ return FAIL; // arraycopy got here first; punt
+
+ assert(allocation() != NULL, "must be present");
+
+ // no negatives, no header fields:
+ if (start < (intptr_t) sizeof(oopDesc)) return FAIL;
+ if (start < (intptr_t) sizeof(arrayOopDesc) &&
+ start < (intptr_t) allocation()->minimum_header_size()) return FAIL;
+
+ // after a certain size, we bail out on tracking all the stores:
+ intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
+ if (start >= ti_limit) return FAIL;
+
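+ // Scan the captured stores, which are kept in ascending offset order,
+ // looking for an exact match, a conflict, or the insertion point.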
+ for (uint i = InitializeNode::RawStores, limit = req(); ; ) {
+ if (i >= limit) return -(int)i; // not found; here is where to put it
+
+ Node* st = in(i);
+ intptr_t st_off = get_store_offset(st, phase);
+ if (st_off < 0) {
+ if (st != zero_memory()) {
+ return FAIL; // bail out if there is dead garbage
+ }
+ } else if (st_off > start) {
+ // ...we are done, since stores are ordered
+ if (st_off < start + size_in_bytes) {
+ return FAIL; // the next store overlaps
+ }
+ return -(int)i; // not found; here is where to put it
+ } else if (st_off < start) {
+ if (size_in_bytes != 0 &&
+ start < st_off + MAX_STORE &&
+ start < st_off + st->as_Store()->memory_size()) {
+ return FAIL; // the previous store overlaps
+ }
+ } else {
+ if (size_in_bytes != 0 &&
+ st->as_Store()->memory_size() != size_in_bytes) {
+ return FAIL; // mismatched store size
+ }
+ return i;
+ }
+
+ ++i;
+ }
+}
+
+// Look for a captured store which initializes at the offset 'start'
+// with the given size. If there is no such store, and no other
+// initialization interferes, then return zero_memory (the memory
+// projection of the AllocateNode).
+Node* InitializeNode::find_captured_store(intptr_t start, int size_in_bytes,
+ PhaseTransform* phase) {
+ assert(stores_are_sane(phase), "");
+ int i = captured_store_insertion_point(start, size_in_bytes, phase);
+ if (i == 0) {
+ return NULL; // something is dead
+ } else if (i < 0) {
+ return zero_memory(); // just primordial zero bits here
+ } else {
+ Node* st = in(i); // here is the store at this position
+ assert(get_store_offset(st->as_Store(), phase) == start, "sanity");
+ return st;
+ }
+}
+
+// Create, as a raw pointer, an address within my new object at 'offset'.
+Node* InitializeNode::make_raw_address(intptr_t offset,
+ PhaseTransform* phase) {
+ Node* addr = in(RawAddress);
+ if (offset != 0) {
+ Compile* C = phase->C;
+ addr = phase->transform( new (C, 4) AddPNode(C->top(), addr,
+ phase->MakeConX(offset)) );
+ }
+ return addr;
+}
+
+// Clone the given store, converting it into a raw store
+// initializing a field or element of my new object.
+// Caller is responsible for retiring the original store,
+// with subsume_node or the like.
+//
+// From the example above InitializeNode::InitializeNode,
+// here are the old stores to be captured:
+// store1 = (StoreC init.Control init.Memory (+ oop 12) 1)
+// store2 = (StoreC init.Control store1 (+ oop 14) 2)
+//
+// Here is the changed code; note the extra edges on init:
+// alloc = (Allocate ...)
+// rawoop = alloc.RawAddress
+// rawstore1 = (StoreC alloc.Control alloc.Memory (+ rawoop 12) 1)
+// rawstore2 = (StoreC alloc.Control alloc.Memory (+ rawoop 14) 2)
+// init = (Initialize alloc.Control alloc.Memory rawoop
+// rawstore1 rawstore2)
+//
+Node* InitializeNode::capture_store(StoreNode* st, intptr_t start,
+ PhaseTransform* phase) {
+ assert(stores_are_sane(phase), "");
+
+ if (start < 0) return NULL;
+ assert(can_capture_store(st, phase) == start, "sanity");
+
+ Compile* C = phase->C;
+ int size_in_bytes = st->memory_size();
+ int i = captured_store_insertion_point(start, size_in_bytes, phase);
+ if (i == 0) return NULL; // bail out
+ Node* prev_mem = NULL; // raw memory for the captured store
+ if (i > 0) {
+ prev_mem = in(i); // there is a pre-existing store under this one
+ set_req(i, C->top()); // temporarily disconnect it
+ // See StoreNode::Ideal 'st->outcnt() == 1' for the reason to disconnect.
+ } else {
+ i = -i; // no pre-existing store
+ prev_mem = zero_memory(); // a slice of the newly allocated object
+ if (i > InitializeNode::RawStores && in(i-1) == prev_mem)
+ set_req(--i, C->top()); // reuse this edge; it has been folded away
+ else
+ ins_req(i, C->top()); // build a new edge
+ }
+ Node* new_st = st->clone();
+ new_st->set_req(MemNode::Control, in(Control));
+ new_st->set_req(MemNode::Memory, prev_mem);
+ new_st->set_req(MemNode::Address, make_raw_address(start, phase));
+ new_st = phase->transform(new_st);
+
+ // At this point, new_st might have swallowed a pre-existing store
+ // at the same offset, or perhaps new_st might have disappeared,
+ // if it redundantly stored the same value (or zero to fresh memory).
+
+ // In any case, wire it in:
+ set_req(i, new_st);
+
+ // The caller may now kill the old guy.
+ DEBUG_ONLY(Node* check_st = find_captured_store(start, size_in_bytes, phase));
+ assert(check_st == new_st || check_st == NULL, "must be findable");
+ assert(!is_complete(), "");
+ return new_st;
+}
+
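+// Record the constant 'con' in the byte image 'tiles' at offset 'st_off'.
+// The store size must be a power-of-two primitive size and the offset must
+// be size-aligned; otherwise return false.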
+static bool store_constant(jlong* tiles, int num_tiles,
+ intptr_t st_off, int st_size,
+ jlong con) {
+ if ((st_off & (st_size-1)) != 0)
+ return false; // strange store offset (assume size==2**N)
+ address addr = (address)tiles + st_off;
+ assert(st_off >= 0 && addr+st_size <= (address)&tiles[num_tiles], "oob");
+ switch (st_size) {
+ case sizeof(jbyte): *(jbyte*) addr = (jbyte) con; break;
+ case sizeof(jchar): *(jchar*) addr = (jchar) con; break;
+ case sizeof(jint): *(jint*) addr = (jint) con; break;
+ case sizeof(jlong): *(jlong*) addr = (jlong) con; break;
+ default: return false; // strange store size (detect size!=2**N here)
+ }
+ return true; // return success to caller
+}
+
+// Coalesce subword constants into int constants and possibly
+// into long constants. The goal, if the CPU permits,
+// is to initialize the object with a small number of 64-bit tiles.
+// Also, convert floating-point constants to bit patterns.
+// Non-constants are not relevant to this pass.
+//
+// In terms of the running example on InitializeNode::InitializeNode
+// and InitializeNode::capture_store, here is the transformation
+// of rawstore1 and rawstore2 into rawstore12:
+// alloc = (Allocate ...)
+// rawoop = alloc.RawAddress
+// tile12 = 0x00010002
+// rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12)
+// init = (Initialize alloc.Control alloc.Memory rawoop rawstore12)
+//
+void
+InitializeNode::coalesce_subword_stores(intptr_t header_size,
+ Node* size_in_bytes,
+ PhaseGVN* phase) {
+ Compile* C = phase->C;
+
+ assert(stores_are_sane(phase), "");
+ // Note: After this pass, they are not completely sane,
+ // since there may be some overlaps.
+
+ int old_subword = 0, old_long = 0, new_int = 0, new_long = 0;
+
+ intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
+ intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit);
+ size_limit = MIN2(size_limit, ti_limit);
+ size_limit = align_size_up(size_limit, BytesPerLong);
+ int num_tiles = size_limit / BytesPerLong;
+
+ // allocate space for the tile map:
+ const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small
+ jlong tiles_buf[small_len];
+ Node* nodes_buf[small_len];
+ jlong inits_buf[small_len];
+ jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0]
+ : NEW_RESOURCE_ARRAY(jlong, num_tiles));
+ Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0]
+ : NEW_RESOURCE_ARRAY(Node*, num_tiles));
+ jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0]
+ : NEW_RESOURCE_ARRAY(jlong, num_tiles));
+ // tiles: exact bitwise model of all primitive constants
+ // nodes: last constant-storing node subsumed into the tiles model
+ // inits: which bytes (in each tile) are touched by any initializations
+
+ //// Pass A: Fill in the tile model with any relevant stores.
+
+ Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles);
+ Copy::zero_to_bytes(nodes, sizeof(nodes[0]) * num_tiles);
+ Copy::zero_to_bytes(inits, sizeof(inits[0]) * num_tiles);
+ Node* zmem = zero_memory(); // initially zero memory state
+ for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
+ Node* st = in(i);
+ intptr_t st_off = get_store_offset(st, phase);
+
+ // Figure out the store's offset and constant value:
+ if (st_off < header_size) continue; //skip (ignore header)
+ if (st->in(MemNode::Memory) != zmem) continue; //skip (odd store chain)
+ int st_size = st->as_Store()->memory_size();
+ if (st_off + st_size > size_limit) break;
+
+ // Record which bytes are touched, whether by constant or not.
+ if (!store_constant(inits, num_tiles, st_off, st_size, (jlong) -1))
+ continue; // skip (strange store size)
+
+ const Type* val = phase->type(st->in(MemNode::ValueIn));
+ if (!val->singleton()) continue; //skip (non-con store)
+ BasicType type = val->basic_type();
+
+ jlong con = 0;
+ switch (type) {
+ case T_INT: con = val->is_int()->get_con(); break;
+ case T_LONG: con = val->is_long()->get_con(); break;
+ case T_FLOAT: con = jint_cast(val->getf()); break;
+ case T_DOUBLE: con = jlong_cast(val->getd()); break;
+ default: continue; //skip (odd store type)
+ }
+
+ if (type == T_LONG && Matcher::isSimpleConstant64(con) &&
+ st->Opcode() == Op_StoreL) {
+ continue; // This StoreL is already optimal.
+ }
+
+ // Store down the constant.
+ store_constant(tiles, num_tiles, st_off, st_size, con);
+
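+ // Index of the 64-bit tile that contains this store.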
+ intptr_t j = st_off >> LogBytesPerLong;
+
+ if (type == T_INT && st_size == BytesPerInt
+ && (st_off & BytesPerInt) == BytesPerInt) {
+ jlong lcon = tiles[j];
+ if (!Matcher::isSimpleConstant64(lcon) &&
+ st->Opcode() == Op_StoreI) {
+ // This StoreI is already optimal by itself.
+ jint* intcon = (jint*) &tiles[j];
+ intcon[1] = 0; // undo the store_constant()
+
+ // If the previous store is also optimal by itself, back up and
+ // undo the action of the previous loop iteration... if we can.
+ // But if we can't, just let the previous half take care of itself.
+ st = nodes[j];
+ st_off -= BytesPerInt;
+ con = intcon[0];
+ if (con != 0 && st != NULL && st->Opcode() == Op_StoreI) {
+ assert(st_off >= header_size, "still ignoring header");
+ assert(get_store_offset(st, phase) == st_off, "must be");
+ assert(in(i-1) == zmem, "must be");
+ DEBUG_ONLY(const Type* tcon = phase->type(st->in(MemNode::ValueIn)));
+ assert(con == tcon->is_int()->get_con(), "must be");
+ // Undo the effects of the previous loop trip, which swallowed st:
+ intcon[0] = 0; // undo store_constant()
+ set_req(i-1, st); // undo set_req(i, zmem)
+ nodes[j] = NULL; // undo nodes[j] = st
+ --old_subword; // undo ++old_subword
+ }
+ continue; // This StoreI is already optimal.
+ }
+ }
+
+ // This store is not needed.
+ set_req(i, zmem);
+ nodes[j] = st; // record for the moment
+ if (st_size < BytesPerLong) // something has changed
+ ++old_subword; // includes int/float, but who's counting...
+ else ++old_long;
+ }
+
+ if ((old_subword + old_long) == 0)
+ return; // nothing more to do
+
+ //// Pass B: Convert any non-zero tiles into optimal constant stores.
+ // Be sure to insert them before overlapping non-constant stores.
+ // (E.g., byte[] x = { 1,2,y,4 } => x[int 0] = 0x01020004, x[2]=y.)
+ for (int j = 0; j < num_tiles; j++) {
+ jlong con = tiles[j];
+ jlong init = inits[j];
+ if (con == 0) continue;
+ jint con0, con1; // split the constant, address-wise
+ jint init0, init1; // split the init map, address-wise
+ { union { jlong con; jint intcon[2]; } u;
+ u.con = con;
+ con0 = u.intcon[0];
+ con1 = u.intcon[1];
+ u.con = init;
+ init0 = u.intcon[0];
+ init1 = u.intcon[1];
+ }
+
+ Node* old = nodes[j];
+ assert(old != NULL, "need the prior store");
+ intptr_t offset = (j * BytesPerLong);
+
+ bool split = !Matcher::isSimpleConstant64(con);
+
+ if (offset < header_size) {
+ assert(offset + BytesPerInt >= header_size, "second int counts");
+ assert(*(jint*)&tiles[j] == 0, "junk in header");
+ split = true; // only the second word counts
+ // Example: int a[] = { 42 ... }
+ } else if (con0 == 0 && init0 == -1) {
+ split = true; // first word is covered by full inits
+ // Example: int a[] = { ... foo(), 42 ... }
+ } else if (con1 == 0 && init1 == -1) {
+ split = true; // second word is covered by full inits
+ // Example: int a[] = { ... 42, foo() ... }
+ }
+
+ // Here's a case where init0 is neither 0 nor -1:
+ // byte a[] = { ... 0,0,foo(),0, 0,0,0,42 ... }
+ // Assuming big-endian memory, init0, init1 are 0x0000FF00, 0x000000FF.
+ // In this case the tile is not split; it is (jlong)42.
+ // The big tile is stored down, and then the foo() value is inserted.
+ // (If there were foo(),foo() instead of foo(),0, init0 would be -1.)
+
+ Node* ctl = old->in(MemNode::Control);
+ Node* adr = make_raw_address(offset, phase);
+ const TypePtr* atp = TypeRawPtr::BOTTOM;
+
+ // One or two coalesced stores to plop down.
+ Node* st[2];
+ intptr_t off[2];
+ int nst = 0;
+ if (!split) {
+ ++new_long;
+ off[nst] = offset;
+ st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
+ phase->longcon(con), T_LONG);
+ } else {
+ // Omit either if it is a zero.
+ if (con0 != 0) {
+ ++new_int;
+ off[nst] = offset;
+ st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
+ phase->intcon(con0), T_INT);
+ }
+ if (con1 != 0) {
+ ++new_int;
+ offset += BytesPerInt;
+ adr = make_raw_address(offset, phase);
+ off[nst] = offset;
+ st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
+ phase->intcon(con1), T_INT);
+ }
+ }
+
+ // Insert second store first, then the first before the second.
+ // Insert each one just before any overlapping non-constant stores.
+ while (nst > 0) {
+ Node* st1 = st[--nst];
+ C->copy_node_notes_to(st1, old);
+ st1 = phase->transform(st1);
+ offset = off[nst];
+ assert(offset >= header_size, "do not smash header");
+ int ins_idx = captured_store_insertion_point(offset, /*size:*/0, phase);
+ guarantee(ins_idx != 0, "must re-insert constant store");
+ if (ins_idx < 0) ins_idx = -ins_idx; // never overlap
+ if (ins_idx > InitializeNode::RawStores && in(ins_idx-1) == zmem)
+ set_req(--ins_idx, st1);
+ else
+ ins_req(ins_idx, st1);
+ }
+ }
+
+ if (PrintCompilation && WizardMode)
+ tty->print_cr("Changed %d/%d subword/long constants into %d/%d int/long",
+ old_subword, old_long, new_int, new_long);
+ if (C->log() != NULL)
+ C->log()->elem("comment that='%d/%d subword/long to %d/%d int/long'",
+ old_subword, old_long, new_int, new_long);
+
+ // Clean up any remaining occurrences of zmem:
+ remove_extra_zeroes();
+}
+
+// Explore forward from in(start) to find the first fully initialized
+// word, and return its offset. Skip groups of subword stores which
+// together initialize full words. If in(start) is itself part of a
+// fully initialized word, return the offset of in(start). If there
+// are no following full-word stores, or if something is fishy, return
+// a negative value.
+intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) {
+ int int_map = 0;
+ intptr_t int_map_off = 0;
+ const int FULL_MAP = right_n_bits(BytesPerInt); // the int_map we hope for
+
+ for (uint i = start, limit = req(); i < limit; i++) {
+ Node* st = in(i);
+
+ intptr_t st_off = get_store_offset(st, phase);
+ if (st_off < 0) break; // return conservative answer
+
+ int st_size = st->as_Store()->memory_size();
+ if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) {
+ return st_off; // we found a complete word init
+ }
+
+ // update the map:
+
+ intptr_t this_int_off = align_size_down(st_off, BytesPerInt);
+ if (this_int_off != int_map_off) {
+ // reset the map:
+ int_map = 0;
+ int_map_off = this_int_off;
+ }
+
+ int subword_off = st_off - this_int_off;
+ int_map |= right_n_bits(st_size) << subword_off;
+ if ((int_map & FULL_MAP) == FULL_MAP) {
+ return this_int_off; // we found a complete word init
+ }
+
+ // Did this store hit or cross the word boundary?
+ intptr_t next_int_off = align_size_down(st_off + st_size, BytesPerInt);
+ if (next_int_off == this_int_off + BytesPerInt) {
+ // We passed the current int, without fully initializing it.
+ int_map_off = next_int_off;
+ int_map >>= BytesPerInt;
+ } else if (next_int_off > this_int_off + BytesPerInt) {
+ // We passed the current and next int.
+ return this_int_off + BytesPerInt;
+ }
+ }
+
+ return -1;
+}
+
+
+// Called when the associated AllocateNode is expanded into CFG.
+// At this point, we may perform additional optimizations.
+// Linearize the stores by ascending offset, to make memory
+// activity as coherent as possible.
+Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
+ intptr_t header_size,
+ Node* size_in_bytes,
+ PhaseGVN* phase) {
+ assert(!is_complete(), "not already complete");
+ assert(stores_are_sane(phase), "");
+ assert(allocation() != NULL, "must be present");
+
+ remove_extra_zeroes();
+
+ if (ReduceFieldZeroing || ReduceBulkZeroing)
+ // reduce instruction count for common initialization patterns
+ coalesce_subword_stores(header_size, size_in_bytes, phase);
+
+ Node* zmem = zero_memory(); // initially zero memory state
+ Node* inits = zmem; // accumulating a linearized chain of inits
+ #ifdef ASSERT
+ intptr_t last_init_off = sizeof(oopDesc); // previous init offset
+ intptr_t last_init_end = sizeof(oopDesc); // previous init offset+size
+ intptr_t last_tile_end = sizeof(oopDesc); // previous tile offset+size
+ #endif
+ intptr_t zeroes_done = header_size;
+
+ bool do_zeroing = true; // we might give up if inits are very sparse
+ int big_init_gaps = 0; // how many large gaps have we seen?
+
+ if (ZeroTLAB) do_zeroing = false;
+ if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false;
+
+ for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
+ Node* st = in(i);
+ intptr_t st_off = get_store_offset(st, phase);
+ if (st_off < 0)
+ break; // unknown junk in the inits
+ if (st->in(MemNode::Memory) != zmem)
+ break; // complicated store chains somehow in list
+
+ int st_size = st->as_Store()->memory_size();
+ intptr_t next_init_off = st_off + st_size;
+
+ if (do_zeroing && zeroes_done < next_init_off) {
+ // See if this store needs a zero before it or under it.
+ intptr_t zeroes_needed = st_off;
+
+ if (st_size < BytesPerInt) {
+ // Look for subword stores which only partially initialize words.
+ // If we find some, we must lay down some word-level zeroes first,
+ // underneath the subword stores.
+ //
+ // Examples:
+ // byte[] a = { p,q,r,s } => a[0]=p,a[1]=q,a[2]=r,a[3]=s
+ // byte[] a = { x,y,0,0 } => a[0..3] = 0, a[0]=x,a[1]=y
+ // byte[] a = { 0,0,z,0 } => a[0..3] = 0, a[2]=z
+ //
+ // Note: coalesce_subword_stores may have already done this,
+ // if it was prompted by constant non-zero subword initializers.
+ // But this case can still arise with non-constant stores.
+
+ intptr_t next_full_store = find_next_fullword_store(i, phase);
+
+ // In the examples above:
+ // in(i) p q r s x y z
+ // st_off 12 13 14 15 12 13 14
+ // st_size 1 1 1 1 1 1 1
+ // next_full_s. 12 16 16 16 16 16 16
+ // z's_done 12 16 16 16 12 16 12
+ // z's_needed 12 16 16 16 16 16 16
+ // zsize 0 0 0 0 4 0 4
+ if (next_full_store < 0) {
+ // Conservative tack: Zero to end of current word.
+ zeroes_needed = align_size_up(zeroes_needed, BytesPerInt);
+ } else {
+ // Zero to beginning of next fully initialized word.
+ // Or, don't zero at all, if we are already in that word.
+ assert(next_full_store >= zeroes_needed, "must go forward");
+ assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary");
+ zeroes_needed = next_full_store;
+ }
+ }
+
+ if (zeroes_needed > zeroes_done) {
+ intptr_t zsize = zeroes_needed - zeroes_done;
+ // Do some incremental zeroing on rawmem, in parallel with inits.
+ zeroes_done = align_size_down(zeroes_done, BytesPerInt);
+ rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
+ zeroes_done, zeroes_needed,
+ phase);
+ zeroes_done = zeroes_needed;
+ if (zsize > Matcher::init_array_short_size && ++big_init_gaps > 2)
+ do_zeroing = false; // leave the hole, next time
+ }
+ }
+
+ // Collect the store and move on:
+ st->set_req(MemNode::Memory, inits);
+ inits = st; // put it on the linearized chain
+ set_req(i, zmem); // unhook from previous position
+
+ if (zeroes_done == st_off)
+ zeroes_done = next_init_off;
+
+ assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any");
+
+ #ifdef ASSERT
+ // Various order invariants. Weaker than stores_are_sane because
+ // a large constant tile can be filled in by smaller non-constant stores.
+ assert(st_off >= last_init_off, "inits do not reverse");
+ last_init_off = st_off;
+ const Type* val = NULL;
+ if (st_size >= BytesPerInt &&
+ (val = phase->type(st->in(MemNode::ValueIn)))->singleton() &&
+ (int)val->basic_type() < (int)T_OBJECT) {
+ assert(st_off >= last_tile_end, "tiles do not overlap");
+ assert(st_off >= last_init_end, "tiles do not overwrite inits");
+ last_tile_end = MAX2(last_tile_end, next_init_off);
+ } else {
+ intptr_t st_tile_end = align_size_up(next_init_off, BytesPerLong);
+ assert(st_tile_end >= last_tile_end, "inits stay with tiles");
+ assert(st_off >= last_init_end, "inits do not overlap");
+ last_init_end = next_init_off; // it's a non-tile
+ }
+ #endif //ASSERT
+ }
+
+ remove_extra_zeroes(); // clear out all the zmems left over
+ add_req(inits);
+
+ if (!ZeroTLAB) {
+ // If anything remains to be zeroed, zero it all now.
+ zeroes_done = align_size_down(zeroes_done, BytesPerInt);
+ // if it is the last unused 4 bytes of an instance, forget about it
+ intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint);
+ if (zeroes_done + BytesPerLong >= size_limit) {
+ assert(allocation() != NULL, "");
+ Node* klass_node = allocation()->in(AllocateNode::KlassNode);
+ ciKlass* k = phase->type(klass_node)->is_klassptr()->klass();
+ if (zeroes_done == k->layout_helper())
+ zeroes_done = size_limit;
+ }
+ if (zeroes_done < size_limit) {
+ rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
+ zeroes_done, size_in_bytes, phase);
+ }
+ }
+
+ set_complete(phase);
+ return rawmem;
+}
+
+
+#ifdef ASSERT
+bool InitializeNode::stores_are_sane(PhaseTransform* phase) {
+ if (is_complete())
+ return true; // stores could be anything at this point
+ intptr_t last_off = sizeof(oopDesc);
+ for (uint i = InitializeNode::RawStores; i < req(); i++) {
+ Node* st = in(i);
+ intptr_t st_off = get_store_offset(st, phase);
+ if (st_off < 0) continue; // ignore dead garbage
+ if (last_off > st_off) {
+ tty->print_cr("*** bad store offset at %d: %d > %d", i, last_off, st_off);
+ this->dump(2);
+ assert(false, "ascending store offsets");
+ return false;
+ }
+ last_off = st_off + st->as_Store()->memory_size();
+ }
+ return true;
+}
+#endif //ASSERT
+
+
+
+
+//============================MergeMemNode=====================================
+//
+// SEMANTICS OF MEMORY MERGES: A MergeMem is a memory state assembled from several
+// contributing store or call operations. Each contributor provides the memory
+// state for a particular "alias type" (see Compile::alias_type). For example,
+// if a MergeMem has an input X for alias category #6, then any memory reference
+// to alias category #6 may use X as its memory state input, as an exact equivalent
+// to using the MergeMem as a whole.
+// Load<6>( MergeMem(<6>: X, ...), p ) <==> Load<6>(X,p)
+//
+// (Here, the <N> notation gives the index of the relevant adr_type.)
+//
+// In one special case (and more cases in the future), alias categories overlap.
+// The special alias category "Bot" (Compile::AliasIdxBot) includes all memory
+// states. Therefore, if a MergeMem has only one contributing input W for Bot,
+// it is exactly equivalent to that state W:
+// MergeMem(<Bot>: W) <==> W
+//
+// Usually, the merge has more than one input. In that case, where inputs
+// overlap (i.e., one is Bot), the narrower alias type determines the memory
+// state for that type, and the wider alias type (Bot) fills in everywhere else:
+// Load<5>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<5>(W,p)
+// Load<6>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<6>(X,p)
+//
+// A merge can take a "wide" memory state as one of its narrow inputs.
+// This simply means that the merge observes only the relevant parts of
+// the wide input. That is, wide memory states arriving at narrow merge inputs
+// are implicitly "filtered" or "sliced" as necessary. (This is rare.)
+//
+// These rules imply that MergeMem nodes may cascade (via their <Bot> links),
+// and that memory slices "leak through":
+// MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y)) <==> MergeMem(<Bot>: W, <7>: Y)
+//
+// But, in such a cascade, repeated memory slices can "block the leak":
+// MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y), <7>: Y') <==> MergeMem(<Bot>: W, <7>: Y')
+//
+// In the last example, Y is not part of the combined memory state of the
+// outermost MergeMem. The system must, of course, prevent unschedulable
+// memory states from arising, so you can be sure that the state Y is somehow
+// a precursor to state Y'.
+//
+//
+// REPRESENTATION OF MEMORY MERGES: The indexes used to address the Node::in array
+// of each MergeMemNode array are exactly the numerical alias indexes, including
+// but not limited to AliasIdxTop, AliasIdxBot, and AliasIdxRaw. The functions
+// Compile::alias_type (and kin) produce and manage these indexes.
+//
+// By convention, the value of in(AliasIdxTop) (i.e., in(1)) is always the top node.
+// (Note that this provides quick access to the top node inside MergeMem methods,
+// without the need to reach out via TLS to Compile::current.)
+//
+// As a consequence of what was just described, a MergeMem that represents a full
+// memory state has an edge in(AliasIdxBot) which is a "wide" memory state,
+// containing all alias categories.
+//
+// MergeMem nodes never (?) have control inputs, so in(0) is NULL.
+//
+// All other edges in(N) (including in(AliasIdxRaw), which is in(3)) are either
+// a memory state for the alias type <N>, or else the top node, meaning that
+// there is no particular input for that alias type. Note that the length of
+// a MergeMem is variable, and may be extended at any time to accommodate new
+// memory states at larger alias indexes. When merges grow, they are of course
+// filled with "top" in the unused in() positions.
+//
+// This use of top is named "empty_memory()", or "empty_mem" (no-memory) as a variable.
+// (Top was chosen because it works smoothly with passes like GCM.)
+//
+// For convenience, we hardwire the alias index for TypeRawPtr::BOTTOM. (It is
+// the type of random VM bits like TLS references.) Since it is always the
+// first non-Bot memory slice, some low-level loops use it to initialize an
+// index variable: for (i = AliasIdxRaw; i < req(); i++).
+//
+//
+// ACCESSORS: There is a special accessor MergeMemNode::base_memory which returns
+// the distinguished "wide" state. The accessor MergeMemNode::memory_at(N) returns
+// the memory state for alias type <N> or, if there is no particular slice at <N>,
+// the base memory.  To prevent bugs, memory_at does not accept <Top>
+// or <Bot> indexes. The iterator MergeMemStream provides robust iteration over
+// MergeMem nodes or pairs of such nodes, ensuring that the non-top edges are visited.
+//
+// %%%% We may get rid of base_memory as a separate accessor at some point; it isn't
+// really that different from the other memory inputs. An abbreviation called
+// "bot_memory()" for "memory_at(AliasIdxBot)" would keep code tidy.
+//
+//
+// PARTIAL MEMORY STATES: During optimization, MergeMem nodes may arise that represent
+// partial memory states. When a Phi splits through a MergeMem, the copy of the Phi
+// that "emerges though" the base memory will be marked as excluding the alias types
+// of the other (narrow-memory) copies which "emerged through" the narrow edges:
+//
+// Phi<Bot>(U, MergeMem(<Bot>: W, <8>: Y))
+// ==Ideal=> MergeMem(<Bot>: Phi<Bot-8>(U, W), Phi<8>(U, Y))
+//
+// This strange "subtraction" effect is necessary to ensure IGVN convergence.
+// (It is currently unimplemented.) As you can see, the resulting merge is
+// actually a disjoint union of memory states, rather than an overlay.
+//
+
+//------------------------------MergeMemNode-----------------------------------
+Node* MergeMemNode::make_empty_memory() {
+ Node* empty_memory = (Node*) Compile::current()->top();
+ assert(empty_memory->is_top(), "correct sentinel identity");
+ return empty_memory;
+}
+
+MergeMemNode::MergeMemNode(Node *new_base) : Node(1+Compile::AliasIdxRaw) {
+ init_class_id(Class_MergeMem);
+ // all inputs are nullified in Node::Node(int)
+ // set_input(0, NULL); // no control input
+
+ // Initialize the edges uniformly to top, for starters.
+ Node* empty_mem = make_empty_memory();
+ for (uint i = Compile::AliasIdxTop; i < req(); i++) {
+ init_req(i,empty_mem);
+ }
+ assert(empty_memory() == empty_mem, "");
+
+ if( new_base != NULL && new_base->is_MergeMem() ) {
+ MergeMemNode* mdef = new_base->as_MergeMem();
+ assert(mdef->empty_memory() == empty_mem, "consistent sentinels");
+ for (MergeMemStream mms(this, mdef); mms.next_non_empty2(); ) {
+ mms.set_memory(mms.memory2());
+ }
+ assert(base_memory() == mdef->base_memory(), "");
+ } else {
+ set_base_memory(new_base);
+ }
+}
+
+// Make a new, untransformed MergeMem with the same base as 'mem'.
+// If mem is itself a MergeMem, populate the result with the same edges.
+MergeMemNode* MergeMemNode::make(Compile* C, Node* mem) {
+ return new(C, 1+Compile::AliasIdxRaw) MergeMemNode(mem);
+}
+
+//------------------------------cmp--------------------------------------------
+uint MergeMemNode::hash() const { return NO_HASH; }
+uint MergeMemNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//------------------------------Identity---------------------------------------
+Node* MergeMemNode::Identity(PhaseTransform *phase) {
+ // Identity if this merge point does not record any interesting memory
+ // disambiguations.
+ Node* base_mem = base_memory();
+ Node* empty_mem = empty_memory();
+ if (base_mem != empty_mem) { // Memory path is not dead?
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ Node* mem = in(i);
+ if (mem != empty_mem && mem != base_mem) {
+ return this; // Many memory splits; no change
+ }
+ }
+ }
+ return base_mem; // No memory splits; ID on the one true input
+}
+
+//------------------------------Ideal------------------------------------------
+// This method is invoked recursively on chains of MergeMem nodes
+Node *MergeMemNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Remove chain'd MergeMems
+ //
+ // This is delicate, because each "in(i)" (i >= Raw) is interpreted
+ // relative to the "in(Bot)". Since we are patching both at the same time,
+ // we have to be careful to read each "in(i)" relative to the old "in(Bot)",
+ // but rewrite each "in(i)" relative to the new "in(Bot)".
+ Node *progress = NULL;
+
+
+ Node* old_base = base_memory();
+ Node* empty_mem = empty_memory();
+ if (old_base == empty_mem)
+ return NULL; // Dead memory path.
+
+ MergeMemNode* old_mbase;
+ if (old_base != NULL && old_base->is_MergeMem())
+ old_mbase = old_base->as_MergeMem();
+ else
+ old_mbase = NULL;
+ Node* new_base = old_base;
+
+ // simplify stacked MergeMems in base memory
+ if (old_mbase) new_base = old_mbase->base_memory();
+
+ // the base memory might contribute new slices beyond my req()
+ if (old_mbase) grow_to_match(old_mbase);
+
+ // Look carefully at the base node if it is a phi.
+ PhiNode* phi_base;
+ if (new_base != NULL && new_base->is_Phi())
+ phi_base = new_base->as_Phi();
+ else
+ phi_base = NULL;
+
+ Node* phi_reg = NULL;
+ uint phi_len = (uint)-1;
+ if (phi_base != NULL && !phi_base->is_copy()) {
+ // do not examine phi if degraded to a copy
+ phi_reg = phi_base->region();
+ phi_len = phi_base->req();
+ // see if the phi is unfinished
+ for (uint i = 1; i < phi_len; i++) {
+ if (phi_base->in(i) == NULL) {
+ // incomplete phi; do not look at it yet!
+ phi_reg = NULL;
+ phi_len = (uint)-1;
+ break;
+ }
+ }
+ }
+
+ // Note: We do not call verify_sparse on entry, because inputs
+ // can normalize to the base_memory via subsume_node or similar
+ // mechanisms. This method repairs that damage.
+
+ assert(!old_mbase || old_mbase->is_empty_memory(empty_mem), "consistent sentinels");
+
+ // Look at each slice.
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ Node* old_in = in(i);
+ // calculate the old memory value
+ Node* old_mem = old_in;
+ if (old_mem == empty_mem) old_mem = old_base;
+ assert(old_mem == memory_at(i), "");
+
+ // maybe update (reslice) the old memory value
+
+ // simplify stacked MergeMems
+ Node* new_mem = old_mem;
+ MergeMemNode* old_mmem;
+ if (old_mem != NULL && old_mem->is_MergeMem())
+ old_mmem = old_mem->as_MergeMem();
+ else
+ old_mmem = NULL;
+ if (old_mmem == this) {
+ // This can happen if loops break up and safepoints disappear.
+ // A merge of BotPtr (default) with a RawPtr memory derived from a
+ // safepoint can be rewritten to a merge of the same BotPtr with
+ // the BotPtr phi coming into the loop. If that phi disappears
+ // also, we can end up with a self-loop of the mergemem.
+ // In general, if loops degenerate and memory effects disappear,
+ // a mergemem can be left looking at itself. This simply means
+ // that the mergemem's default should be used, since there is
+ // no longer any apparent effect on this slice.
+ // Note: If a memory slice is a MergeMem cycle, it is unreachable
+ // from start. Update the input to TOP.
+ new_mem = (new_base == this || new_base == empty_mem)? empty_mem : new_base;
+ }
+ else if (old_mmem != NULL) {
+ new_mem = old_mmem->memory_at(i);
+ }
+ // else preceding memory was not a MergeMem
+
+ // replace equivalent phis (unfortunately, they do not GVN together)
+ if (new_mem != NULL && new_mem != new_base &&
+ new_mem->req() == phi_len && new_mem->in(0) == phi_reg) {
+ if (new_mem->is_Phi()) {
+ PhiNode* phi_mem = new_mem->as_Phi();
+ for (uint i = 1; i < phi_len; i++) {
+ if (phi_base->in(i) != phi_mem->in(i)) {
+ phi_mem = NULL;
+ break;
+ }
+ }
+ if (phi_mem != NULL) {
+ // equivalent phi nodes; revert to the def
+ new_mem = new_base;
+ }
+ }
+ }
+
+ // maybe store down a new value
+ Node* new_in = new_mem;
+ if (new_in == new_base) new_in = empty_mem;
+
+ if (new_in != old_in) {
+ // Warning: Do not combine this "if" with the previous "if"
+ // A memory slice might have to be rewritten even if it is semantically
+ // unchanged, if the base_memory value has changed.
+ set_req(i, new_in);
+ progress = this; // Report progress
+ }
+ }
+
+ if (new_base != old_base) {
+ set_req(Compile::AliasIdxBot, new_base);
+ // Don't use set_base_memory(new_base), because we need to update du.
+ assert(base_memory() == new_base, "");
+ progress = this;
+ }
+
+ if( base_memory() == this ) {
+ // a self cycle indicates this memory path is dead
+ set_req(Compile::AliasIdxBot, empty_mem);
+ }
+
+ // Resolve external cycles by calling Ideal on a MergeMem base_memory
+ // Recursion must occur after the self cycle check above
+ if( base_memory()->is_MergeMem() ) {
+ MergeMemNode *new_mbase = base_memory()->as_MergeMem();
+ Node *m = phase->transform(new_mbase); // Rollup any cycles
+ if( m != NULL && (m->is_top() ||
+ m->is_MergeMem() && m->as_MergeMem()->base_memory() == empty_mem) ) {
+ // propagate rollup of dead cycle to self
+ set_req(Compile::AliasIdxBot, empty_mem);
+ }
+ }
+
+ if( base_memory() == empty_mem ) {
+ progress = this;
+ // Cut inputs during Parse phase only.
+ // During Optimize phase a dead MergeMem node will be subsumed by Top.
+ if( !can_reshape ) {
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ if( in(i) != empty_mem ) { set_req(i, empty_mem); }
+ }
+ }
+ }
+
+ if( !progress && base_memory()->is_Phi() && can_reshape ) {
+ // Check if PhiNode::Ideal's "Split phis through memory merges"
+ // transform should be attempted. Look for this->phi->this cycle.
+ uint merge_width = req();
+ if (merge_width > Compile::AliasIdxRaw) {
+ PhiNode* phi = base_memory()->as_Phi();
+ for( uint i = 1; i < phi->req(); ++i ) {// For all paths in
+ if (phi->in(i) == this) {
+ phase->is_IterGVN()->_worklist.push(phi);
+ break;
+ }
+ }
+ }
+ }
+
+ assert(verify_sparse(), "please, no dups of base");
+ return progress;
+}
+
+//-------------------------set_base_memory-------------------------------------
+void MergeMemNode::set_base_memory(Node *new_base) {
+ Node* empty_mem = empty_memory();
+ set_req(Compile::AliasIdxBot, new_base);
+ assert(memory_at(req()) == new_base, "must set default memory");
+ // Clear out other occurrences of new_base:
+ if (new_base != empty_mem) {
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ if (in(i) == new_base) set_req(i, empty_mem);
+ }
+ }
+}
+
+//------------------------------out_RegMask------------------------------------
+const RegMask &MergeMemNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//------------------------------dump_spec--------------------------------------
+#ifndef PRODUCT
+void MergeMemNode::dump_spec(outputStream *st) const {
+ st->print(" {");
+ Node* base_mem = base_memory();
+ for( uint i = Compile::AliasIdxRaw; i < req(); i++ ) {
+ Node* mem = memory_at(i);
+ if (mem == base_mem) { st->print(" -"); continue; }
+ st->print( " N%d:", mem->_idx );
+ Compile::current()->get_adr_type(i)->dump_on(st);
+ }
+ st->print(" }");
+}
+#endif // !PRODUCT
+
+
+#ifdef ASSERT
+static bool might_be_same(Node* a, Node* b) {
+ if (a == b) return true;
+ if (!(a->is_Phi() || b->is_Phi())) return false;
+ // phis shift around during optimization
+ return true; // pretty stupid...
+}
+
+// verify a narrow slice (either incoming or outgoing)
+static void verify_memory_slice(const MergeMemNode* m, int alias_idx, Node* n) {
+ if (!VerifyAliases) return; // don't bother to verify unless requested
+ if (is_error_reported()) return; // muzzle asserts when debugging an error
+ if (Node::in_dump()) return; // muzzle asserts when printing
+ assert(alias_idx >= Compile::AliasIdxRaw, "must not disturb base_memory or sentinel");
+ assert(n != NULL, "");
+ // Elide intervening MergeMem's
+ while (n->is_MergeMem()) {
+ n = n->as_MergeMem()->memory_at(alias_idx);
+ }
+ Compile* C = Compile::current();
+ const TypePtr* n_adr_type = n->adr_type();
+ if (n == m->empty_memory()) {
+ // Implicit copy of base_memory()
+ } else if (n_adr_type != TypePtr::BOTTOM) {
+ assert(n_adr_type != NULL, "new memory must have a well-defined adr_type");
+ assert(C->must_alias(n_adr_type, alias_idx), "new memory must match selected slice");
+ } else {
+ // A few places like make_runtime_call "know" that VM calls are narrow,
+ // and can be used to update only the VM bits stored as TypeRawPtr::BOTTOM.
+ bool expected_wide_mem = false;
+ if (n == m->base_memory()) {
+ expected_wide_mem = true;
+ } else if (alias_idx == Compile::AliasIdxRaw ||
+ n == m->memory_at(Compile::AliasIdxRaw)) {
+ expected_wide_mem = true;
+ } else if (!C->alias_type(alias_idx)->is_rewritable()) {
+ // memory can "leak through" calls on channels that
+ // are write-once. Allow this also.
+ expected_wide_mem = true;
+ }
+ assert(expected_wide_mem, "expected narrow slice replacement");
+ }
+}
+#else // !ASSERT
+#define verify_memory_slice(m,i,n) (0) // PRODUCT version is no-op
+#endif
+
+
+//-----------------------------memory_at---------------------------------------
+Node* MergeMemNode::memory_at(uint alias_idx) const {
+ assert(alias_idx >= Compile::AliasIdxRaw ||
+ alias_idx == Compile::AliasIdxBot && Compile::current()->AliasLevel() == 0,
+ "must avoid base_memory and AliasIdxTop");
+
+ // Otherwise, it is a narrow slice.
+ Node* n = alias_idx < req() ? in(alias_idx) : empty_memory();
+ Compile *C = Compile::current();
+ if (is_empty_memory(n)) {
+ // the array is sparse; empty slots are the "top" node
+ n = base_memory();
+ assert(Node::in_dump()
+ || n == NULL || n->bottom_type() == Type::TOP
+ || n->adr_type() == TypePtr::BOTTOM
+ || n->adr_type() == TypeRawPtr::BOTTOM
+ || Compile::current()->AliasLevel() == 0,
+ "must be a wide memory");
+ // AliasLevel == 0 if we are organizing the memory states manually.
+ // See verify_memory_slice for comments on TypeRawPtr::BOTTOM.
+ } else {
+ // make sure the stored slice is sane
+ #ifdef ASSERT
+ if (is_error_reported() || Node::in_dump()) {
+ } else if (might_be_same(n, base_memory())) {
+ // Give it a pass: It is a mostly harmless repetition of the base.
+ // This can arise normally from node subsumption during optimization.
+ } else {
+ verify_memory_slice(this, alias_idx, n);
+ }
+ #endif
+ }
+ return n;
+}
+
+//---------------------------set_memory_at-------------------------------------
+void MergeMemNode::set_memory_at(uint alias_idx, Node *n) {
+ verify_memory_slice(this, alias_idx, n);
+ Node* empty_mem = empty_memory();
+ if (n == base_memory()) n = empty_mem; // collapse default
+ uint need_req = alias_idx+1;
+ if (req() < need_req) {
+ if (n == empty_mem) return; // already the default, so do not grow me
+ // grow the sparse array
+ do {
+ add_req(empty_mem);
+ } while (req() < need_req);
+ }
+ set_req( alias_idx, n );
+}
+
+
+
+//--------------------------iteration_setup------------------------------------
+void MergeMemNode::iteration_setup(const MergeMemNode* other) {
+ if (other != NULL) {
+ grow_to_match(other);
+ // invariant: the finite support of mm2 is within mm->req()
+ #ifdef ASSERT
+ for (uint i = req(); i < other->req(); i++) {
+ assert(other->is_empty_memory(other->in(i)), "slice left uncovered");
+ }
+ #endif
+ }
+ // Replace spurious copies of base_memory by top.
+ Node* base_mem = base_memory();
+ if (base_mem != NULL && !base_mem->is_top()) {
+ for (uint i = Compile::AliasIdxBot+1, imax = req(); i < imax; i++) {
+ if (in(i) == base_mem)
+ set_req(i, empty_memory());
+ }
+ }
+}
+
+//---------------------------grow_to_match-------------------------------------
+void MergeMemNode::grow_to_match(const MergeMemNode* other) {
+ Node* empty_mem = empty_memory();
+ assert(other->is_empty_memory(empty_mem), "consistent sentinels");
+ // look for the finite support of the other memory
+ for (uint i = other->req(); --i >= req(); ) {
+ if (other->in(i) != empty_mem) {
+ uint new_len = i+1;
+ while (req() < new_len) add_req(empty_mem);
+ break;
+ }
+ }
+}
+
+//---------------------------verify_sparse-------------------------------------
+#ifndef PRODUCT
+bool MergeMemNode::verify_sparse() const {
+ assert(is_empty_memory(make_empty_memory()), "sane sentinel");
+ Node* base_mem = base_memory();
+ // The following can happen in degenerate cases, since empty==top.
+ if (is_empty_memory(base_mem)) return true;
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ assert(in(i) != NULL, "sane slice");
+ if (in(i) == base_mem) return false; // should have been the sentinel value!
+ }
+ return true;
+}
+
+bool MergeMemStream::match_memory(Node* mem, const MergeMemNode* mm, int idx) {
+ Node* n;
+ n = mm->in(idx);
+ if (mem == n) return true; // might be empty_memory()
+ n = (idx == Compile::AliasIdxBot)? mm->base_memory(): mm->memory_at(idx);
+ if (mem == n) return true;
+ while (n->is_Phi() && (n = n->as_Phi()->is_copy()) != NULL) {
+ if (mem == n) return true;
+ if (n == NULL) break;
+ }
+ return false;
+}
+#endif // !PRODUCT
diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp
new file mode 100644
index 000000000..989e255a9
--- /dev/null
+++ b/src/share/vm/opto/memnode.hpp
@@ -0,0 +1,1062 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+class MultiNode;
+class PhaseCCP;
+class PhaseTransform;
+
+//------------------------------MemNode----------------------------------------
+// Load or Store, possibly throwing a NULL pointer exception
+class MemNode : public Node {
+protected:
+#ifdef ASSERT
+ const TypePtr* _adr_type; // What kind of memory is being addressed?
+#endif
+ virtual uint size_of() const; // Size is bigger (ASSERT only)
+public:
+ enum { Control, // When is it safe to do this load?
+ Memory, // Chunk of memory is being loaded from
+ Address, // Actually address, derived from base
+ ValueIn, // Value to store
+ OopStore // Preceding oop store, only in StoreCM
+ };
+protected:
+ MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
+ : Node(c0,c1,c2 ) {
+ init_class_id(Class_Mem);
+ debug_only(_adr_type=at; adr_type();)
+ }
+ MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 )
+ : Node(c0,c1,c2,c3) {
+ init_class_id(Class_Mem);
+ debug_only(_adr_type=at; adr_type();)
+ }
+ MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4)
+ : Node(c0,c1,c2,c3,c4) {
+ init_class_id(Class_Mem);
+ debug_only(_adr_type=at; adr_type();)
+ }
+
+ // Helpers for the optimizer. Documented in memnode.cpp.
+ static bool detect_ptr_independence(Node* p1, AllocateNode* a1,
+ Node* p2, AllocateNode* a2,
+ PhaseTransform* phase);
+ static bool adr_phi_is_loop_invariant(Node* adr_phi, Node* cast);
+
+public:
+ // This one should probably be a phase-specific function:
+ static bool detect_dominating_control(Node* dom, Node* sub);
+
+ // Is this Node a MemNode or some descendant? Default is YES.
+ virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );
+
+ virtual const class TypePtr *adr_type() const; // returns bottom_type of address
+
+ // Shared code for Ideal methods:
+ Node *Ideal_common(PhaseGVN *phase, bool can_reshape); // Return -1 for short-circuit NULL.
+
+ // Helper function for adr_type() implementations.
+ static const TypePtr* calculate_adr_type(const Type* t, const TypePtr* cross_check = NULL);
+
+ // Raw access function, to allow copying of adr_type efficiently in
+ // product builds and retain the debug info for debug builds.
+ const TypePtr *raw_adr_type() const {
+#ifdef ASSERT
+ return _adr_type;
+#else
+ return 0;
+#endif
+ }
+
+ // Map a load or store opcode to its corresponding store opcode.
+ // (Return -1 if unknown.)
+ virtual int store_Opcode() const { return -1; }
+
+ // What is the type of the value in memory? (T_VOID means "unspecified".)
+ virtual BasicType memory_type() const = 0;
+ virtual int memory_size() const { return type2aelembytes[memory_type()]; }
+
+ // Search through memory states which precede this node (load or store).
+ // Look for an exact match for the address, with no intervening
+ // aliased stores.
+ Node* find_previous_store(PhaseTransform* phase);
+
+ // Can this node (load or store) accurately see a stored value in
+ // the given memory state? (The state may or may not be in(Memory).)
+ Node* can_see_stored_value(Node* st, PhaseTransform* phase) const;
+
+#ifndef PRODUCT
+ static void dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st);
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------LoadNode---------------------------------------
+// Load value; requires Memory and Address
+class LoadNode : public MemNode {
+protected:
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+ const Type* const _type; // What kind of value is loaded?
+public:
+
+ LoadNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt )
+ : MemNode(c,mem,adr,at), _type(rt) {
+ init_class_id(Class_Load);
+ }
+
+ // Polymorphic factory method:
+ static LoadNode* make( Compile *C, Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, BasicType bt );
+
+ virtual uint hash() const; // Check the type
+
+ // Handle algebraic identities here. If we have an identity, return the Node
+ // we are equivalent to. We look for Load of a Store.
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // If the load is from Field memory and the pointer is non-null, we can
+ // zero out the control input.
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual add() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ virtual uint ideal_reg() const;
+ virtual const Type *bottom_type() const;
+ // Following method is copied from TypeNode:
+ void set_type(const Type* t) {
+ assert(t != NULL, "sanity");
+ debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH);
+ *(const Type**)&_type = t; // cast away const-ness
+ // If this node is in the hash table, make sure it doesn't need a rehash.
+ assert(check_hash == NO_HASH || check_hash == hash(), "type change must preserve hash code");
+ }
+ const Type* type() const { assert(_type != NULL, "sanity"); return _type; };
+
+ // Do not match memory edge
+ virtual uint match_edge(uint idx) const;
+
+ // Map a load opcode to its corresponding store opcode.
+ virtual int store_Opcode() const = 0;
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+protected:
+ const Type* load_array_final_field(const TypeKlassPtr *tkls,
+ ciKlass* klass) const;
+};
+
+//------------------------------LoadBNode--------------------------------------
+// Load a byte (8bits signed) from memory
+class LoadBNode : public LoadNode {
+public:
+ LoadBNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE )
+ : LoadNode(c,mem,adr,at,ti) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int store_Opcode() const { return Op_StoreB; }
+ virtual BasicType memory_type() const { return T_BYTE; }
+};
+
+//------------------------------LoadCNode--------------------------------------
+// Load a char (16bits unsigned) from memory
+class LoadCNode : public LoadNode {
+public:
+ LoadCNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR )
+ : LoadNode(c,mem,adr,at,ti) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int store_Opcode() const { return Op_StoreC; }
+ virtual BasicType memory_type() const { return T_CHAR; }
+};
+
+//------------------------------LoadINode--------------------------------------
+// Load an integer from memory
+class LoadINode : public LoadNode {
+public:
+ LoadINode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT )
+ : LoadNode(c,mem,adr,at,ti) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual int store_Opcode() const { return Op_StoreI; }
+ virtual BasicType memory_type() const { return T_INT; }
+};
+
+//------------------------------LoadRangeNode----------------------------------
+// Load an array length from the array
+class LoadRangeNode : public LoadINode {
+public:
+ LoadRangeNode( Node *c, Node *mem, Node *adr, const TypeInt *ti = TypeInt::POS )
+ : LoadINode(c,mem,adr,TypeAryPtr::RANGE,ti) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+};
+
+//------------------------------LoadLNode--------------------------------------
+// Load a long from memory
+class LoadLNode : public LoadNode {
+ virtual uint hash() const { return LoadNode::hash() + _require_atomic_access; }
+ virtual uint cmp( const Node &n ) const {
+ return _require_atomic_access == ((LoadLNode&)n)._require_atomic_access
+ && LoadNode::cmp(n);
+ }
+ virtual uint size_of() const { return sizeof(*this); }
+ const bool _require_atomic_access; // is piecewise load forbidden?
+
+public:
+ LoadLNode( Node *c, Node *mem, Node *adr, const TypePtr* at,
+ const TypeLong *tl = TypeLong::LONG,
+ bool require_atomic_access = false )
+ : LoadNode(c,mem,adr,at,tl)
+ , _require_atomic_access(require_atomic_access)
+ {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegL; }
+ virtual int store_Opcode() const { return Op_StoreL; }
+ virtual BasicType memory_type() const { return T_LONG; }
+ bool require_atomic_access() { return _require_atomic_access; }
+ static LoadLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt);
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const {
+ LoadNode::dump_spec(st);
+ if (_require_atomic_access) st->print(" Atomic!");
+ }
+#endif
+};
+
+//------------------------------LoadL_unalignedNode----------------------------
+// Load a long from unaligned memory
+class LoadL_unalignedNode : public LoadLNode {
+public:
+ LoadL_unalignedNode( Node *c, Node *mem, Node *adr, const TypePtr* at )
+ : LoadLNode(c,mem,adr,at) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LoadFNode--------------------------------------
+// Load a float (32 bits) from memory
+class LoadFNode : public LoadNode {
+public:
+ LoadFNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t = Type::FLOAT )
+ : LoadNode(c,mem,adr,at,t) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegF; }
+ virtual int store_Opcode() const { return Op_StoreF; }
+ virtual BasicType memory_type() const { return T_FLOAT; }
+};
+
+//------------------------------LoadDNode--------------------------------------
+// Load a double (64 bits) from memory
+class LoadDNode : public LoadNode {
+public:
+ LoadDNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t = Type::DOUBLE )
+ : LoadNode(c,mem,adr,at,t) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual int store_Opcode() const { return Op_StoreD; }
+ virtual BasicType memory_type() const { return T_DOUBLE; }
+};
+
+//------------------------------LoadD_unalignedNode----------------------------
+// Load a double from unaligned memory
+class LoadD_unalignedNode : public LoadDNode {
+public:
+ LoadD_unalignedNode( Node *c, Node *mem, Node *adr, const TypePtr* at )
+ : LoadDNode(c,mem,adr,at) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LoadPNode--------------------------------------
+// Load a pointer from memory (either object or array)
+class LoadPNode : public LoadNode {
+public:
+ LoadPNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypePtr* t )
+ : LoadNode(c,mem,adr,at,t) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ virtual int store_Opcode() const { return Op_StoreP; }
+ virtual BasicType memory_type() const { return T_ADDRESS; }
+ // depends_only_on_test is almost always true, and needs to be almost always
+ // true to enable key hoisting & commoning optimizations. However, for the
+ // special case of RawPtr loads from TLS top & end, the control edge carries
+ // the dependence preventing hoisting past a Safepoint instead of the memory
+ // edge. (An unfortunate consequence of having Safepoints not set Raw
+ // Memory; itself an unfortunate consequence of having Nodes which produce
+ // results (new raw memory state) inside of loops preventing all manner of
+ // other optimizations). Basically, it's ugly but so is the alternative.
+ // See comment in macro.cpp, around line 125 expand_allocate_common().
+ virtual bool depends_only_on_test() const { return adr_type() != TypeRawPtr::BOTTOM; }
+};
+
+//------------------------------LoadKlassNode----------------------------------
+// Load a Klass from an object
+class LoadKlassNode : public LoadPNode {
+public:
+ LoadKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk = TypeKlassPtr::OBJECT )
+ : LoadPNode(c,mem,adr,at,tk) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual bool depends_only_on_test() const { return true; }
+};
+
+//------------------------------LoadSNode--------------------------------------
+// Load a short (16bits signed) from memory
+class LoadSNode : public LoadNode {
+public:
+ LoadSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT )
+ : LoadNode(c,mem,adr,at,ti) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int store_Opcode() const { return Op_StoreC; }
+ virtual BasicType memory_type() const { return T_SHORT; }
+};
+
+//------------------------------StoreNode--------------------------------------
+// Store value; requires Memory, Address and Value
+class StoreNode : public MemNode {
+protected:
+ virtual uint cmp( const Node &n ) const;
+ virtual bool depends_only_on_test() const { return false; }
+
+ Node *Ideal_masked_input (PhaseGVN *phase, uint mask);
+ Node *Ideal_sign_extended_input(PhaseGVN *phase, int num_bits);
+
+public:
+ StoreNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val )
+ : MemNode(c,mem,adr,at,val) {
+ init_class_id(Class_Store);
+ }
+ StoreNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store )
+ : MemNode(c,mem,adr,at,val,oop_store) {
+ init_class_id(Class_Store);
+ }
+
+ // Polymorphic factory method:
+ static StoreNode* make( Compile *C, Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, BasicType bt );
+
+ virtual uint hash() const; // Check the type
+
+ // If the store is to Field memory and the pointer is non-null, we can
+ // zero out the control input.
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual add() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Check for identity function on memory (Load then Store at same address)
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // Do not match memory edge
+ virtual uint match_edge(uint idx) const;
+
+ virtual const Type *bottom_type() const; // returns Type::MEMORY
+
+ // Map a store opcode to its corresponding own opcode, trivially.
+ virtual int store_Opcode() const { return Opcode(); }
+
+ // have all possible loads of the value stored been optimized away?
+ bool value_never_loaded(PhaseTransform *phase) const;
+};
+
+//------------------------------StoreBNode-------------------------------------
+// Store byte to memory
+class StoreBNode : public StoreNode {
+public:
+ StoreBNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual BasicType memory_type() const { return T_BYTE; }
+};
+
+//------------------------------StoreCNode-------------------------------------
+// Store char/short to memory
+class StoreCNode : public StoreNode {
+public:
+ StoreCNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual BasicType memory_type() const { return T_CHAR; }
+};
+
+//------------------------------StoreINode-------------------------------------
+// Store int to memory
+class StoreINode : public StoreNode {
+public:
+ StoreINode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_INT; }
+};
+
+//------------------------------StoreLNode-------------------------------------
+// Store long to memory
+class StoreLNode : public StoreNode {
+ virtual uint hash() const { return StoreNode::hash() + _require_atomic_access; }
+ virtual uint cmp( const Node &n ) const {
+ return _require_atomic_access == ((StoreLNode&)n)._require_atomic_access
+ && StoreNode::cmp(n);
+ }
+ virtual uint size_of() const { return sizeof(*this); }
+ const bool _require_atomic_access; // is piecewise store forbidden?
+
+public:
+ StoreLNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val,
+ bool require_atomic_access = false )
+ : StoreNode(c,mem,adr,at,val)
+ , _require_atomic_access(require_atomic_access)
+ {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_LONG; }
+ bool require_atomic_access() { return _require_atomic_access; }
+ static StoreLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val);
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const {
+ StoreNode::dump_spec(st);
+ if (_require_atomic_access) st->print(" Atomic!");
+ }
+#endif
+};
+
+//------------------------------StoreFNode-------------------------------------
+// Store float to memory
+class StoreFNode : public StoreNode {
+public:
+ StoreFNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_FLOAT; }
+};
+
+//------------------------------StoreDNode-------------------------------------
+// Store double to memory
+class StoreDNode : public StoreNode {
+public:
+ StoreDNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_DOUBLE; }
+};
+
+//------------------------------StorePNode-------------------------------------
+// Store pointer to memory
+class StorePNode : public StoreNode {
+public:
+ StorePNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_ADDRESS; }
+};
+
+//------------------------------StoreCMNode-----------------------------------
+// Store card-mark byte to memory for CM
+// The last StoreCM before a SafePoint must be preserved and occur after its "oop" store
+// Preceding equivalent StoreCMs may be eliminated.
+class StoreCMNode : public StoreNode {
+public:
+ StoreCMNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store ) : StoreNode(c,mem,adr,at,val,oop_store) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual BasicType memory_type() const { return T_VOID; } // unspecific
+};
+
+//------------------------------LoadPLockedNode---------------------------------
+// Load-locked a pointer from memory (either object or array).
+// On Sparc & Intel this is implemented as a normal pointer load.
+// On PowerPC and friends it's a real load-locked.
+class LoadPLockedNode : public LoadPNode {
+public:
+ LoadPLockedNode( Node *c, Node *mem, Node *adr )
+ : LoadPNode(c,mem,adr,TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_StorePConditional; }
+ virtual bool depends_only_on_test() const { return true; }
+};
+
+//------------------------------LoadLLockedNode---------------------------------
+// Load-locked a long from memory.
+// On Sparc & Intel this is implemented as a normal long load.
+class LoadLLockedNode : public LoadLNode {
+public:
+ LoadLLockedNode( Node *c, Node *mem, Node *adr )
+ : LoadLNode(c,mem,adr,TypeRawPtr::BOTTOM, TypeLong::LONG) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_StoreLConditional; }
+};
+
+//------------------------------SCMemProjNode---------------------------------------
+// This class defines a projection of the memory state of a store conditional node.
+// These nodes return a value, but also update memory.
+class SCMemProjNode : public ProjNode {
+public:
+ enum {SCMEMPROJCON = (uint)-2};
+ SCMemProjNode( Node *src) : ProjNode( src, SCMEMPROJCON) { }
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return false; }
+ virtual const Type *bottom_type() const {return Type::MEMORY;}
+ virtual const TypePtr *adr_type() const { return in(0)->in(MemNode::Memory)->adr_type();}
+ virtual uint ideal_reg() const { return 0;} // memory projections don't have a register
+ virtual const Type *Value( PhaseTransform *phase ) const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const {};
+#endif
+};
+
+//------------------------------LoadStoreNode---------------------------
+class LoadStoreNode : public Node {
+public:
+ enum {
+ ExpectedIn = MemNode::ValueIn+1 // One more input than MemNode
+ };
+ LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex);
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type *bottom_type() const { return TypeInt::BOOL; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn; }
+};
+
+//------------------------------StorePConditionalNode---------------------------
+// Conditionally store pointer to memory, if no change since prior
+// load-locked. Sets flags for success or failure of the store.
+class StorePConditionalNode : public LoadStoreNode {
+public:
+ StorePConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { }
+ virtual int Opcode() const;
+ // Produces flags
+ virtual uint ideal_reg() const { return Op_RegFlags; }
+};
+
+//------------------------------StoreLConditionalNode---------------------------
+// Conditionally store long to memory, if no change since prior
+// load-locked. Sets flags for success or failure of the store.
+class StoreLConditionalNode : public LoadStoreNode {
+public:
+ StoreLConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { }
+ virtual int Opcode() const;
+};
+
+
+//------------------------------CompareAndSwapLNode---------------------------
+class CompareAndSwapLNode : public LoadStoreNode {
+public:
+ CompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+ virtual int Opcode() const;
+};
+
+
+//------------------------------CompareAndSwapINode---------------------------
+class CompareAndSwapINode : public LoadStoreNode {
+public:
+ CompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+ virtual int Opcode() const;
+};
+
+
+//------------------------------CompareAndSwapPNode---------------------------
+class CompareAndSwapPNode : public LoadStoreNode {
+public:
+ CompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+ virtual int Opcode() const;
+};
+
+//------------------------------ClearArray-------------------------------------
+class ClearArrayNode: public Node {
+public:
+ ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base ) : Node(ctrl,arymem,word_cnt,base) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::MEMORY; }
+ // ClearArray modifies array elements, and so affects only the
+ // array memory addressed by the bottom_type of its base address.
+ virtual const class TypePtr *adr_type() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint match_edge(uint idx) const;
+
+ // Clear the given area of an object or array.
+ // The start offset must always be aligned mod BytesPerInt.
+ // The end offset must always be aligned mod BytesPerLong.
+ // Return the new memory.
+ static Node* clear_memory(Node* control, Node* mem, Node* dest,
+ intptr_t start_offset,
+ intptr_t end_offset,
+ PhaseGVN* phase);
+ static Node* clear_memory(Node* control, Node* mem, Node* dest,
+ intptr_t start_offset,
+ Node* end_offset,
+ PhaseGVN* phase);
+ static Node* clear_memory(Node* control, Node* mem, Node* dest,
+ Node* start_offset,
+ Node* end_offset,
+ PhaseGVN* phase);
+};
+
+//------------------------------StrComp-------------------------------------
+class StrCompNode: public Node {
+public:
+ StrCompNode(Node *control,
+ Node* char_array_mem,
+ Node* value_mem,
+ Node* count_mem,
+ Node* offset_mem,
+ Node* s1, Node* s2): Node(control,
+ char_array_mem,
+ value_mem,
+ count_mem,
+ offset_mem,
+ s1, s2) {};
+ virtual int Opcode() const;
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type* bottom_type() const { return TypeInt::INT; }
+ // a StrCompNode (conservatively) aliases with everything:
+ virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; }
+ virtual uint match_edge(uint idx) const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------MemBar-----------------------------------------
+// There are different flavors of Memory Barriers to match the Java Memory
+// Model. Monitor-enter and volatile-load act as Acquires: no following ref
+// can be moved to before them. We insert a MemBar-Acquire after a FastLock or
+// volatile-load. Monitor-exit and volatile-store act as Releases: no
+// preceding ref can be moved to after them. We insert a MemBar-Release
+// before a FastUnlock or volatile-store. All volatiles need to be
+// serialized, so we follow each volatile-store with a MemBar-Volatile to
+// separate it from any following volatile-load.
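+//
+// Illustrative placement sketch (a restatement of the rules above, not code
+// generated anywhere in this file):
+//   volatile load    ==>  LoadX;          MemBarAcquire
+//   volatile store   ==>  MemBarRelease;  StoreX;  MemBarVolatile
+//   monitor-enter    ==>  FastLock;       MemBarAcquire
+//   monitor-exit     ==>  MemBarRelease;  FastUnlock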
+class MemBarNode: public MultiNode {
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const ; // Always fail, except on self
+
+ virtual uint size_of() const { return sizeof(*this); }
+ // Memory type this node is serializing. Usually either rawptr or bottom.
+ const TypePtr* _adr_type;
+
+public:
+ enum {
+ Precedent = TypeFunc::Parms // optional edge to force precedence
+ };
+ MemBarNode(Compile* C, int alias_idx, Node* precedent);
+ virtual int Opcode() const = 0;
+ virtual const class TypePtr *adr_type() const { return _adr_type; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint match_edge(uint idx) const { return 0; }
+ virtual const Type *bottom_type() const { return TypeTuple::MEMBAR; }
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+ // Factory method. Builds a wide or narrow membar.
+ // Optional 'precedent' becomes an extra edge if not null.
+ static MemBarNode* make(Compile* C, int opcode,
+ int alias_idx = Compile::AliasIdxBot,
+ Node* precedent = NULL);
+};
+
+// "Acquire" - no following ref can move before (but earlier refs can
+// follow, like an early Load stalled in cache). Requires multi-cpu
+// visibility. Inserted after a volatile load or FastLock.
+class MemBarAcquireNode: public MemBarNode {
+public:
+ MemBarAcquireNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {}
+ virtual int Opcode() const;
+};
+
+// "Release" - no earlier ref can move after (but later refs can move
+// up, like a speculative pipelined cache-hitting Load). Requires
+// multi-cpu visibility. Inserted before a volatile store or FastUnLock.
+class MemBarReleaseNode: public MemBarNode {
+public:
+ MemBarReleaseNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {}
+ virtual int Opcode() const;
+};
+
+// Ordering between a volatile store and a following volatile load.
+// Requires multi-CPU visibility?
+class MemBarVolatileNode: public MemBarNode {
+public:
+ MemBarVolatileNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {}
+ virtual int Opcode() const;
+};
+
+// Ordering within the same CPU. Used to order unsafe memory references
+// inside the compiler when we lack alias info. Not needed "outside" the
+// compiler because the CPU does all the ordering for us.
+class MemBarCPUOrderNode: public MemBarNode {
+public:
+ MemBarCPUOrderNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return 0; } // not matched in the AD file
+};
+
+// Isolation of object setup after an AllocateNode and before next safepoint.
+// (See comment in memnode.cpp near InitializeNode::InitializeNode for semantics.)
+class InitializeNode: public MemBarNode {
+ friend class AllocateNode;
+
+ bool _is_complete;
+
+public:
+ enum {
+ Control = TypeFunc::Control,
+ Memory = TypeFunc::Memory, // MergeMem for states affected by this op
+ RawAddress = TypeFunc::Parms+0, // the newly-allocated raw address
+ RawStores = TypeFunc::Parms+1 // zero or more stores (or TOP)
+ };
+
+ InitializeNode(Compile* C, int adr_type, Node* rawoop);
+ virtual int Opcode() const;
+ virtual uint size_of() const { return sizeof(*this); }
+ virtual uint ideal_reg() const { return 0; } // not matched in the AD file
+ virtual const RegMask &in_RegMask(uint) const; // mask for RawAddress
+
+ // Manage incoming memory edges via a MergeMem on in(Memory):
+ Node* memory(uint alias_idx);
+
+ // The raw memory edge coming directly from the Allocation.
+ // The contents of this memory are *always* all-zero-bits.
+ Node* zero_memory() { return memory(Compile::AliasIdxRaw); }
+
+ // Return the corresponding allocation for this initialization (or null if none).
+ // (Note: Both InitializeNode::allocation and AllocateNode::initialization
+ // are defined in graphKit.cpp, which sets up the bidirectional relation.)
+ AllocateNode* allocation();
+
+ // Anything other than zeroing in this init?
+ bool is_non_zero();
+
+ // An InitializeNode must be completed before macro expansion is done.
+ // Completion requires that the AllocateNode must be followed by
+ // initialization of the new memory to zero, then to any initializers.
+ bool is_complete() { return _is_complete; }
+
+ // Mark complete. (Must not yet be complete.)
+ void set_complete(PhaseGVN* phase);
+
+#ifdef ASSERT
+ // ensure all non-degenerate stores are ordered and non-overlapping
+ bool stores_are_sane(PhaseTransform* phase);
+#endif //ASSERT
+
+ // See if this store can be captured; return offset where it initializes.
+ // Return 0 if the store cannot be moved (any sort of problem).
+ intptr_t can_capture_store(StoreNode* st, PhaseTransform* phase);
+
+ // Capture another store; reformat it to write my internal raw memory.
+ // Return the captured copy, else NULL if there is some sort of problem.
+ Node* capture_store(StoreNode* st, intptr_t start, PhaseTransform* phase);
+
+ // Find captured store which corresponds to the range [start..start+size).
+ // Return my own memory projection (meaning the initial zero bits)
+ // if there is no such store. Return NULL if there is a problem.
+ Node* find_captured_store(intptr_t start, int size_in_bytes, PhaseTransform* phase);
+
+ // Called when the associated AllocateNode is expanded into CFG.
+ Node* complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
+ intptr_t header_size, Node* size_in_bytes,
+ PhaseGVN* phase);
+
+ private:
+ void remove_extra_zeroes();
+
+ // Find out where a captured store should be placed (or already is placed).
+ int captured_store_insertion_point(intptr_t start, int size_in_bytes,
+ PhaseTransform* phase);
+
+ static intptr_t get_store_offset(Node* st, PhaseTransform* phase);
+
+ Node* make_raw_address(intptr_t offset, PhaseTransform* phase);
+
+ bool detect_init_independence(Node* n, bool st_is_pinned, int& count);
+
+ void coalesce_subword_stores(intptr_t header_size, Node* size_in_bytes,
+ PhaseGVN* phase);
+
+ intptr_t find_next_fullword_store(uint i, PhaseGVN* phase);
+};
+
+//------------------------------MergeMem---------------------------------------
+// (See comment in memnode.cpp near MergeMemNode::MergeMemNode for semantics.)
+class MergeMemNode: public Node {
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const ; // Always fail, except on self
+ friend class MergeMemStream;
+ MergeMemNode(Node* def); // clients use MergeMemNode::make
+
+public:
+ // If the input is a whole memory state, clone it with all its slices intact.
+ // Otherwise, make a new memory state with just that base memory input.
+ // In either case, the result is a newly created MergeMem.
+ static MergeMemNode* make(Compile* C, Node* base_memory);
+
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const { return 0; }
+ virtual const RegMask &out_RegMask() const;
+ virtual const Type *bottom_type() const { return Type::MEMORY; }
+ virtual const TypePtr *adr_type() const { return TypePtr::BOTTOM; }
+ // sparse accessors
+ // Fetch the previously stored "set_memory_at", or else the base memory.
+ // (Caller should clone it if it is a phi-nest.)
+ Node* memory_at(uint alias_idx) const;
+ // set the memory, regardless of its previous value
+ void set_memory_at(uint alias_idx, Node* n);
+ // the "base" is the memory that provides the non-finite support
+ Node* base_memory() const { return in(Compile::AliasIdxBot); }
+ // warning: setting the base can implicitly set any of the other slices too
+ void set_base_memory(Node* def);
+ // sentinel value which denotes a copy of the base memory:
+ Node* empty_memory() const { return in(Compile::AliasIdxTop); }
+ static Node* make_empty_memory(); // where the sentinel comes from
+ bool is_empty_memory(Node* n) const { assert((n == empty_memory()) == n->is_top(), "sanity"); return n->is_top(); }
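+ // Sketch of the sparse layout implied by the accessors above:
+ //   in(AliasIdxTop)        -- the empty-memory sentinel (always top)
+ //   in(AliasIdxBot)        -- base_memory(), the default "wide" memory
+ //   in(AliasIdxRaw) and up -- narrow slices; a top entry here means
+ //                             "same as base_memory()"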
+ // hook for the iterator, to perform any necessary setup
+ void iteration_setup(const MergeMemNode* other = NULL);
+ // push sentinels until I am at least as long as the other (semantic no-op)
+ void grow_to_match(const MergeMemNode* other);
+ bool verify_sparse() const PRODUCT_RETURN0;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+class MergeMemStream : public StackObj {
+ private:
+ MergeMemNode* _mm;
+ const MergeMemNode* _mm2; // optional second guy, contributes non-empty iterations
+ Node* _mm_base; // loop-invariant base memory of _mm
+ int _idx;
+ int _cnt;
+ Node* _mem;
+ Node* _mem2;
+ int _cnt2;
+
+ void init(MergeMemNode* mm, const MergeMemNode* mm2 = NULL) {
+ // subsume_node will break sparseness at times, whenever a memory slice
+ // folds down to a copy of the base ("fat") memory. In such a case,
+ // the raw edge will update to base, although it should be top.
+ // This iterator will recognize either top or base_memory as an
+ // "empty" slice. See is_empty, is_empty2, and next below.
+ //
+ // The sparseness property is repaired in MergeMemNode::Ideal.
+ // As long as access to a MergeMem goes through this iterator
+ // or the memory_at accessor, flaws in the sparseness will
+ // never be observed.
+ //
+ // Also, iteration_setup repairs sparseness.
+ assert(mm->verify_sparse(), "please, no dups of base");
+ assert(mm2==NULL || mm2->verify_sparse(), "please, no dups of base");
+
+ _mm = mm;
+ _mm_base = mm->base_memory();
+ _mm2 = mm2;
+ _cnt = mm->req();
+ _idx = Compile::AliasIdxBot-1; // start at the base memory
+ _mem = NULL;
+ _mem2 = NULL;
+ }
+
+#ifdef ASSERT
+ Node* check_memory() const {
+ if (at_base_memory())
+ return _mm->base_memory();
+ else if ((uint)_idx < _mm->req() && !_mm->in(_idx)->is_top())
+ return _mm->memory_at(_idx);
+ else
+ return _mm_base;
+ }
+ Node* check_memory2() const {
+ return at_base_memory()? _mm2->base_memory(): _mm2->memory_at(_idx);
+ }
+#endif
+
+ static bool match_memory(Node* mem, const MergeMemNode* mm, int idx) PRODUCT_RETURN0;
+ void assert_synch() const {
+ assert(!_mem || _idx >= _cnt || match_memory(_mem, _mm, _idx),
+ "no side-effects except through the stream");
+ }
+
+ public:
+
+ // expected usages:
+ // for (MergeMemStream mms(mem->is_MergeMem()); next_non_empty(); ) { ... }
+ // for (MergeMemStream mms(mem1, mem2); next_non_empty2(); ) { ... }
+
+ // iterate over one merge
+ MergeMemStream(MergeMemNode* mm) {
+ mm->iteration_setup();
+ init(mm);
+ debug_only(_cnt2 = 999);
+ }
+ // iterate in parallel over two merges
+ // only iterates through non-empty elements of mm2
+ MergeMemStream(MergeMemNode* mm, const MergeMemNode* mm2) {
+ assert(mm2, "second argument must be a MergeMem also");
+ ((MergeMemNode*)mm2)->iteration_setup(); // update hidden state
+ mm->iteration_setup(mm2);
+ init(mm, mm2);
+ _cnt2 = mm2->req();
+ }
+#ifdef ASSERT
+ ~MergeMemStream() {
+ assert_synch();
+ }
+#endif
+
+ MergeMemNode* all_memory() const {
+ return _mm;
+ }
+ Node* base_memory() const {
+ assert(_mm_base == _mm->base_memory(), "no update to base memory, please");
+ return _mm_base;
+ }
+ const MergeMemNode* all_memory2() const {
+ assert(_mm2 != NULL, "");
+ return _mm2;
+ }
+ bool at_base_memory() const {
+ return _idx == Compile::AliasIdxBot;
+ }
+ int alias_idx() const {
+ assert(_mem, "must call next 1st");
+ return _idx;
+ }
+
+ const TypePtr* adr_type() const {
+ return Compile::current()->get_adr_type(alias_idx());
+ }
+
+ const TypePtr* adr_type(Compile* C) const {
+ return C->get_adr_type(alias_idx());
+ }
+ bool is_empty() const {
+ assert(_mem, "must call next 1st");
+ assert(_mem->is_top() == (_mem==_mm->empty_memory()), "correct sentinel");
+ return _mem->is_top();
+ }
+ bool is_empty2() const {
+ assert(_mem2, "must call next 1st");
+ assert(_mem2->is_top() == (_mem2==_mm2->empty_memory()), "correct sentinel");
+ return _mem2->is_top();
+ }
+ Node* memory() const {
+ assert(!is_empty(), "must not be empty");
+ assert_synch();
+ return _mem;
+ }
+ // get the current memory, regardless of empty or non-empty status
+ Node* force_memory() const {
+ assert(!is_empty() || !at_base_memory(), "");
+ // Use _mm_base to defend against updates to _mem->base_memory().
+ Node *mem = _mem->is_top() ? _mm_base : _mem;
+ assert(mem == check_memory(), "");
+ return mem;
+ }
+ Node* memory2() const {
+ assert(_mem2 == check_memory2(), "");
+ return _mem2;
+ }
+ void set_memory(Node* mem) {
+ if (at_base_memory()) {
+ // Note that this does not change the invariant _mm_base.
+ _mm->set_base_memory(mem);
+ } else {
+ _mm->set_memory_at(_idx, mem);
+ }
+ _mem = mem;
+ assert_synch();
+ }
+
+ // Recover from a side effect to the MergeMemNode.
+ void set_memory() {
+ _mem = _mm->in(_idx);
+ }
+
+ bool next() { return next(false); }
+ bool next2() { return next(true); }
+
+ bool next_non_empty() { return next_non_empty(false); }
+ bool next_non_empty2() { return next_non_empty(true); }
+ // next_non_empty2 can yield states where is_empty() is true
+
+ private:
+ // find the next item, which might be empty
+ bool next(bool have_mm2) {
+ assert((_mm2 != NULL) == have_mm2, "use other next");
+ assert_synch();
+ if (++_idx < _cnt) {
+ // Note: This iterator allows _mm to be non-sparse.
+ // It behaves the same whether _mem is top or base_memory.
+ _mem = _mm->in(_idx);
+ if (have_mm2)
+ _mem2 = _mm2->in((_idx < _cnt2) ? _idx : Compile::AliasIdxTop);
+ return true;
+ }
+ return false;
+ }
+
+ // find the next non-empty item
+ bool next_non_empty(bool have_mm2) {
+ while (next(have_mm2)) {
+ if (!is_empty()) {
+ // make sure _mem2 is filled in sensibly
+ if (have_mm2 && _mem2->is_top()) _mem2 = _mm2->base_memory();
+ return true;
+ } else if (have_mm2 && !is_empty2()) {
+ return true; // is_empty() == true
+ }
+ }
+ return false;
+ }
+};
+
+//------------------------------Prefetch---------------------------------------
+
+// Non-faulting prefetch load. Prefetch for many reads.
+class PrefetchReadNode : public Node {
+public:
+ PrefetchReadNode(Node *abio, Node *adr) : Node(0,abio,adr) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const { return idx==2; }
+ virtual const Type *bottom_type() const { return Type::ABIO; }
+};
+
+// Non-faulting prefetch load. Prefetch for many reads & many writes.
+class PrefetchWriteNode : public Node {
+public:
+ PrefetchWriteNode(Node *abio, Node *adr) : Node(0,abio,adr) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const { return idx==2; }
+ virtual const Type *bottom_type() const { return Type::ABIO; }
+};
diff --git a/src/share/vm/opto/mulnode.cpp b/src/share/vm/opto/mulnode.cpp
new file mode 100644
index 000000000..146c432fe
--- /dev/null
+++ b/src/share/vm/opto/mulnode.cpp
@@ -0,0 +1,1310 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+#include "incls/_precompiled.incl"
+#include "incls/_mulnode.cpp.incl"
+
+
+//=============================================================================
+//------------------------------hash-------------------------------------------
+// Hash function over MulNodes. Needs to be commutative; i.e., I swap
+// (commute) inputs to MulNodes willy-nilly so the hash function must return
+// the same value in the presence of edge swapping.
+uint MulNode::hash() const {
+ return (uintptr_t)in(1) + (uintptr_t)in(2) + Opcode();
+}
+
+//------------------------------Identity---------------------------------------
+// Multiplying by one preserves the other argument
+Node *MulNode::Identity( PhaseTransform *phase ) {
+ register const Type *one = mul_id(); // The multiplicative identity
+ if( phase->type( in(1) )->higher_equal( one ) ) return in(2);
+ if( phase->type( in(2) )->higher_equal( one ) ) return in(1);
+
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// We also canonicalize the Node, moving constants to the right input,
+// and flattening expressions (so that 1+x+2 becomes x+3).
+Node *MulNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ Node *progress = NULL; // Progress flag
+ // We are OK if right is a constant, or right is a load and
+ // left is a non-constant.
+ if( !(t2->singleton() ||
+ (in(2)->is_Load() && !(t1->singleton() || in(1)->is_Load())) ) ) {
+ if( t1->singleton() || // Left input is a constant?
+ // Otherwise, sort inputs (commutativity) to help value numbering.
+ (in(1)->_idx > in(2)->_idx) ) {
+ swap_edges(1, 2);
+ const Type *t = t1;
+ t1 = t2;
+ t2 = t;
+ progress = this; // Made progress
+ }
+ }
+
+ // If the right input is a constant, and the left input is a product of a
+ // constant, flatten the expression tree.
+ uint op = Opcode();
+ if( t2->singleton() && // Right input is a constant?
+ op != Op_MulF && // Float & double cannot reassociate
+ op != Op_MulD ) {
+ if( t2 == Type::TOP ) return NULL;
+ Node *mul1 = in(1);
+#ifdef ASSERT
+ // Check for dead loop
+ int op1 = mul1->Opcode();
+ if( phase->eqv( mul1, this ) || phase->eqv( in(2), this ) ||
+ ( op1 == mul_opcode() || op1 == add_opcode() ) &&
+ ( phase->eqv( mul1->in(1), this ) || phase->eqv( mul1->in(2), this ) ||
+ phase->eqv( mul1->in(1), mul1 ) || phase->eqv( mul1->in(2), mul1 ) ) )
+ assert(false, "dead loop in MulNode::Ideal");
+#endif
+
+ if( mul1->Opcode() == mul_opcode() ) { // Left input is a multiply?
+ // Mul of a constant?
+ const Type *t12 = phase->type( mul1->in(2) );
+ if( t12->singleton() && t12 != Type::TOP) { // Left input is a mul of a constant?
+ // Compute new constant; check for overflow
+ const Type *tcon01 = mul1->as_Mul()->mul_ring(t2,t12);
+ if( tcon01->singleton() ) {
+ // The Mul of the flattened expression
+ set_req(1, mul1->in(1));
+ set_req(2, phase->makecon( tcon01 ));
+ t2 = tcon01;
+ progress = this; // Made progress
+ }
+ }
+ }
+ // If the right input is a constant, and the left input is an add of a
+ // constant, flatten the tree: (X+con1)*con0 ==> X*con0 + con1*con0
+ const Node *add1 = in(1);
+ if( add1->Opcode() == add_opcode() ) { // Left input is an add?
+ // Add of a constant?
+ const Type *t12 = phase->type( add1->in(2) );
+ if( t12->singleton() && t12 != Type::TOP ) { // Left input is an add of a constant?
+ assert( add1->in(1) != add1, "dead loop in MulNode::Ideal" );
+ // Compute new constant; check for overflow
+ const Type *tcon01 = mul_ring(t2,t12);
+ if( tcon01->singleton() ) {
+
+ // Convert (X+con1)*con0 into X*con0
+ Node *mul = clone(); // mul = ()*con0
+ mul->set_req(1,add1->in(1)); // mul = X*con0
+ mul = phase->transform(mul);
+
+ Node *add2 = add1->clone();
+ add2->set_req(1, mul); // X*con0 + con0*con1
+ add2->set_req(2, phase->makecon(tcon01) );
+ progress = add2;
+ }
+ }
+ } // End of is left input an add
+ } // End of is right input a Mul
+
+ return progress;
+}
+
+//------------------------------Value-----------------------------------------
+const Type *MulNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is ZERO ==> the result is ZERO.
+ // Not valid for floats or doubles since +0.0 * -0.0 --> -0.0 (and 0 * inf --> NaN)
+ int op = Opcode();
+ if( op == Op_MulI || op == Op_AndI || op == Op_MulL || op == Op_AndL ) {
+ const Type *zero = add_id(); // The multiplicative zero
+ if( t1->higher_equal( zero ) ) return zero;
+ if( t2->higher_equal( zero ) ) return zero;
+ }
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
+ return bottom_type();
+
+ return mul_ring(t1,t2); // Local flavor of type multiplication
+}
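The float/double restriction in Value() above can be checked directly: propagating a zero constant through a floating-point multiply would get the sign of zero wrong and mishandle infinities. A standalone check (illustrative only, not part of this patch):

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  // "Zero times anything is zero" is unsound for IEEE-754 floats:
  assert(std::signbit(0.0f * -0.0f));   // +0.0 * -0.0 is -0.0, not +0.0
  assert(std::signbit(0.0f * -5.0f));   // +0.0 * negative is -0.0
  assert(std::isnan(0.0f * std::numeric_limits<float>::infinity()));  // 0 * inf is NaN
  return 0;
}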
+
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Check for power-of-2 multiply, then try the regular MulNode::Ideal
+Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Swap constant to right
+ jint con;
+ if ((con = in(1)->find_int_con(0)) != 0) {
+ swap_edges(1, 2);
+ // Finish rest of method to use info in 'con'
+ } else if ((con = in(2)->find_int_con(0)) == 0) {
+ return MulNode::Ideal(phase, can_reshape);
+ }
+
+ // Now we have a constant Node on the right and the constant in con
+ if( con == 0 ) return NULL; // By zero is handled by Value call
+ if( con == 1 ) return NULL; // By one is handled by Identity call
+
+ // Check for negative constant; if so negate the final result
+ bool sign_flip = false;
+ if( con < 0 ) {
+ con = -con;
+ sign_flip = true;
+ }
+
+ // Get low bit; check for being the only bit
+ Node *res = NULL;
+ jint bit1 = con & -con; // Extract low bit
+ if( bit1 == con ) { // Found a power of 2?
+ res = new (phase->C, 3) LShiftINode( in(1), phase->intcon(log2_intptr(bit1)) );
+ } else {
+
+ // Check for constant with 2 bits set
+ jint bit2 = con-bit1;
+ bit2 = bit2 & -bit2; // Extract 2nd bit
+ if( bit2 + bit1 == con ) { // Found all bits in con?
+ Node *n1 = phase->transform( new (phase->C, 3) LShiftINode( in(1), phase->intcon(log2_intptr(bit1)) ) );
+ Node *n2 = phase->transform( new (phase->C, 3) LShiftINode( in(1), phase->intcon(log2_intptr(bit2)) ) );
+ res = new (phase->C, 3) AddINode( n2, n1 );
+
+ } else if (is_power_of_2(con+1)) {
+ // Sleazy: power-of-2 minus 1. Next time be generic.
+ jint temp = (jint) (con + 1);
+ Node *n1 = phase->transform( new (phase->C, 3) LShiftINode( in(1), phase->intcon(log2_intptr(temp)) ) );
+ res = new (phase->C, 3) SubINode( n1, in(1) );
+ } else {
+ return MulNode::Ideal(phase, can_reshape);
+ }
+ }
+
+ if( sign_flip ) { // Need to negate result?
+ res = phase->transform(res);// Transform, before making the zero con
+ res = new (phase->C, 3) SubINode(phase->intcon(0),res);
+ }
+
+ return res; // Return final result
+}
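The three shapes MulINode::Ideal recognizes (a single bit, two bits, and 2^k-1) are all plain identities in wrapping 32-bit arithmetic. A standalone sketch, emulating Java int semantics with uint32_t (illustrative only, not part of this patch):

#include <cassert>
#include <cstdint>

// Wrapping 32-bit multiply, matching Java int semantics.
// Assumes two's-complement int32_t, as on HotSpot's target platforms.
static int32_t mulw(int32_t x, int32_t c) {
  return (int32_t)((uint32_t)x * (uint32_t)c);
}

int main() {
  int32_t x = 123456789;
  // Power of two: x*8 == x<<3
  assert(mulw(x, 8)  == (int32_t)((uint32_t)x << 3));
  // Two bits set: x*12 == (x<<3) + (x<<2)
  assert(mulw(x, 12) == (int32_t)(((uint32_t)x << 3) + ((uint32_t)x << 2)));
  // 2^k - 1: x*7 == (x<<3) - x
  assert(mulw(x, 7)  == (int32_t)(((uint32_t)x << 3) - (uint32_t)x));
  // Negative constant: multiply by |c|, then negate, as the sign_flip path does.
  assert(mulw(x, -8) == (int32_t)(0u - ((uint32_t)x << 3)));
  return 0;
}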
+
+//------------------------------mul_ring---------------------------------------
+// Compute the product type of two integer ranges into this node.
+const Type *MulINode::mul_ring(const Type *t0, const Type *t1) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // Fetch endpoints of all ranges
+ int32 lo0 = r0->_lo;
+ double a = (double)lo0;
+ int32 hi0 = r0->_hi;
+ double b = (double)hi0;
+ int32 lo1 = r1->_lo;
+ double c = (double)lo1;
+ int32 hi1 = r1->_hi;
+ double d = (double)hi1;
+
+ // Compute all endpoints & check for overflow
+ int32 A = lo0*lo1;
+ if( (double)A != a*c ) return TypeInt::INT; // Overflow?
+ int32 B = lo0*hi1;
+ if( (double)B != a*d ) return TypeInt::INT; // Overflow?
+ int32 C = hi0*lo1;
+ if( (double)C != b*c ) return TypeInt::INT; // Overflow?
+ int32 D = hi0*hi1;
+ if( (double)D != b*d ) return TypeInt::INT; // Overflow?
+
+ if( A < B ) { lo0 = A; hi0 = B; } // Sort range endpoints
+ else { lo0 = B; hi0 = A; }
+ if( C < D ) {
+ if( C < lo0 ) lo0 = C;
+ if( D > hi0 ) hi0 = D;
+ } else {
+ if( D < lo0 ) lo0 = D;
+ if( C > hi0 ) hi0 = C;
+ }
+ return TypeInt::make(lo0, hi0, MAX2(r0->_widen,r1->_widen));
+}
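mul_ring above guards against 32-bit overflow by recomputing each endpoint product in double precision and comparing it with the truncated int32 product. An equivalent standalone check, done with int64_t instead of the double comparison (a sketch only, not the method used above):

#include <cassert>
#include <cstdint>
#include <limits>

// True iff the exact product of x and y is representable as an int32,
// i.e. the endpoint product would not force widening to TypeInt::INT.
static bool product_fits_int32(int32_t x, int32_t y) {
  int64_t p = (int64_t)x * (int64_t)y;   // exact in 64 bits
  return p >= std::numeric_limits<int32_t>::min() &&
         p <= std::numeric_limits<int32_t>::max();
}

int main() {
  assert( product_fits_int32(46341, 46340));   // 2147441940 still fits
  assert(!product_fits_int32(46341, 46341));   // 2147488281 overflows
  assert( product_fits_int32(-2, 1 << 30));    // -2^31 is exactly INT32_MIN
  assert(!product_fits_int32( 2, 1 << 30));    // +2^31 does not fit
  return 0;
}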
+
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Check for power-of-2 multiply, then try the regular MulNode::Ideal
+Node *MulLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Swap constant to right
+ jlong con;
+ if ((con = in(1)->find_long_con(0)) != 0) {
+ swap_edges(1, 2);
+ // Finish rest of method to use info in 'con'
+ } else if ((con = in(2)->find_long_con(0)) == 0) {
+ return MulNode::Ideal(phase, can_reshape);
+ }
+
+ // Now we have a constant Node on the right and the constant in con
+ if( con == CONST64(0) ) return NULL; // By zero is handled by Value call
+ if( con == CONST64(1) ) return NULL; // By one is handled by Identity call
+
+ // Check for negative constant; if so negate the final result
+ bool sign_flip = false;
+ if( con < 0 ) {
+ con = -con;
+ sign_flip = true;
+ }
+
+ // Get low bit; check for being the only bit
+ Node *res = NULL;
+ jlong bit1 = con & -con; // Extract low bit
+ if( bit1 == con ) { // Found a power of 2?
+ res = new (phase->C, 3) LShiftLNode( in(1), phase->intcon(log2_long(bit1)) );
+ } else {
+
+ // Check for constant with 2 bits set
+ jlong bit2 = con-bit1;
+ bit2 = bit2 & -bit2; // Extract 2nd bit
+ if( bit2 + bit1 == con ) { // Found all bits in con?
+ Node *n1 = phase->transform( new (phase->C, 3) LShiftLNode( in(1), phase->intcon(log2_long(bit1)) ) );
+ Node *n2 = phase->transform( new (phase->C, 3) LShiftLNode( in(1), phase->intcon(log2_long(bit2)) ) );
+ res = new (phase->C, 3) AddLNode( n2, n1 );
+
+ } else if (is_power_of_2_long(con+1)) {
+ // Sleazy: power-of-2 minus 1. Next time be generic.
+ jlong temp = (jlong) (con + 1);
+ Node *n1 = phase->transform( new (phase->C, 3) LShiftLNode( in(1), phase->intcon(log2_long(temp)) ) );
+ res = new (phase->C, 3) SubLNode( n1, in(1) );
+ } else {
+ return MulNode::Ideal(phase, can_reshape);
+ }
+ }
+
+ if( sign_flip ) { // Need to negate result?
+ res = phase->transform(res);// Transform, before making the zero con
+ res = new (phase->C, 3) SubLNode(phase->longcon(0),res);
+ }
+
+ return res; // Return final result
+}
+
+//------------------------------mul_ring---------------------------------------
+// Compute the product type of two long integer ranges into this node.
+const Type *MulLNode::mul_ring(const Type *t0, const Type *t1) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+
+ // Fetch endpoints of all ranges
+ jlong lo0 = r0->_lo;
+ double a = (double)lo0;
+ jlong hi0 = r0->_hi;
+ double b = (double)hi0;
+ jlong lo1 = r1->_lo;
+ double c = (double)lo1;
+ jlong hi1 = r1->_hi;
+ double d = (double)hi1;
+
+ // Compute all endpoints & check for overflow
+ jlong A = lo0*lo1;
+ if( (double)A != a*c ) return TypeLong::LONG; // Overflow?
+ jlong B = lo0*hi1;
+ if( (double)B != a*d ) return TypeLong::LONG; // Overflow?
+ jlong C = hi0*lo1;
+ if( (double)C != b*c ) return TypeLong::LONG; // Overflow?
+ jlong D = hi0*hi1;
+ if( (double)D != b*d ) return TypeLong::LONG; // Overflow?
+
+ if( A < B ) { lo0 = A; hi0 = B; } // Sort range endpoints
+ else { lo0 = B; hi0 = A; }
+ if( C < D ) {
+ if( C < lo0 ) lo0 = C;
+ if( D > hi0 ) hi0 = D;
+ } else {
+ if( D < lo0 ) lo0 = D;
+ if( C > hi0 ) hi0 = C;
+ }
+ return TypeLong::make(lo0, hi0, MAX2(r0->_widen,r1->_widen));
+}
+
+//=============================================================================
+//------------------------------mul_ring---------------------------------------
+// Compute the product type of two float ranges into this node.
+const Type *MulFNode::mul_ring(const Type *t0, const Type *t1) const {
+ if( t0 == Type::FLOAT || t1 == Type::FLOAT ) return Type::FLOAT;
+ return TypeF::make( t0->getf() * t1->getf() );
+}
+
+//=============================================================================
+//------------------------------mul_ring---------------------------------------
+// Compute the product type of two double ranges into this node.
+const Type *MulDNode::mul_ring(const Type *t0, const Type *t1) const {
+ if( t0 == Type::DOUBLE || t1 == Type::DOUBLE ) return Type::DOUBLE;
+ // We must be multiplying 2 double constants.
+ return TypeD::make( t0->getd() * t1->getd() );
+}
+
+//=============================================================================
+//------------------------------mul_ring---------------------------------------
+// Supplied function returns the product of the inputs IN THE CURRENT RING.
+// For the logical operations the ring's MUL is really a logical AND function.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *AndINode::mul_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+ int widen = MAX2(r0->_widen,r1->_widen);
+
+ // If either input is a constant, might be able to trim cases
+ if( !r0->is_con() && !r1->is_con() )
+ return TypeInt::INT; // No constants to be had
+
+ // Both constants? Return bits
+ if( r0->is_con() && r1->is_con() )
+ return TypeInt::make( r0->get_con() & r1->get_con() );
+
+ if( r0->is_con() && r0->get_con() > 0 )
+ return TypeInt::make(0, r0->get_con(), widen);
+
+ if( r1->is_con() && r1->get_con() > 0 )
+ return TypeInt::make(0, r1->get_con(), widen);
+
+ if( r0 == TypeInt::BOOL || r1 == TypeInt::BOOL ) {
+ return TypeInt::BOOL;
+ }
+
+ return TypeInt::INT; // No constants to be had
+}
+
+//------------------------------Identity---------------------------------------
+// Masking off the high bits of an unsigned load is not required
+Node *AndINode::Identity( PhaseTransform *phase ) {
+
+ // x & x => x
+ if (phase->eqv(in(1), in(2))) return in(1);
+
+ Node *load = in(1);
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( t2 && t2->is_con() ) {
+ int con = t2->get_con();
+ // Masking off high bits which are always zero is useless.
+ const TypeInt* t1 = phase->type( in(1) )->isa_int();
+ if (t1 != NULL && t1->_lo >= 0) {
+ jint t1_support = ((jint)1 << (1 + log2_intptr(t1->_hi))) - 1;
+ if ((t1_support & con) == t1_support)
+ return load;
+ }
+ uint lop = load->Opcode();
+ if( lop == Op_LoadC &&
+ con == 0x0000FFFF ) // Already zero-extended
+ return load;
+ // Masking off the high bits of an unsigned-shift-right is not
+ // needed either.
+ if( lop == Op_URShiftI ) {
+ const TypeInt *t12 = phase->type( load->in(2) )->isa_int();
+ if( t12 && t12->is_con() ) {
+ int shift_con = t12->get_con();
+ int mask = max_juint >> shift_con;
+ if( (mask&con) == mask ) // If AND is useless, skip it
+ return load;
+ }
+ }
+ }
+ return MulNode::Identity(phase);
+}
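Both no-op cases above come down to one fact: AND with a mask that covers every bit the operand can possibly have is the identity. A standalone sketch of the two cases (illustrative only, not part of this patch):

#include <cassert>
#include <cstdint>

int main() {
  // Case 1: the operand is known non-negative with upper bound hi; a mask that
  // covers all bits up to the highest bit of hi leaves it unchanged.
  int32_t hi = 0x1234;                      // assume the type says 0 <= x <= hi
  int32_t support = (1 << (1 + 12)) - 1;    // 12 == floor(log2(0x1234)), support == 0x1FFF
  for (int32_t x = 0; x <= hi; ++x)
    assert((x & support) == x);

  // Case 2: an unsigned shift right by s leaves at most (32-s) low bits set,
  // so AND with (0xFFFFFFFF >>> s) is useless.
  uint32_t v = 0xDEADBEEFu;
  int s = 13;
  uint32_t shifted = v >> s;
  uint32_t mask = 0xFFFFFFFFu >> s;
  assert((shifted & mask) == shifted);
  return 0;
}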
+
+//------------------------------Ideal------------------------------------------
+Node *AndINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Special case constant AND mask
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( !t2 || !t2->is_con() ) return MulNode::Ideal(phase, can_reshape);
+ const int mask = t2->get_con();
+ Node *load = in(1);
+ uint lop = load->Opcode();
+
+ // Masking bits off of a Character? Hi bits are already zero.
+ if( lop == Op_LoadC &&
+ (mask & 0xFFFF0000) ) // Can we make a smaller mask?
+ return new (phase->C, 3) AndINode(load,phase->intcon(mask&0xFFFF));
+
+ // Masking bits off of a Short? Loading a Character does some masking
+ if( lop == Op_LoadS &&
+ (mask & 0xFFFF0000) == 0 ) {
+ Node *ldc = new (phase->C, 3) LoadCNode(load->in(MemNode::Control),
+ load->in(MemNode::Memory),
+ load->in(MemNode::Address),
+ load->adr_type());
+ ldc = phase->transform(ldc);
+ return new (phase->C, 3) AndINode(ldc,phase->intcon(mask&0xFFFF));
+ }
+
+ // Masking sign bits off of a Byte? Let the matcher use an unsigned load
+ if( lop == Op_LoadB &&
+ (!in(0) && load->in(0)) &&
+ (mask == 0x000000FF) ) {
+ // Associate this node with the LoadB, so the matcher can see them together.
+ // If we don't do this, it is common for the LoadB to have one control
+ // edge, and the store or call containing this AndI to have a different
+ // control edge. This will cause Label_Root to group the AndI with
+ // the encoding store or call, so the matcher has no chance to match
+ // this AndI together with the LoadB. Setting the control edge here
+ // prevents Label_Root from grouping the AndI with the store or call,
+ // if it has a control edge that is inconsistent with the LoadB.
+ set_req(0, load->in(0));
+ return this;
+ }
+
+ // Masking off sign bits? Don't make them!
+ if( lop == Op_RShiftI ) {
+ const TypeInt *t12 = phase->type(load->in(2))->isa_int();
+ if( t12 && t12->is_con() ) { // Shift is by a constant
+ int shift = t12->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ const int sign_bits_mask = ~right_n_bits(BitsPerJavaInteger - shift);
+ // If the AND'ing of the 2 masks has no bits, then only original shifted
+ // bits survive. NO sign-extension bits survive the maskings.
+ if( (sign_bits_mask & mask) == 0 ) {
+ // Use zero-fill shift instead
+ Node *zshift = phase->transform(new (phase->C, 3) URShiftINode(load->in(1),load->in(2)));
+ return new (phase->C, 3) AndINode( zshift, in(2) );
+ }
+ }
+ }
+
+ // Check for 'negate/and-1', a pattern emitted when someone asks for
+ // 'mod 2'. Negate leaves the low order bit unchanged (think: complement
+ // plus 1) and the mask is of the low order bit. Skip the negate.
+ if( lop == Op_SubI && mask == 1 && load->in(1) &&
+ phase->type(load->in(1)) == TypeInt::ZERO )
+ return new (phase->C, 3) AndINode( load->in(2), in(2) );
+
+ return MulNode::Ideal(phase, can_reshape);
+}
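Two of the rewrites above rest on small bit-level identities: a signed shift can be replaced by an unsigned one when the mask discards every sign-extension bit, and negation preserves the low-order bit. A standalone sketch (illustrative only; assumes two's-complement int32_t with arithmetic >> for negative values, as on HotSpot's targets):

#include <cassert>
#include <cstdint>

static int32_t sshr(int32_t x, int s) { return x >> s; }                        // RShiftI
static int32_t ushr(int32_t x, int s) { return (int32_t)((uint32_t)x >> s); }   // URShiftI

int main() {
  int32_t x = -123456;
  int s = 5;

  // (x >> s) & mask == (x >>> s) & mask whenever mask contains none of the
  // sign-extension bits (here the mask keeps only the low 16 bits).
  int32_t mask = 0xFFFF;
  assert((sshr(x, s) & mask) == (ushr(x, s) & mask));

  // 'mod 2' idiom: (0 - x) & 1 == x & 1, so the negate can be skipped.
  assert(((0 - x) & 1) == (x & 1));
  assert(((0 - (x + 1)) & 1) == ((x + 1) & 1));
  return 0;
}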
+
+//=============================================================================
+//------------------------------mul_ring---------------------------------------
+// Supplied function returns the product of the inputs IN THE CURRENT RING.
+// For the logical operations the ring's MUL is really a logical AND function.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *AndLNode::mul_ring( const Type *t0, const Type *t1 ) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+ int widen = MAX2(r0->_widen,r1->_widen);
+
+ // If either input is a constant, might be able to trim cases
+ if( !r0->is_con() && !r1->is_con() )
+ return TypeLong::LONG; // No constants to be had
+
+ // Both constants? Return bits
+ if( r0->is_con() && r1->is_con() )
+ return TypeLong::make( r0->get_con() & r1->get_con() );
+
+ if( r0->is_con() && r0->get_con() > 0 )
+ return TypeLong::make(CONST64(0), r0->get_con(), widen);
+
+ if( r1->is_con() && r1->get_con() > 0 )
+ return TypeLong::make(CONST64(0), r1->get_con(), widen);
+
+ return TypeLong::LONG; // No constants to be had
+}
+
+//------------------------------Identity---------------------------------------
+// Masking off the high bits of an unsigned load is not required
+Node *AndLNode::Identity( PhaseTransform *phase ) {
+
+ // x & x => x
+ if (phase->eqv(in(1), in(2))) return in(1);
+
+ Node *usr = in(1);
+ const TypeLong *t2 = phase->type( in(2) )->isa_long();
+ if( t2 && t2->is_con() ) {
+ jlong con = t2->get_con();
+ // Masking off high bits which are always zero is useless.
+ const TypeLong* t1 = phase->type( in(1) )->isa_long();
+ if (t1 != NULL && t1->_lo >= 0) {
+ jlong t1_support = ((jlong)1 << (1 + log2_long(t1->_hi))) - 1;
+ if ((t1_support & con) == t1_support)
+ return usr;
+ }
+ uint lop = usr->Opcode();
+ // Masking off the high bits of an unsigned-shift-right is not
+ // needed either.
+ if( lop == Op_URShiftL ) {
+ const TypeInt *t12 = phase->type( usr->in(2) )->isa_int();
+ if( t12 && t12->is_con() ) {
+ int shift_con = t12->get_con();
+ jlong mask = max_julong >> shift_con;
+ if( (mask&con) == mask ) // If AND is useless, skip it
+ return usr;
+ }
+ }
+ }
+ return MulNode::Identity(phase);
+}
+
+//------------------------------Ideal------------------------------------------
+Node *AndLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Special case constant AND mask
+ const TypeLong *t2 = phase->type( in(2) )->isa_long();
+ if( !t2 || !t2->is_con() ) return MulNode::Ideal(phase, can_reshape);
+ const jlong mask = t2->get_con();
+
+ Node *rsh = in(1);
+ uint rop = rsh->Opcode();
+
+ // Masking off sign bits? Don't make them!
+ if( rop == Op_RShiftL ) {
+ const TypeInt *t12 = phase->type(rsh->in(2))->isa_int();
+ if( t12 && t12->is_con() ) { // Shift is by a constant
+ int shift = t12->get_con();
+ shift &= (BitsPerJavaInteger*2)-1; // semantics of Java shifts
+ const jlong sign_bits_mask = ~(((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - shift)) -1);
+ // If the AND'ing of the 2 masks has no bits, then only original shifted
+ // bits survive. NO sign-extension bits survive the maskings.
+ if( (sign_bits_mask & mask) == 0 ) {
+ // Use zero-fill shift instead
+ Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(rsh->in(1),rsh->in(2)));
+ return new (phase->C, 3) AndLNode( zshift, in(2) );
+ }
+ }
+ }
+
+ return MulNode::Ideal(phase, can_reshape);
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *LShiftINode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int(); // shift count is an int
+ return ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerInt - 1 ) ) == 0 ) ? in(1) : this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If the right input is a constant, and the left input is an add of a
+// constant, flatten the tree: (X+con1)<<con0 ==> X<<con0 + con1<<con0
+Node *LShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t = phase->type( in(2) );
+ if( t == Type::TOP ) return NULL; // Right input is dead
+ const TypeInt *t2 = t->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const int con = t2->get_con() & ( BitsPerInt - 1 ); // masked shift count
+
+ if ( con == 0 ) return NULL; // let Identity() handle 0 shift count
+
+ // Left input is an add of a constant?
+ Node *add1 = in(1);
+ int add1_op = add1->Opcode();
+ if( add1_op == Op_AddI ) { // Left input is an add?
+ assert( add1 != add1->in(1), "dead loop in LShiftINode::Ideal" );
+ const TypeInt *t12 = phase->type(add1->in(2))->isa_int();
+ if( t12 && t12->is_con() ){ // Left input is an add of a con?
+ // Transform is legal, but check for profit. Avoid breaking 'i2s'
+ // and 'i2b' patterns which typically fold into 'StoreC/StoreB'.
+ if( con < 16 ) {
+ // Compute X << con0
+ Node *lsh = phase->transform( new (phase->C, 3) LShiftINode( add1->in(1), in(2) ) );
+ // Compute X<<con0 + (con1<<con0)
+ return new (phase->C, 3) AddINode( lsh, phase->intcon(t12->get_con() << con));
+ }
+ }
+ }
+
+ // Check for "(x>>c0)<<c0" which just masks off low bits
+ if( (add1_op == Op_RShiftI || add1_op == Op_URShiftI ) &&
+ add1->in(2) == in(2) )
+ // Convert to "(x & -(1<<c0))"
+ return new (phase->C, 3) AndINode(add1->in(1),phase->intcon( -(1<<con)));
+
+ // Check for "((x>>c0) & Y)<<c0" which just masks off more low bits
+ if( add1_op == Op_AndI ) {
+ Node *add2 = add1->in(1);
+ int add2_op = add2->Opcode();
+ if( (add2_op == Op_RShiftI || add2_op == Op_URShiftI ) &&
+ add2->in(2) == in(2) ) {
+ // Convert to "(x & (Y<<c0))"
+ Node *y_sh = phase->transform( new (phase->C, 3) LShiftINode( add1->in(2), in(2) ) );
+ return new (phase->C, 3) AndINode( add2->in(1), y_sh );
+ }
+ }
+
+ // Check for ((x & ((1<<(32-c0))-1)) << c0) which ANDs off high bits
+ // before shifting them away.
+ const jint bits_mask = right_n_bits(BitsPerJavaInteger-con);
+ if( add1_op == Op_AndI &&
+ phase->type(add1->in(2)) == TypeInt::make( bits_mask ) )
+ return new (phase->C, 3) LShiftINode( add1->in(1), in(2) );
+
+ return NULL;
+}
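Each of the left-shift rewrites above is an identity in wrapping 32-bit arithmetic. A standalone sketch of the three main cases (illustrative only, not part of this patch; Java int semantics emulated with uint32_t):

#include <cassert>
#include <cstdint>

static int32_t shl (int32_t x, int c) { return (int32_t)((uint32_t)x << c); }   // LShiftI
static int32_t ushr(int32_t x, int c) { return (int32_t)((uint32_t)x >> c); }   // URShiftI
static int32_t addw(int32_t a, int32_t b) { return (int32_t)((uint32_t)a + (uint32_t)b); }

int main() {
  int32_t x = 0x12345678, y = 0x0F0F0F0F;
  int c0 = 7, con1 = 1000;

  // (X + con1) << c0  ==>  (X << c0) + (con1 << c0)
  assert(shl(addw(x, con1), c0) == addw(shl(x, c0), shl(con1, c0)));

  // (x >>> c0) << c0  ==>  x & -(1 << c0), i.e. just mask off the low bits
  assert(shl(ushr(x, c0), c0) == (x & -(1 << c0)));

  // ((x >>> c0) & y) << c0  ==>  x & (y << c0)
  assert(shl(ushr(x, c0) & y, c0) == (x & shl(y, c0)));
  return 0;
}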
+
+//------------------------------Value------------------------------------------
+// A LShiftINode shifts its input1 left by the input2 amount.
+const Type *LShiftINode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if( (t1 == TypeInt::INT) || (t2 == TypeInt::INT) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return TypeInt::INT;
+
+ const TypeInt *r1 = t1->is_int(); // Handy access
+ const TypeInt *r2 = t2->is_int(); // Handy access
+
+ if (!r2->is_con())
+ return TypeInt::INT;
+
+ uint shift = r2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ // Shift by a multiple of 32 does nothing:
+ if (shift == 0) return t1;
+
+ // If the shift is a constant, shift the bounds of the type,
+ // unless this could lead to an overflow.
+ if (!r1->is_con()) {
+ jint lo = r1->_lo, hi = r1->_hi;
+ if (((lo << shift) >> shift) == lo &&
+ ((hi << shift) >> shift) == hi) {
+ // No overflow. The range shifts up cleanly.
+ return TypeInt::make((jint)lo << (jint)shift,
+ (jint)hi << (jint)shift,
+ MAX2(r1->_widen,r2->_widen));
+ }
+ return TypeInt::INT;
+ }
+
+ return TypeInt::make( (jint)r1->get_con() << (jint)shift );
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *LShiftLNode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int(); // shift count is an int
+ return ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerLong - 1 ) ) == 0 ) ? in(1) : this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If the right input is a constant, and the left input is an add of a
+// constant, flatten the tree: (X+con1)<<con0 ==> X<<con0 + con1<<con0
+Node *LShiftLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t = phase->type( in(2) );
+ if( t == Type::TOP ) return NULL; // Right input is dead
+ const TypeInt *t2 = t->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const int con = t2->get_con() & ( BitsPerLong - 1 ); // masked shift count
+
+ if ( con == 0 ) return NULL; // let Identity() handle 0 shift count
+
+ // Left input is an add of a constant?
+ Node *add1 = in(1);
+ int add1_op = add1->Opcode();
+ if( add1_op == Op_AddL ) { // Left input is an add?
+ // Avoid dead data cycles from dead loops
+ assert( add1 != add1->in(1), "dead loop in LShiftLNode::Ideal" );
+ const TypeLong *t12 = phase->type(add1->in(2))->isa_long();
+ if( t12 && t12->is_con() ){ // Left input is an add of a con?
+ // Compute X << con0
+ Node *lsh = phase->transform( new (phase->C, 3) LShiftLNode( add1->in(1), in(2) ) );
+ // Compute X<<con0 + (con1<<con0)
+ return new (phase->C, 3) AddLNode( lsh, phase->longcon(t12->get_con() << con));
+ }
+ }
+
+ // Check for "(x>>c0)<<c0" which just masks off low bits
+ if( (add1_op == Op_RShiftL || add1_op == Op_URShiftL ) &&
+ add1->in(2) == in(2) )
+ // Convert to "(x & -(1<<c0))"
+ return new (phase->C, 3) AndLNode(add1->in(1),phase->longcon( -(CONST64(1)<<con)));
+
+ // Check for "((x>>c0) & Y)<<c0" which just masks off more low bits
+ if( add1_op == Op_AndL ) {
+ Node *add2 = add1->in(1);
+ int add2_op = add2->Opcode();
+ if( (add2_op == Op_RShiftL || add2_op == Op_URShiftL ) &&
+ add2->in(2) == in(2) ) {
+ // Convert to "(x & (Y<<c0))"
+ Node *y_sh = phase->transform( new (phase->C, 3) LShiftLNode( add1->in(2), in(2) ) );
+ return new (phase->C, 3) AndLNode( add2->in(1), y_sh );
+ }
+ }
+
+ // Check for ((x & ((CONST64(1)<<(64-c0))-1)) << c0) which ANDs off high bits
+ // before shifting them away.
+ const jlong bits_mask = ((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - con)) - CONST64(1);
+ if( add1_op == Op_AndL &&
+ phase->type(add1->in(2)) == TypeLong::make( bits_mask ) )
+ return new (phase->C, 3) LShiftLNode( add1->in(1), in(2) );
+
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+// A LShiftLNode shifts its input1 left by the input2 amount.
+const Type *LShiftLNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if( (t1 == TypeLong::LONG) || (t2 == TypeInt::INT) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return TypeLong::LONG;
+
+ const TypeLong *r1 = t1->is_long(); // Handy access
+ const TypeInt *r2 = t2->is_int(); // Handy access
+
+ if (!r2->is_con())
+ return TypeLong::LONG;
+
+ uint shift = r2->get_con();
+ shift &= (BitsPerJavaInteger*2)-1; // semantics of Java shifts
+ // Shift by a multiple of 64 does nothing:
+ if (shift == 0) return t1;
+
+ // If the shift is a constant, shift the bounds of the type,
+ // unless this could lead to an overflow.
+ if (!r1->is_con()) {
+ jlong lo = r1->_lo, hi = r1->_hi;
+ if (((lo << shift) >> shift) == lo &&
+ ((hi << shift) >> shift) == hi) {
+ // No overflow. The range shifts up cleanly.
+ return TypeLong::make((jlong)lo << (jint)shift,
+ (jlong)hi << (jint)shift,
+ MAX2(r1->_widen,r2->_widen));
+ }
+ return TypeLong::LONG;
+ }
+
+ return TypeLong::make( (jlong)r1->get_con() << (jint)shift );
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *RShiftINode::Identity( PhaseTransform *phase ) {
+ const TypeInt *t2 = phase->type(in(2))->isa_int();
+ if( !t2 ) return this;
+ if ( t2->is_con() && ( t2->get_con() & ( BitsPerInt - 1 ) ) == 0 )
+ return in(1);
+
+ // Check for useless sign-masking
+ if( in(1)->Opcode() == Op_LShiftI &&
+ in(1)->req() == 3 &&
+ in(1)->in(2) == in(2) &&
+ t2->is_con() ) {
+ uint shift = t2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ // Compute masks for which this shifting doesn't change
+ int lo = (-1 << (BitsPerJavaInteger - shift-1)); // FFFF8000
+ int hi = ~lo; // 00007FFF
+ const TypeInt *t11 = phase->type(in(1)->in(1))->isa_int();
+ if( !t11 ) return this;
+ // Does actual value fit inside of mask?
+ if( lo <= t11->_lo && t11->_hi <= hi )
+ return in(1)->in(1); // Then shifting is a nop
+ }
+
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+Node *RShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Inputs may be TOP if they are dead.
+ const TypeInt *t1 = phase->type( in(1) )->isa_int();
+ if( !t1 ) return NULL; // Left input must be an integer
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const TypeInt *t3; // type of in(1).in(2)
+ int shift = t2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+
+ if ( shift == 0 ) return NULL; // let Identity() handle 0 shift count
+
+ // Check for (x & 0xFF000000) >> 24, whose mask can be made smaller.
+ // Such expressions arise normally from shift chains like (byte)(x >> 24).
+ const Node *mask = in(1);
+ if( mask->Opcode() == Op_AndI &&
+ (t3 = phase->type(mask->in(2))->isa_int()) &&
+ t3->is_con() ) {
+ Node *x = mask->in(1);
+ jint maskbits = t3->get_con();
+ // Convert to "(x >> shift) & (mask >> shift)"
+ Node *shr_nomask = phase->transform( new (phase->C, 3) RShiftINode(mask->in(1), in(2)) );
+ return new (phase->C, 3) AndINode(shr_nomask, phase->intcon( maskbits >> shift));
+ }
+
+ // Check for "(short[i] <<16)>>16" which simply sign-extends
+ const Node *shl = in(1);
+ if( shl->Opcode() != Op_LShiftI ) return NULL;
+
+ if( shift == 16 &&
+ (t3 = phase->type(shl->in(2))->isa_int()) &&
+ t3->is_con(16) ) {
+ Node *ld = shl->in(1);
+ if( ld->Opcode() == Op_LoadS ) {
+ // Sign extension is just useless here. Return a RShiftI of zero instead
+ // of returning 'ld' directly. We cannot return an old Node directly as
+ // that is the job of 'Identity' calls and Identity calls only work on
+ // direct inputs ('ld' is an extra Node removed from 'this'). The
+ // combined optimization requires Identity only return direct inputs.
+ set_req(1, ld);
+ set_req(2, phase->intcon(0));
+ return this;
+ }
+ else if( ld->Opcode() == Op_LoadC )
+ // Replace zero-extension-load with sign-extension-load
+ return new (phase->C, 3) LoadSNode( ld->in(MemNode::Control),
+ ld->in(MemNode::Memory),
+ ld->in(MemNode::Address),
+ ld->adr_type());
+ }
+
+ // Check for "(byte[i] <<24)>>24" which simply sign-extends
+ if( shift == 24 &&
+ (t3 = phase->type(shl->in(2))->isa_int()) &&
+ t3->is_con(24) ) {
+ Node *ld = shl->in(1);
+ if( ld->Opcode() == Op_LoadB ) {
+ // Sign extension is just useless here
+ set_req(1, ld);
+ set_req(2, phase->intcon(0));
+ return this;
+ }
+ }
+
+ return NULL;
+}
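The signed-right-shift rewrites above can be spot-checked the same way: a signed shift distributes over AND, and shifting left then right by 16 (or 24) is exactly a narrowing to short (or byte) followed by sign extension. A standalone sketch (illustrative only; assumes two's-complement int32_t with arithmetic >>):

#include <cassert>
#include <cstdint>

static int32_t sshr(int32_t x, int s) { return x >> s; }                        // RShiftI
static int32_t shl (int32_t x, int s) { return (int32_t)((uint32_t)x << s); }   // LShiftI

int main() {
  int32_t x = (int32_t)0xCAFEBABE;

  // (x & 0xFF000000) >> 24  ==>  (x >> 24) & (0xFF000000 >> 24)
  int32_t m = (int32_t)0xFF000000;
  assert(sshr(x & m, 24) == (sshr(x, 24) & sshr(m, 24)));

  // (x << 16) >> 16 sign-extends the low 16 bits, i.e. it is (short)x.
  assert(sshr(shl(x, 16), 16) == (int32_t)(int16_t)x);
  // (x << 24) >> 24 likewise is (byte)x.
  assert(sshr(shl(x, 24), 24) == (int32_t)(int8_t)x);
  return 0;
}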
+
+//------------------------------Value------------------------------------------
+// A RShiftINode shifts its input1 right by the input2 amount.
+const Type *RShiftINode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if (t1 == Type::BOTTOM || t2 == Type::BOTTOM)
+ return TypeInt::INT;
+
+ if (t2 == TypeInt::INT)
+ return TypeInt::INT;
+
+ const TypeInt *r1 = t1->is_int(); // Handy access
+ const TypeInt *r2 = t2->is_int(); // Handy access
+
+ // If the shift is a constant, just shift the bounds of the type.
+ // For example, if the shift is 31, we just propagate sign bits.
+ if (r2->is_con()) {
+ uint shift = r2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ // Shift by a multiple of 32 does nothing:
+ if (shift == 0) return t1;
+ // Calculate reasonably aggressive bounds for the result.
+ // This is necessary if we are to correctly type things
+ // like (x<<24>>24) == ((byte)x).
+ jint lo = (jint)r1->_lo >> (jint)shift;
+ jint hi = (jint)r1->_hi >> (jint)shift;
+ assert(lo <= hi, "must have valid bounds");
+ const TypeInt* ti = TypeInt::make(lo, hi, MAX2(r1->_widen,r2->_widen));
+#ifdef ASSERT
+ // Make sure we get the sign-capture idiom correct.
+ if (shift == BitsPerJavaInteger-1) {
+ if (r1->_lo >= 0) assert(ti == TypeInt::ZERO, ">>31 of + is 0");
+ if (r1->_hi < 0) assert(ti == TypeInt::MINUS_1, ">>31 of - is -1");
+ }
+#endif
+ return ti;
+ }
+
+ if( !r1->is_con() || !r2->is_con() )
+ return TypeInt::INT;
+
+ // Signed shift right
+ return TypeInt::make( r1->get_con() >> (r2->get_con()&31) );
+}
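The sign-capture assertion above is the familiar x >> 31 idiom, and shifting the bounds is justified because arithmetic right shift is monotone. A standalone check (illustrative only; assumes arithmetic >> on negative int32_t):

#include <cassert>
#include <cstdint>

int main() {
  // Sign capture: x >> 31 is 0 for any non-negative x and -1 for any negative x,
  // exactly the [0,0] / [-1,-1] typing asserted above.
  assert(( 12345 >> 31) ==  0);
  assert((     0 >> 31) ==  0);
  assert((-12345 >> 31) == -1);

  // Shifting the bounds: for lo <= x <= hi, (x >> s) stays in [lo >> s, hi >> s],
  // because arithmetic right shift (floor division by 2^s) is monotone.
  int32_t lo = -1000, hi = 500;
  int s = 3;
  for (int32_t x = lo; x <= hi; ++x)
    assert((lo >> s) <= (x >> s) && (x >> s) <= (hi >> s));
  return 0;
}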
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *RShiftLNode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int(); // shift count is an int
+ return ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerLong - 1 ) ) == 0 ) ? in(1) : this;
+}
+
+//------------------------------Value------------------------------------------
+// A RShiftLNode shifts its input1 right by the input2 amount.
+const Type *RShiftLNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if (t1 == Type::BOTTOM || t2 == Type::BOTTOM)
+ return TypeLong::LONG;
+
+ if (t2 == TypeInt::INT)
+ return TypeLong::LONG;
+
+ const TypeLong *r1 = t1->is_long(); // Handy access
+ const TypeInt *r2 = t2->is_int (); // Handy access
+
+ // If the shift is a constant, just shift the bounds of the type.
+ // For example, if the shift is 63, we just propagate sign bits.
+ if (r2->is_con()) {
+ uint shift = r2->get_con();
+ shift &= (2*BitsPerJavaInteger)-1; // semantics of Java shifts
+ // Shift by a multiple of 64 does nothing:
+ if (shift == 0) return t1;
+ // Calculate reasonably aggressive bounds for the result.
+ // This is necessary if we are to correctly type things
+ // like (x<<24>>24) == ((byte)x).
+ jlong lo = (jlong)r1->_lo >> (jlong)shift;
+ jlong hi = (jlong)r1->_hi >> (jlong)shift;
+ assert(lo <= hi, "must have valid bounds");
+ const TypeLong* tl = TypeLong::make(lo, hi, MAX2(r1->_widen,r2->_widen));
+ #ifdef ASSERT
+ // Make sure we get the sign-capture idiom correct.
+ if (shift == (2*BitsPerJavaInteger)-1) {
+ if (r1->_lo >= 0) assert(tl == TypeLong::ZERO, ">>63 of + is 0");
+ if (r1->_hi < 0) assert(tl == TypeLong::MINUS_1, ">>63 of - is -1");
+ }
+ #endif
+ return tl;
+ }
+
+ return TypeLong::LONG; // Give up
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *URShiftINode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int();
+ if ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerInt - 1 ) ) == 0 ) return in(1);
+
+ // Check for "((x << LogBytesPerWord) + (wordSize-1)) >> LogBytesPerWord" which is just "x".
+ // Happens during new-array length computation.
+ // Safe if 'x' is in the range [0..(max_int>>LogBytesPerWord)]
+ Node *add = in(1);
+ if( add->Opcode() == Op_AddI ) {
+ const TypeInt *t2 = phase->type(add->in(2))->isa_int();
+ if( t2 && t2->is_con(wordSize - 1) &&
+ add->in(1)->Opcode() == Op_LShiftI ) {
+ // Check that shift_counts are LogBytesPerWord
+ Node *lshift_count = add->in(1)->in(2);
+ const TypeInt *t_lshift_count = phase->type(lshift_count)->isa_int();
+ if( t_lshift_count && t_lshift_count->is_con(LogBytesPerWord) &&
+ t_lshift_count == phase->type(in(2)) ) {
+ Node *x = add->in(1)->in(1);
+ const TypeInt *t_x = phase->type(x)->isa_int();
+ if( t_x != NULL && 0 <= t_x->_lo && t_x->_hi <= (max_jint>>LogBytesPerWord) ) {
+ return x;
+ }
+ }
+ }
+ }
+
+ return (phase->type(in(2))->higher_equal(TypeInt::ZERO)) ? in(1) : this;
+}
+
+//------------------------------Ideal------------------------------------------
+Node *URShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const int con = t2->get_con() & 31; // Shift count is always masked
+ if ( con == 0 ) return NULL; // let Identity() handle a 0 shift count
+ // We'll want a mask covering the low (32-con) bits that survive the shift
+ const int mask = right_n_bits(BitsPerJavaInteger - con);
+
+ int in1_op = in(1)->Opcode();
+
+ // Check for ((x>>>a)>>>b) and replace with (x>>>(a+b)) when a+b < 32
+ if( in1_op == Op_URShiftI ) {
+ const TypeInt *t12 = phase->type( in(1)->in(2) )->isa_int();
+ if( t12 && t12->is_con() ) { // Right input is a constant
+ assert( in(1) != in(1)->in(1), "dead loop in URShiftINode::Ideal" );
+ const int con2 = t12->get_con() & 31; // Shift count is always masked
+ const int con3 = con+con2;
+ if( con3 < 32 ) // Only merge shifts if total is < 32
+ return new (phase->C, 3) URShiftINode( in(1)->in(1), phase->intcon(con3) );
+ }
+ }
+
+ // Check for ((x << z) + Y) >>> z. Replace with (x + (Y>>>z)) & z-mask
+ // The idiom for rounding to a power of 2 is "(Q+(2^z-1)) >>> z".
+ // If Q is "X << z" the rounding is useless. Look for patterns like
+ // ((X<<Z) + Y) >>> Z and replace with (X + Y>>>Z) & Z-mask.
+ Node *add = in(1);
+ if( in1_op == Op_AddI ) {
+ Node *lshl = add->in(1);
+ if( lshl->Opcode() == Op_LShiftI &&
+ phase->type(lshl->in(2)) == t2 ) {
+ Node *y_z = phase->transform( new (phase->C, 3) URShiftINode(add->in(2),in(2)) );
+ Node *sum = phase->transform( new (phase->C, 3) AddINode( lshl->in(1), y_z ) );
+ return new (phase->C, 3) AndINode( sum, phase->intcon(mask) );
+ }
+ }
+
+ // Check for (x & mask) >>> z. Replace with (x >>> z) & (mask >>> z)
+ // This shortens the mask. Also, if we are extracting a high byte and
+ // storing it to a buffer, the mask will be removed completely.
+ Node *andi = in(1);
+ if( in1_op == Op_AndI ) {
+ const TypeInt *t3 = phase->type( andi->in(2) )->isa_int();
+ if( t3 && t3->is_con() ) { // Right input is a constant
+ jint mask2 = t3->get_con();
+ mask2 >>= con; // *signed* shift downward (high-order zeroes do not help)
+ Node *newshr = phase->transform( new (phase->C, 3) URShiftINode(andi->in(1), in(2)) );
+ return new (phase->C, 3) AndINode(newshr, phase->intcon(mask2));
+ // The negative values are easier to materialize than positive ones.
+ // A typical case from address arithmetic is ((x & ~15) >> 4).
+ // It's better to change that to ((x >> 4) & ~0) versus
+ // ((x >> 4) & 0x0FFFFFFF). The difference is greatest in LP64.
+ }
+ }
+
+ // Check for "(X << z ) >>> z" which simply zero-extends
+ Node *shl = in(1);
+ if( in1_op == Op_LShiftI &&
+ phase->type(shl->in(2)) == t2 )
+ return new (phase->C, 3) AndINode( shl->in(1), phase->intcon(mask) );
+
+ return NULL;
+}
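All four unsigned-right-shift rewrites above (shift merging, the rounding idiom, mask shortening, and zero extension) are identities in wrapping 32-bit arithmetic. A standalone sketch (illustrative only, not part of this patch; assumes two's-complement int32_t):

#include <cassert>
#include <cstdint>

static uint32_t U(int32_t x) { return (uint32_t)x; }
static int32_t ushr(int32_t x, int s) { return (int32_t)(U(x) >> s); }          // URShiftI
static int32_t shl (int32_t x, int s) { return (int32_t)(U(x) << s); }          // LShiftI
static int32_t addw(int32_t a, int32_t b) { return (int32_t)(U(a) + U(b)); }    // AddI

int main() {
  int32_t x = 0x1234ABCD, y = 0x07654321;
  int z = 4;
  int32_t zmask = (int32_t)(0xFFFFFFFFu >> z);     // 2^(32-z) - 1

  // Shift merging: (x >>> a) >>> b == x >>> (a+b) when a+b < 32.
  assert(ushr(ushr(x, 9), 7) == ushr(x, 16));

  // Rounding idiom: ((X << z) + Y) >>> z == (X + (Y >>> z)) & zmask.
  assert(ushr(addw(shl(x, z), y), z) == (addw(x, ushr(y, z)) & zmask));

  // Mask shortening: (x & m) >>> z == (x >>> z) & (m >>> z).
  int32_t m = (int32_t)0xFFFF0000;
  assert(ushr(x & m, z) == (ushr(x, z) & ushr(m, z)));

  // Zero extension: (X << z) >>> z == X & zmask.
  assert(ushr(shl(x, z), z) == (x & zmask));
  return 0;
}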
+
+//------------------------------Value------------------------------------------
+// A URShiftINode shifts its input1 right by the input2 amount.
+const Type *URShiftINode::Value( PhaseTransform *phase ) const {
+ // (This is a near clone of RShiftINode::Value.)
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if (t1 == Type::BOTTOM || t2 == Type::BOTTOM)
+ return TypeInt::INT;
+
+ if (t2 == TypeInt::INT)
+ return TypeInt::INT;
+
+ const TypeInt *r1 = t1->is_int(); // Handy access
+ const TypeInt *r2 = t2->is_int(); // Handy access
+
+ if (r2->is_con()) {
+ uint shift = r2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ // Shift by a multiple of 32 does nothing:
+ if (shift == 0) return t1;
+ // Calculate reasonably aggressive bounds for the result.
+ jint lo = (juint)r1->_lo >> (juint)shift;
+ jint hi = (juint)r1->_hi >> (juint)shift;
+ if (r1->_hi >= 0 && r1->_lo < 0) {
+ // If the type has both negative and positive values,
+ // there are two separate sub-domains to worry about:
+ // The positive half and the negative half.
+ jint neg_lo = lo;
+ jint neg_hi = (juint)-1 >> (juint)shift;
+ jint pos_lo = (juint) 0 >> (juint)shift;
+ jint pos_hi = hi;
+ lo = MIN2(neg_lo, pos_lo); // == 0
+ hi = MAX2(neg_hi, pos_hi); // == -1 >>> shift;
+ }
+ assert(lo <= hi, "must have valid bounds");
+ const TypeInt* ti = TypeInt::make(lo, hi, MAX2(r1->_widen,r2->_widen));
+ #ifdef ASSERT
+ // Make sure we get the sign-capture idiom correct.
+ if (shift == BitsPerJavaInteger-1) {
+ if (r1->_lo >= 0) assert(ti == TypeInt::ZERO, ">>>31 of + is 0");
+ if (r1->_hi < 0) assert(ti == TypeInt::ONE, ">>>31 of - is +1");
+ }
+ #endif
+ return ti;
+ }
+
+ //
+ // Do not support shifted oops in info for GC
+ //
+ // else if( t1->base() == Type::InstPtr ) {
+ //
+ // const TypeInstPtr *o = t1->is_instptr();
+ // if( t1->singleton() )
+ // return TypeInt::make( ((uint32)o->const_oop() + o->_offset) >> shift );
+ // }
+ // else if( t1->base() == Type::KlassPtr ) {
+ // const TypeKlassPtr *o = t1->is_klassptr();
+ // if( t1->singleton() )
+ // return TypeInt::make( ((uint32)o->const_oop() + o->_offset) >> shift );
+ // }
+
+ return TypeInt::INT;
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *URShiftLNode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int(); // shift count is an int
+ return ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerLong - 1 ) ) == 0 ) ? in(1) : this;
+}
+
+//------------------------------Ideal------------------------------------------
+Node *URShiftLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const int con = t2->get_con() & ( BitsPerLong - 1 ); // Shift count is always masked
+ if ( con == 0 ) return NULL; // let Identity() handle a 0 shift count
+ // note: mask computation below does not work for 0 shift count
+ // We'll want a mask covering the low (64-con) bits that survive the shift
+ const jlong mask = (((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - con)) -1);
+
+ // Check for ((x << z) + Y) >>> z. Replace with (x + (Y>>>z)) & z-mask
+ // The idiom for rounding to a power of 2 is "(Q+(2^z-1)) >>> z".
+ // If Q is "X << z" the rounding is useless. Look for patterns like
+ // ((X<<Z) + Y) >>> Z and replace with (X + Y>>>Z) & Z-mask.
+ Node *add = in(1);
+ if( add->Opcode() == Op_AddL ) {
+ Node *lshl = add->in(1);
+ if( lshl->Opcode() == Op_LShiftL &&
+ phase->type(lshl->in(2)) == t2 ) {
+ Node *y_z = phase->transform( new (phase->C, 3) URShiftLNode(add->in(2),in(2)) );
+ Node *sum = phase->transform( new (phase->C, 3) AddLNode( lshl->in(1), y_z ) );
+ return new (phase->C, 3) AndLNode( sum, phase->longcon(mask) );
+ }
+ }
+
+ // Check for (x & mask) >>> z. Replace with (x >>> z) & (mask >>> z)
+ // This shortens the mask. Also, if we are extracting a high byte and
+ // storing it to a buffer, the mask will be removed completely.
+ Node *andi = in(1);
+ if( andi->Opcode() == Op_AndL ) {
+ const TypeLong *t3 = phase->type( andi->in(2) )->isa_long();
+ if( t3 && t3->is_con() ) { // Right input is a constant
+ jlong mask2 = t3->get_con();
+ mask2 >>= con; // *signed* shift downward (high-order zeroes do not help)
+ Node *newshr = phase->transform( new (phase->C, 3) URShiftLNode(andi->in(1), in(2)) );
+ return new (phase->C, 3) AndLNode(newshr, phase->longcon(mask2));
+ }
+ }
+
+ // Check for "(X << z ) >>> z" which simply zero-extends
+ Node *shl = in(1);
+ if( shl->Opcode() == Op_LShiftL &&
+ phase->type(shl->in(2)) == t2 )
+ return new (phase->C, 3) AndLNode( shl->in(1), phase->longcon(mask) );
+
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+// A URShiftLNode shifts its input1 right by the input2 amount.
+const Type *URShiftLNode::Value( PhaseTransform *phase ) const {
+ // (This is a near clone of RShiftLNode::Value.)
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if (t1 == Type::BOTTOM || t2 == Type::BOTTOM)
+ return TypeLong::LONG;
+
+ if (t2 == TypeInt::INT)
+ return TypeLong::LONG;
+
+ const TypeLong *r1 = t1->is_long(); // Handy access
+ const TypeInt *r2 = t2->is_int (); // Handy access
+
+ if (r2->is_con()) {
+ uint shift = r2->get_con();
+ shift &= (2*BitsPerJavaInteger)-1; // semantics of Java shifts
+ // Shift by a multiple of 64 does nothing:
+ if (shift == 0) return t1;
+ // Calculate reasonably aggressive bounds for the result.
+ jlong lo = (julong)r1->_lo >> (juint)shift;
+ jlong hi = (julong)r1->_hi >> (juint)shift;
+ if (r1->_hi >= 0 && r1->_lo < 0) {
+ // If the type has both negative and positive values,
+ // there are two separate sub-domains to worry about:
+ // The positive half and the negative half.
+ jlong neg_lo = lo;
+ jlong neg_hi = (julong)-1 >> (juint)shift;
+ jlong pos_lo = (julong) 0 >> (juint)shift;
+ jlong pos_hi = hi;
+ //lo = MIN2(neg_lo, pos_lo); // == 0
+ lo = neg_lo < pos_lo ? neg_lo : pos_lo;
+ //hi = MAX2(neg_hi, pos_hi); // == -1 >>> shift;
+ hi = neg_hi > pos_hi ? neg_hi : pos_hi;
+ }
+ assert(lo <= hi, "must have valid bounds");
+ const TypeLong* tl = TypeLong::make(lo, hi, MAX2(r1->_widen,r2->_widen));
+ #ifdef ASSERT
+ // Make sure we get the sign-capture idiom correct.
+ if (shift == (2*BitsPerJavaInteger)-1) {
+ if (r1->_lo >= 0) assert(tl == TypeLong::ZERO, ">>>63 of + is 0");
+ if (r1->_hi < 0) assert(tl == TypeLong::ONE, ">>>63 of - is +1");
+ }
+ #endif
+ return tl;
+ }
+
+ return TypeLong::LONG; // Give up
+}
diff --git a/src/share/vm/opto/mulnode.hpp b/src/share/vm/opto/mulnode.hpp
new file mode 100644
index 000000000..380e35a89
--- /dev/null
+++ b/src/share/vm/opto/mulnode.hpp
@@ -0,0 +1,247 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+class PhaseTransform;
+
+//------------------------------MulNode----------------------------------------
+// Classic MULTIPLY functionality. This covers all the usual 'multiply'
+// behaviors for an algebraic ring. Multiply-integer, multiply-float,
+// multiply-double, and binary-and all derive from this class. The
+// various identity values are supplied by virtual functions.
+class MulNode : public Node {
+ virtual uint hash() const;
+public:
+ MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {
+ init_class_id(Class_Mul);
+ }
+
+ // Handle algebraic identities here. If we have an identity, return the Node
+ // we are equivalent to. We look for "multiply by one" as an identity.
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // We also canonicalize the Node, moving constants to the right input,
+ // and flatten expressions (so that 1+x+2 becomes x+3).
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual mul_ring() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Supplied function returns the product of the inputs.
+ // This also type-checks the inputs for sanity. Guaranteed never to
+ // be passed a TOP or BOTTOM type, these are filtered out by a pre-check.
+ // This call recognizes the multiplicative zero type.
+ virtual const Type *mul_ring( const Type *, const Type * ) const = 0;
+
+ // Supplied function to return the multiplicative identity type
+ virtual const Type *mul_id() const = 0;
+
+ // Supplied function to return the additive identity type
+ virtual const Type *add_id() const = 0;
+
+ // Supplied function to return the additive opcode
+ virtual int add_opcode() const = 0;
+
+ // Supplied function to return the multiplicative opcode
+ virtual int mul_opcode() const = 0;
+
+};
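The reason AND can reuse all of this multiply machinery is that bitwise AND on fixed-width integers behaves like the ring multiply, with -1 (all bits set) as the multiplicative identity and 0 as the multiplicative zero; those are exactly the values the AndINode/AndLNode overrides below return from mul_id() and add_id(). A standalone illustration (not HotSpot code):

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = 0x5A5A5A5A;

  // Ordinary multiply ring: identity 1, zero 0.
  assert(x * 1 == x);
  assert(x * 0 == 0);

  // The "AND ring" used by AndINode: identity -1, zero 0, with OR as the addition.
  assert((x & -1) == x);    // mul_id() == TypeInt::MINUS_1
  assert((x &  0) == 0);    // add_id() == TypeInt::ZERO acts as the multiplicative zero
  assert((x |  0) == x);    // ...and is also the identity for the OR "addition"
  return 0;
}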
+
+//------------------------------MulINode---------------------------------------
+// Multiply 2 integers
+class MulINode : public MulNode {
+public:
+ MulINode( Node *in1, Node *in2 ) : MulNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeInt::ONE; }
+ const Type *add_id() const { return TypeInt::ZERO; }
+ int add_opcode() const { return Op_AddI; }
+ int mul_opcode() const { return Op_MulI; }
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------MulLNode---------------------------------------
+// Multiply 2 longs
+class MulLNode : public MulNode {
+public:
+ MulLNode( Node *in1, Node *in2 ) : MulNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeLong::ONE; }
+ const Type *add_id() const { return TypeLong::ZERO; }
+ int add_opcode() const { return Op_AddL; }
+ int mul_opcode() const { return Op_MulL; }
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+
+//------------------------------MulFNode---------------------------------------
+// Multiply 2 floats
+class MulFNode : public MulNode {
+public:
+ MulFNode( Node *in1, Node *in2 ) : MulNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeF::ONE; }
+ const Type *add_id() const { return TypeF::ZERO; }
+ int add_opcode() const { return Op_AddF; }
+ int mul_opcode() const { return Op_MulF; }
+ const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------MulDNode---------------------------------------
+// Multiply 2 doubles
+class MulDNode : public MulNode {
+public:
+ MulDNode( Node *in1, Node *in2 ) : MulNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeD::ONE; }
+ const Type *add_id() const { return TypeD::ZERO; }
+ int add_opcode() const { return Op_AddD; }
+ int mul_opcode() const { return Op_MulD; }
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+
+//------------------------------AndINode---------------------------------------
+// Logically AND 2 integers. Included with the MUL nodes because it inherits
+// all the behavior of multiplication on a ring.
+class AndINode : public MulINode {
+public:
+ AndINode( Node *in1, Node *in2 ) : MulINode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeInt::MINUS_1; }
+ const Type *add_id() const { return TypeInt::ZERO; }
+ int add_opcode() const { return Op_OrI; }
+ int mul_opcode() const { return Op_AndI; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------AndLNode---------------------------------------
+// Logically AND 2 longs. Included with the MUL nodes because it inherits
+// all the behavior of multiplication on a ring.
+class AndLNode : public MulLNode {
+public:
+ AndLNode( Node *in1, Node *in2 ) : MulLNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeLong::MINUS_1; }
+ const Type *add_id() const { return TypeLong::ZERO; }
+ int add_opcode() const { return Op_OrL; }
+ int mul_opcode() const { return Op_AndL; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------LShiftINode------------------------------------
+// Logical shift left
+class LShiftINode : public Node {
+public:
+ LShiftINode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------LShiftLNode------------------------------------
+// Logical shift left
+class LShiftLNode : public Node {
+public:
+ LShiftLNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------RShiftINode------------------------------------
+// Signed shift right
+class RShiftINode : public Node {
+public:
+ RShiftINode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------RShiftLNode------------------------------------
+// Signed shift right
+class RShiftLNode : public Node {
+public:
+ RShiftLNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+
+//------------------------------URShiftINode-----------------------------------
+// Logical shift right
+class URShiftINode : public Node {
+public:
+ URShiftINode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------URShiftLNode-----------------------------------
+// Logical shift right
+class URShiftLNode : public Node {
+public:
+ URShiftLNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
diff --git a/src/share/vm/opto/multnode.cpp b/src/share/vm/opto/multnode.cpp
new file mode 100644
index 000000000..5caa3dd8f
--- /dev/null
+++ b/src/share/vm/opto/multnode.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_multnode.cpp.incl"
+
+//=============================================================================
+//------------------------------MultiNode--------------------------------------
+const RegMask &MultiNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+Node *MultiNode::match( const ProjNode *proj, const Matcher *m ) { return proj->clone(); }
+
+//------------------------------proj_out---------------------------------------
+// Get a named projection
+ProjNode* MultiNode::proj_out(uint which_proj) const {
+ assert(Opcode() != Op_If || which_proj == (uint)true || which_proj == (uint)false, "must be 1 or 0");
+ assert(Opcode() != Op_If || outcnt() == 2, "bad if #1");
+ for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
+ Node *p = fast_out(i);
+ if( !p->is_Proj() ) {
+ assert(p == this && this->is_Start(), "else must be proj");
+ continue;
+ }
+ ProjNode *proj = p->as_Proj();
+ if( proj->_con == which_proj ) {
+ assert(Opcode() != Op_If || proj->Opcode() == (which_proj?Op_IfTrue:Op_IfFalse), "bad if #2");
+ return proj;
+ }
+ }
+ return NULL;
+}
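+// Illustrative usage (a sketch, not a caller in this change): for an If node
+// the two control projections can be fetched by their constant, matching the
+// assertions above; either call may return NULL if the projection is unused.
+//   ProjNode *taken     = iff->proj_out(1);   // the IfTrue  projection
+//   ProjNode *not_taken = iff->proj_out(0);   // the IfFalse projection
+// Here 'iff' stands for any If node already in the graph.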
+
+//=============================================================================
+//------------------------------ProjNode---------------------------------------
+uint ProjNode::hash() const {
+ // only one input
+ return (uintptr_t)in(TypeFunc::Control) + (_con << 1) + (_is_io_use ? 1 : 0);
+}
+uint ProjNode::cmp( const Node &n ) const { return _con == ((ProjNode&)n)._con && ((ProjNode&)n)._is_io_use == _is_io_use; }
+uint ProjNode::size_of() const { return sizeof(ProjNode); }
+
+// Test if we propagate interesting control along this projection
+bool ProjNode::is_CFG() const {
+ Node *def = in(0);
+ return (_con == TypeFunc::Control && def->is_CFG());
+}
+
+const Type *ProjNode::bottom_type() const {
+ if (in(0) == NULL) return Type::TOP;
+ const Type *tb = in(0)->bottom_type();
+ if( tb == Type::TOP ) return Type::TOP;
+ if( tb == Type::BOTTOM ) return Type::BOTTOM;
+ const TypeTuple *t = tb->is_tuple();
+ return t->field_at(_con);
+}
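+// Worked example (illustrative): a multi-valued def such as a Call produces a
+// TypeTuple, and each projection simply selects one field of it, so
+//   proj->bottom_type() == def->bottom_type()->is_tuple()->field_at(proj->_con)
+// whenever the def's type is neither TOP nor BOTTOM.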
+
+const TypePtr *ProjNode::adr_type() const {
+ if (bottom_type() == Type::MEMORY) {
+ // in(0) might be a narrow MemBar; otherwise we will report TypePtr::BOTTOM
+ const TypePtr* adr_type = in(0)->adr_type();
+ #ifdef ASSERT
+ if (!is_error_reported() && !Node::in_dump())
+ assert(adr_type != NULL, "source must have adr_type");
+ #endif
+ return adr_type;
+ }
+ assert(bottom_type()->base() != Type::Memory, "no other memories?");
+ return NULL;
+}
+
+bool ProjNode::pinned() const { return in(0)->pinned(); }
+#ifndef PRODUCT
+void ProjNode::dump_spec(outputStream *st) const { st->print("#%d",_con); if(_is_io_use) st->print(" (i_o_use)");}
+#endif
+
+//----------------------------check_con----------------------------------------
+void ProjNode::check_con() const {
+ Node* n = in(0);
+ if (n == NULL) return; // should be assert, but NodeHash makes bogons
+ if (n->is_Mach()) return; // mach. projs. are not type-safe
+ if (n->is_Start()) return; // alas, starts can have mach. projs. also
+ if (_con == SCMemProjNode::SCMEMPROJCON ) return;
+ const Type* t = n->bottom_type();
+ if (t == Type::TOP) return; // multi is dead
+ assert(_con < t->is_tuple()->cnt(), "ProjNode::_con must be in range");
+}
+
+//------------------------------Value------------------------------------------
+const Type *ProjNode::Value( PhaseTransform *phase ) const {
+ if( !in(0) ) return Type::TOP;
+ const Type *t = phase->type(in(0));
+ if( t == Type::TOP ) return t;
+ if( t == Type::BOTTOM ) return t;
+ return t->is_tuple()->field_at(_con);
+}
+
+//------------------------------out_RegMask------------------------------------
+// Pass the buck uphill
+const RegMask &ProjNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//------------------------------ideal_reg--------------------------------------
+uint ProjNode::ideal_reg() const {
+ return Matcher::base2reg[bottom_type()->base()];
+}
diff --git a/src/share/vm/opto/multnode.hpp b/src/share/vm/opto/multnode.hpp
new file mode 100644
index 000000000..34a573ffc
--- /dev/null
+++ b/src/share/vm/opto/multnode.hpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Matcher;
+class ProjNode;
+
+//------------------------------MultiNode--------------------------------------
+// This class defines a MultiNode, a Node which produces many values. The
+// values are wrapped up in a tuple Type, i.e. a TypeTuple.
+class MultiNode : public Node {
+public:
+ MultiNode( uint required ) : Node(required) {
+ init_class_id(Class_Multi);
+ }
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const = 0;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const RegMask &out_RegMask() const;
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ ProjNode* proj_out(uint which_proj) const; // Get a named projection
+
+};
+
+//------------------------------ProjNode---------------------------------------
+// This class defines a Projection node. Projections project a single element
+// out of a tuple (or Signature) type. Only MultiNodes produce TypeTuple
+// results.
+class ProjNode : public Node {
+protected:
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const;
+ void check_con() const; // Called from constructor.
+
+public:
+ ProjNode( Node *src, uint con, bool io_use = false )
+ : Node( src ), _con(con), _is_io_use(io_use)
+ {
+ init_class_id(Class_Proj);
+ debug_only(check_con());
+ }
+ const uint _con; // The field in the tuple we are projecting
+ const bool _is_io_use; // Used to distinguish between the projections
+ // used on the control and io paths from a macro node
+ virtual int Opcode() const;
+ virtual bool is_CFG() const;
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type *bottom_type() const;
+ virtual const TypePtr *adr_type() const;
+ virtual bool pinned() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const;
+ virtual const RegMask &out_RegMask() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
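+// Illustrative picture (not part of the original header): a MultiNode and its
+// ProjNodes form a fan-out in which each projection names one tuple field:
+//
+//                MultiNode                bottom_type() is a TypeTuple
+//               /         \
+//     ProjNode(_con=0)  ProjNode(_con=1)  each reports field_at(_con)
+//
+// Concrete pairs in the graph include If with its IfTrue/IfFalse projections
+// and Call with its control, i_o and memory projections.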
diff --git a/src/share/vm/opto/node.cpp b/src/share/vm/opto/node.cpp
new file mode 100644
index 000000000..d7563d611
--- /dev/null
+++ b/src/share/vm/opto/node.cpp
@@ -0,0 +1,1919 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_node.cpp.incl"
+
+class RegMask;
+// #include "phase.hpp"
+class PhaseTransform;
+class PhaseGVN;
+
+// Arena we are currently building Nodes in
+const uint Node::NotAMachineReg = 0xffff0000;
+
+#ifndef PRODUCT
+extern int nodes_created;
+#endif
+
+#ifdef ASSERT
+
+//-------------------------- construct_node------------------------------------
+// Set a breakpoint here to identify where a particular node index is built.
+void Node::verify_construction() {
+ _debug_orig = NULL;
+ int old_debug_idx = Compile::debug_idx();
+ int new_debug_idx = old_debug_idx+1;
+ if (new_debug_idx > 0) {
+ // Arrange that the lowest five decimal digits of _debug_idx
+ // will repeat those of _idx. In case this is somehow pathological,
+ // we continue to assign negative numbers (!) consecutively.
+ const int mod = 100000;
+ int bump = (int)(_idx - new_debug_idx) % mod;
+ if (bump < 0) bump += mod;
+ assert(bump >= 0 && bump < mod, "");
+ new_debug_idx += bump;
+ }
+ Compile::set_debug_idx(new_debug_idx);
+ set_debug_idx( new_debug_idx );
+ assert(Compile::current()->unique() < (uint)MaxNodeLimit, "Node limit exceeded");
+ if (BreakAtNode != 0 && (_debug_idx == BreakAtNode || (int)_idx == BreakAtNode)) {
+ tty->print_cr("BreakAtNode: _idx=%d _debug_idx=%d", _idx, _debug_idx);
+ BREAKPOINT;
+ }
+#if OPTO_DU_ITERATOR_ASSERT
+ _last_del = NULL;
+ _del_tick = 0;
+#endif
+ _hash_lock = 0;
+}
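+// Worked example of the digit-matching scheme above (illustrative numbers):
+// with _idx == 42 and a starting new_debug_idx of 399951, bump is
+//   (42 - 399951) % 100000 == -99909, corrected to 91 by adding mod,
+// giving _debug_idx == 400042, whose lowest five digits (00042) repeat _idx.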
+
+
+// #ifdef ASSERT ...
+
+#if OPTO_DU_ITERATOR_ASSERT
+void DUIterator_Common::sample(const Node* node) {
+ _vdui = VerifyDUIterators;
+ _node = node;
+ _outcnt = node->_outcnt;
+ _del_tick = node->_del_tick;
+ _last = NULL;
+}
+
+void DUIterator_Common::verify(const Node* node, bool at_end_ok) {
+ assert(_node == node, "consistent iterator source");
+ assert(_del_tick == node->_del_tick, "no unexpected deletions allowed");
+}
+
+void DUIterator_Common::verify_resync() {
+ // Ensure that the loop body has just deleted the last guy produced.
+ const Node* node = _node;
+ // Ensure that at least one copy of the last-seen edge was deleted.
+ // Note: It is OK to delete multiple copies of the last-seen edge.
+ // Unfortunately, we have no way to verify that all the deletions delete
+ // that same edge. On this point we must use the Honor System.
+ assert(node->_del_tick >= _del_tick+1, "must have deleted an edge");
+ assert(node->_last_del == _last, "must have deleted the edge just produced");
+ // We liked this deletion, so accept the resulting outcnt and tick.
+ _outcnt = node->_outcnt;
+ _del_tick = node->_del_tick;
+}
+
+void DUIterator_Common::reset(const DUIterator_Common& that) {
+ if (this == &that) return; // ignore assignment to self
+ if (!_vdui) {
+ // We need to initialize everything, overwriting garbage values.
+ _last = that._last;
+ _vdui = that._vdui;
+ }
+ // Note: It is legal (though odd) for an iterator over some node x
+ // to be reassigned to iterate over another node y. Some doubly-nested
+ // progress loops depend on being able to do this.
+ const Node* node = that._node;
+ // Re-initialize everything, except _last.
+ _node = node;
+ _outcnt = node->_outcnt;
+ _del_tick = node->_del_tick;
+}
+
+void DUIterator::sample(const Node* node) {
+ DUIterator_Common::sample(node); // Initialize the assertion data.
+ _refresh_tick = 0; // No refreshes have happened, as yet.
+}
+
+void DUIterator::verify(const Node* node, bool at_end_ok) {
+ DUIterator_Common::verify(node, at_end_ok);
+ assert(_idx < node->_outcnt + (uint)at_end_ok, "idx in range");
+}
+
+void DUIterator::verify_increment() {
+ if (_refresh_tick & 1) {
+ // We have refreshed the index during this loop.
+ // Fix up _idx to meet asserts.
+ if (_idx > _outcnt) _idx = _outcnt;
+ }
+ verify(_node, true);
+}
+
+void DUIterator::verify_resync() {
+ // Note: We do not assert on _outcnt, because insertions are OK here.
+ DUIterator_Common::verify_resync();
+ // Make sure we are still in sync, possibly with no more out-edges:
+ verify(_node, true);
+}
+
+void DUIterator::reset(const DUIterator& that) {
+ if (this == &that) return; // self assignment is always a no-op
+ assert(that._refresh_tick == 0, "assign only the result of Node::outs()");
+ assert(that._idx == 0, "assign only the result of Node::outs()");
+ assert(_idx == that._idx, "already assigned _idx");
+ if (!_vdui) {
+ // We need to initialize everything, overwriting garbage values.
+ sample(that._node);
+ } else {
+ DUIterator_Common::reset(that);
+ if (_refresh_tick & 1) {
+ _refresh_tick++; // Clear the "was refreshed" flag.
+ }
+ assert(_refresh_tick < 2*100000, "DU iteration must converge quickly");
+ }
+}
+
+void DUIterator::refresh() {
+ DUIterator_Common::sample(_node); // Re-fetch assertion data.
+ _refresh_tick |= 1; // Set the "was refreshed" flag.
+}
+
+void DUIterator::verify_finish() {
+ // If the loop has killed the node, do not require it to re-run.
+ if (_node->_outcnt == 0) _refresh_tick &= ~1;
+ // If this assert triggers, it means that a loop used refresh_out_pos
+ // to re-synch an iteration index, but the loop did not correctly
+ // re-run itself, using a "while (progress)" construct.
+ // This iterator enforces the rule that you must keep trying the loop
+ // until it "runs clean" without any need for refreshing.
+ assert(!(_refresh_tick & 1), "the loop must run once with no refreshing");
+}
+
+
+void DUIterator_Fast::verify(const Node* node, bool at_end_ok) {
+ DUIterator_Common::verify(node, at_end_ok);
+ Node** out = node->_out;
+ uint cnt = node->_outcnt;
+ assert(cnt == _outcnt, "no insertions allowed");
+ assert(_outp >= out && _outp <= out + cnt - !at_end_ok, "outp in range");
+ // This last check is carefully designed to work for NO_OUT_ARRAY.
+}
+
+void DUIterator_Fast::verify_limit() {
+ const Node* node = _node;
+ verify(node, true);
+ assert(_outp == node->_out + node->_outcnt, "limit still correct");
+}
+
+void DUIterator_Fast::verify_resync() {
+ const Node* node = _node;
+ if (_outp == node->_out + _outcnt) {
+ // Note that the limit imax, not the pointer i, gets updated with the
+ // exact count of deletions. (For the pointer it's always "--i".)
+ assert(node->_outcnt+node->_del_tick == _outcnt+_del_tick, "no insertions allowed with deletion(s)");
+ // This is a limit pointer, with a name like "imax".
+ // Fudge the _last field so that the common assert will be happy.
+ _last = (Node*) node->_last_del;
+ DUIterator_Common::verify_resync();
+ } else {
+ assert(node->_outcnt < _outcnt, "no insertions allowed with deletion(s)");
+ // A normal internal pointer.
+ DUIterator_Common::verify_resync();
+ // Make sure we are still in sync, possibly with no more out-edges:
+ verify(node, true);
+ }
+}
+
+void DUIterator_Fast::verify_relimit(uint n) {
+ const Node* node = _node;
+ assert((int)n > 0, "use imax -= n only with a positive count");
+ // This must be a limit pointer, with a name like "imax".
+ assert(_outp == node->_out + node->_outcnt, "apply -= only to a limit (imax)");
+ // The reported number of deletions must match what the node saw.
+ assert(node->_del_tick == _del_tick + n, "must have deleted n edges");
+ // Fudge the _last field so that the common assert will be happy.
+ _last = (Node*) node->_last_del;
+ DUIterator_Common::verify_resync();
+}
+
+void DUIterator_Fast::reset(const DUIterator_Fast& that) {
+ assert(_outp == that._outp, "already assigned _outp");
+ DUIterator_Common::reset(that);
+}
+
+void DUIterator_Last::verify(const Node* node, bool at_end_ok) {
+ // at_end_ok means the _outp is allowed to underflow by 1
+ _outp += at_end_ok;
+ DUIterator_Fast::verify(node, at_end_ok); // check _del_tick, etc.
+ _outp -= at_end_ok;
+ assert(_outp == (node->_out + node->_outcnt) - 1, "pointer must point to end of nodes");
+}
+
+void DUIterator_Last::verify_limit() {
+ // Do not require the limit address to be resynched.
+ //verify(node, true);
+ assert(_outp == _node->_out, "limit still correct");
+}
+
+void DUIterator_Last::verify_step(uint num_edges) {
+ assert((int)num_edges > 0, "need non-zero edge count for loop progress");
+ _outcnt -= num_edges;
+ _del_tick += num_edges;
+ // Make sure we are still in sync, possibly with no more out-edges:
+ const Node* node = _node;
+ verify(node, true);
+ assert(node->_last_del == _last, "must have deleted the edge just produced");
+}
+
+#endif //OPTO_DU_ITERATOR_ASSERT
+
+
+#endif //ASSERT
+
+
+// This constant used to initialize _out may be any non-null value.
+// The value NULL is reserved for the top node only.
+#define NO_OUT_ARRAY ((Node**)-1)
+
+// This funny expression handshakes with Node::operator new
+// to pull Compile::current out of the new node's _out field,
+// and then calls a subroutine which manages most field
+// initializations. The only one which is tricky is the
+// _idx field, which is const, and so must be initialized
+// by a return value, not an assignment.
+//
+// (Aren't you thankful that Java finals don't require so many tricks?)
+#define IDX_INIT(req) this->Init((req), (Compile*) this->_out)
+#ifdef _MSC_VER // the IDX_INIT hack falls foul of warning C4355
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif
+
+// Out-of-line code from node constructors.
+// Executed only when extra debug info. is being passed around.
+static void init_node_notes(Compile* C, int idx, Node_Notes* nn) {
+ C->set_node_notes_at(idx, nn);
+}
+
+// Shared initialization code.
+inline int Node::Init(int req, Compile* C) {
+ assert(Compile::current() == C, "must use operator new(Compile*)");
+ int idx = C->next_unique();
+
+ // If there are default notes floating around, capture them:
+ Node_Notes* nn = C->default_node_notes();
+ if (nn != NULL) init_node_notes(C, idx, nn);
+
+ // Note: At this point, C is dead,
+ // and we begin to initialize the new Node.
+
+ _cnt = _max = req;
+ _outcnt = _outmax = 0;
+ _class_id = Class_Node;
+ _flags = 0;
+ _out = NO_OUT_ARRAY;
+ return idx;
+}
+
+//------------------------------Node-------------------------------------------
+// Create a Node, with a given number of required edges.
+Node::Node(uint req)
+ : _idx(IDX_INIT(req))
+{
+ assert( req < (uint)(MaxNodeLimit - NodeLimitFudgeFactor), "Input limit exceeded" );
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ if (req == 0) {
+ assert( _in == (Node**)this, "Must not pass arg count to 'new'" );
+ _in = NULL;
+ } else {
+ assert( _in[req-1] == this, "Must pass arg count to 'new'" );
+ Node** to = _in;
+ for(uint i = 0; i < req; i++) {
+ to[i] = NULL;
+ }
+ }
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0)
+ : _idx(IDX_INIT(1))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[0] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1)
+ : _idx(IDX_INIT(2))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[1] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2)
+ : _idx(IDX_INIT(3))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[2] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2, Node *n3)
+ : _idx(IDX_INIT(4))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[3] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ assert( is_not_dead(n3), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+ _in[3] = n3; if (n3 != NULL) n3->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2, Node *n3, Node *n4)
+ : _idx(IDX_INIT(5))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[4] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ assert( is_not_dead(n3), "can not use dead node");
+ assert( is_not_dead(n4), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+ _in[3] = n3; if (n3 != NULL) n3->add_out((Node *)this);
+ _in[4] = n4; if (n4 != NULL) n4->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2, Node *n3,
+ Node *n4, Node *n5)
+ : _idx(IDX_INIT(6))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[5] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ assert( is_not_dead(n3), "can not use dead node");
+ assert( is_not_dead(n4), "can not use dead node");
+ assert( is_not_dead(n5), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+ _in[3] = n3; if (n3 != NULL) n3->add_out((Node *)this);
+ _in[4] = n4; if (n4 != NULL) n4->add_out((Node *)this);
+ _in[5] = n5; if (n5 != NULL) n5->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2, Node *n3,
+ Node *n4, Node *n5, Node *n6)
+ : _idx(IDX_INIT(7))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[6] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ assert( is_not_dead(n3), "can not use dead node");
+ assert( is_not_dead(n4), "can not use dead node");
+ assert( is_not_dead(n5), "can not use dead node");
+ assert( is_not_dead(n6), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+ _in[3] = n3; if (n3 != NULL) n3->add_out((Node *)this);
+ _in[4] = n4; if (n4 != NULL) n4->add_out((Node *)this);
+ _in[5] = n5; if (n5 != NULL) n5->add_out((Node *)this);
+ _in[6] = n6; if (n6 != NULL) n6->add_out((Node *)this);
+}
+
+
+//------------------------------clone------------------------------------------
+// Clone a Node.
+Node *Node::clone() const {
+ Compile *compile = Compile::current();
+ uint s = size_of(); // Size of inherited Node
+ Node *n = (Node*)compile->node_arena()->Amalloc_D(size_of() + _max*sizeof(Node*));
+ Copy::conjoint_words_to_lower((HeapWord*)this, (HeapWord*)n, s);
+ // Set the new input pointer array
+ n->_in = (Node**)(((char*)n)+s);
+ // Cannot share the old output pointer array, so kill it
+ n->_out = NO_OUT_ARRAY;
+ // And reset the counters to 0
+ n->_outcnt = 0;
+ n->_outmax = 0;
+ // Unlock this guy, since he is not in any hash table.
+ debug_only(n->_hash_lock = 0);
+ // Walk the old node's input list to duplicate its edges
+ uint i;
+ for( i = 0; i < len(); i++ ) {
+ Node *x = in(i);
+ n->_in[i] = x;
+ if (x != NULL) x->add_out(n);
+ }
+ if (is_macro())
+ compile->add_macro_node(n);
+
+ n->set_idx(compile->next_unique()); // Get new unique index as well
+ debug_only( n->verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Do not patch over the debug_idx of a clone, because it makes it
+ // impossible to break on the clone's moment of creation.
+ //debug_only( n->set_debug_idx( debug_idx() ) );
+
+ compile->copy_node_notes_to(n, (Node*) this);
+
+ // MachNode clone
+ uint nopnds;
+ if (this->is_Mach() && (nopnds = this->as_Mach()->num_opnds()) > 0) {
+ MachNode *mach = n->as_Mach();
+ MachNode *mthis = this->as_Mach();
+ // Get address of _opnd_array.
+ // It should be the same offset since it is the clone of this node.
+ MachOper **from = mthis->_opnds;
+ MachOper **to = (MachOper **)((size_t)(&mach->_opnds) +
+ pointer_delta((const void*)from,
+ (const void*)(&mthis->_opnds), 1));
+ mach->_opnds = to;
+ for ( uint i = 0; i < nopnds; ++i ) {
+ to[i] = from[i]->clone(compile);
+ }
+ }
+ // cloning CallNode may need to clone JVMState
+ if (n->is_Call()) {
+ CallNode *call = n->as_Call();
+ call->clone_jvms();
+ }
+ return n; // Return the clone
+}
+
+//---------------------------setup_is_top--------------------------------------
+// Call this when changing the top node, to reassert the invariants
+// required by Node::is_top. See Compile::set_cached_top_node.
+void Node::setup_is_top() {
+ if (this == (Node*)Compile::current()->top()) {
+ // This node has just become top. Kill its out array.
+ _outcnt = _outmax = 0;
+ _out = NULL; // marker value for top
+ assert(is_top(), "must be top");
+ } else {
+ if (_out == NULL) _out = NO_OUT_ARRAY;
+ assert(!is_top(), "must not be top");
+ }
+}
+
+
+//------------------------------~Node------------------------------------------
+// Fancy destructor; eagerly attempt to reclaim Node numberings and storage
+extern int reclaim_idx ;
+extern int reclaim_in ;
+extern int reclaim_node;
+void Node::destruct() {
+ // Eagerly reclaim unique Node numberings
+ Compile* compile = Compile::current();
+ if ((uint)_idx+1 == compile->unique()) {
+ compile->set_unique(compile->unique()-1);
+#ifdef ASSERT
+ reclaim_idx++;
+#endif
+ }
+ // Clear debug info:
+ Node_Notes* nn = compile->node_notes_at(_idx);
+ if (nn != NULL) nn->clear();
+ // Walk the input array, freeing the corresponding output edges
+ _cnt = _max; // forget req/prec distinction
+ uint i;
+ for( i = 0; i < _max; i++ ) {
+ set_req(i, NULL);
+ //assert(def->out(def->outcnt()-1) == (Node *)this,"bad def-use hacking in reclaim");
+ }
+ assert(outcnt() == 0, "deleting a node must not leave a dangling use");
+ // See if the input array was allocated just prior to the object
+ int edge_size = _max*sizeof(void*);
+ int out_edge_size = _outmax*sizeof(void*);
+ char *edge_end = ((char*)_in) + edge_size;
+ char *out_array = (char*)(_out == NO_OUT_ARRAY? NULL: _out);
+ char *out_edge_end = out_array + out_edge_size;
+ int node_size = size_of();
+
+ // Free the output edge array
+ if (out_edge_size > 0) {
+#ifdef ASSERT
+ if( out_edge_end == compile->node_arena()->hwm() )
+ reclaim_in += out_edge_size; // count reclaimed out edges with in edges
+#endif
+ compile->node_arena()->Afree(out_array, out_edge_size);
+ }
+
+ // Free the input edge array and the node itself
+ if( edge_end == (char*)this ) {
+#ifdef ASSERT
+ if( edge_end+node_size == compile->node_arena()->hwm() ) {
+ reclaim_in += edge_size;
+ reclaim_node+= node_size;
+ }
+#else
+ // It was; free the input array and object all in one hit
+ compile->node_arena()->Afree(_in,edge_size+node_size);
+#endif
+ } else {
+
+ // Free just the input array
+#ifdef ASSERT
+ if( edge_end == compile->node_arena()->hwm() )
+ reclaim_in += edge_size;
+#endif
+ compile->node_arena()->Afree(_in,edge_size);
+
+ // Free just the object
+#ifdef ASSERT
+ if( ((char*)this) + node_size == compile->node_arena()->hwm() )
+ reclaim_node+= node_size;
+#else
+ compile->node_arena()->Afree(this,node_size);
+#endif
+ }
+ if (is_macro()) {
+ compile->remove_macro_node(this);
+ }
+#ifdef ASSERT
+ // We will not actually delete the storage, but we'll make the node unusable.
+ *(address*)this = badAddress; // smash the C++ vtbl, probably
+ _in = _out = (Node**) badAddress;
+ _max = _cnt = _outmax = _outcnt = 0;
+#endif
+}
+
+//------------------------------grow-------------------------------------------
+// Grow the input array, making space for more edges
+void Node::grow( uint len ) {
+ Arena* arena = Compile::current()->node_arena();
+ uint new_max = _max;
+ if( new_max == 0 ) {
+ _max = 4;
+ _in = (Node**)arena->Amalloc(4*sizeof(Node*));
+ Node** to = _in;
+ to[0] = NULL;
+ to[1] = NULL;
+ to[2] = NULL;
+ to[3] = NULL;
+ return;
+ }
+ while( new_max <= len ) new_max <<= 1; // Find next power-of-2
+ // Trimming to limit allows a uint8 to handle up to 255 edges.
+ // Previously I was using only powers-of-2 which peaked at 128 edges.
+ //if( new_max >= limit ) new_max = limit-1;
+ _in = (Node**)arena->Arealloc(_in, _max*sizeof(Node*), new_max*sizeof(Node*));
+ Copy::zero_to_bytes(&_in[_max], (new_max-_max)*sizeof(Node*)); // NULL all new space
+ _max = new_max; // Record new max length
+ // This assertion makes sure that Node::_max is wide enough to
+ // represent the numerical value of new_max.
+ assert(_max == new_max && _max > len, "int width of _max is too small");
+}
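+// Worked example (illustrative): a node whose _max is 4 that is asked to make
+// room for edge index len == 9 doubles 4 -> 8 -> 16, so the input array is
+// reallocated once to 16 slots and the new tail is NULLed by zero_to_bytes above.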
+
+//-----------------------------out_grow----------------------------------------
+// Grow the output array, making space for more out-edges
+void Node::out_grow( uint len ) {
+ assert(!is_top(), "cannot grow a top node's out array");
+ Arena* arena = Compile::current()->node_arena();
+ uint new_max = _outmax;
+ if( new_max == 0 ) {
+ _outmax = 4;
+ _out = (Node **)arena->Amalloc(4*sizeof(Node*));
+ return;
+ }
+ while( new_max <= len ) new_max <<= 1; // Find next power-of-2
+ // Trimming to limit allows a uint8 to handle up to 255 edges.
+ // Previously I was using only powers-of-2 which peaked at 128 edges.
+ //if( new_max >= limit ) new_max = limit-1;
+ assert(_out != NULL && _out != NO_OUT_ARRAY, "out must have sensible value");
+ _out = (Node**)arena->Arealloc(_out,_outmax*sizeof(Node*),new_max*sizeof(Node*));
+ //Copy::zero_to_bytes(&_out[_outmax], (new_max-_outmax)*sizeof(Node*)); // NULL all new space
+ _outmax = new_max; // Record new max length
+ // This assertion makes sure that Node::_max is wide enough to
+ // represent the numerical value of new_max.
+ assert(_outmax == new_max && _outmax > len, "int width of _outmax is too small");
+}
+
+#ifdef ASSERT
+//------------------------------is_dead----------------------------------------
+bool Node::is_dead() const {
+ // Mach and pinch point nodes may look dead.
+ if( is_top() || is_Mach() || (Opcode() == Op_Node && _outcnt > 0) )
+ return false;
+ for( uint i = 0; i < _max; i++ )
+ if( _in[i] != NULL )
+ return false;
+ dump();
+ return true;
+}
+#endif
+
+//------------------------------add_req----------------------------------------
+// Add a new required input at the end
+void Node::add_req( Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+
+ // Look to see if I can move precedence down one without reallocating
+ if( (_cnt >= _max) || (in(_max-1) != NULL) )
+ grow( _max+1 );
+
+ // Find a precedence edge to move
+ if( in(_cnt) != NULL ) { // Next precedence edge is busy?
+ uint i;
+ for( i=_cnt; i<_max; i++ )
+ if( in(i) == NULL ) // Find the NULL at end of prec edge list
+ break; // There must be one, since we grew the array
+ _in[i] = in(_cnt); // Move prec over, making space for req edge
+ }
+ _in[_cnt++] = n; // Stuff over old prec edge
+ if (n != NULL) n->add_out((Node *)this);
+}
+
+//---------------------------add_req_batch-------------------------------------
+// Add 'm' copies of a new required input at the end
+void Node::add_req_batch( Node *n, uint m ) {
+ assert( is_not_dead(n), "can not use dead node");
+ // check various edge cases
+ if ((int)m <= 1) {
+ assert((int)m >= 0, "oob");
+ if (m != 0) add_req(n);
+ return;
+ }
+
+ // Look to see if I can move precedence down one without reallocating
+ if( (_cnt+m) > _max || _in[_max-m] )
+ grow( _max+m );
+
+ // Find a precedence edge to move
+ if( _in[_cnt] != NULL ) { // Next precedence edge is busy?
+ uint i;
+ for( i=_cnt; i<_max; i++ )
+ if( _in[i] == NULL ) // Find the NULL at end of prec edge list
+ break; // There must be one, since we grew the array
+ // Slide all the precs over by m positions (assume #prec << m).
+ Copy::conjoint_words_to_higher((HeapWord*)&_in[_cnt], (HeapWord*)&_in[_cnt+m], ((i-_cnt)*sizeof(Node*)));
+ }
+
+ // Stuff over the old prec edges
+ for(uint i=0; i<m; i++ ) {
+ _in[_cnt++] = n;
+ }
+
+ // Insert multiple out edges on the node.
+ if (n != NULL && !n->is_top()) {
+ for(uint i=0; i<m; i++ ) {
+ n->add_out((Node *)this);
+ }
+ }
+}
+
+//------------------------------del_req----------------------------------------
+// Delete the required edge and compact the edge array
+void Node::del_req( uint idx ) {
+ // First remove corresponding def-use edge
+ Node *n = in(idx);
+ if (n != NULL) n->del_out((Node *)this);
+ _in[idx] = in(--_cnt); // Compact the array
+ _in[_cnt] = NULL; // NULL out emptied slot
+}
+
+//------------------------------ins_req----------------------------------------
+// Insert a new required input at the given index 'idx'
+void Node::ins_req( uint idx, Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+ add_req(NULL); // Make space
+ assert( idx < _max, "Must have allocated enough space");
+ // Slide over
+ if(_cnt-idx-1 > 0) {
+ Copy::conjoint_words_to_higher((HeapWord*)&_in[idx], (HeapWord*)&_in[idx+1], ((_cnt-idx-1)*sizeof(Node*)));
+ }
+ _in[idx] = n; // Stuff over old required edge
+ if (n != NULL) n->add_out((Node *)this); // Add reciprocal def-use edge
+}
+
+//-----------------------------find_edge---------------------------------------
+int Node::find_edge(Node* n) {
+ for (uint i = 0; i < len(); i++) {
+ if (_in[i] == n) return i;
+ }
+ return -1;
+}
+
+//----------------------------replace_edge-------------------------------------
+int Node::replace_edge(Node* old, Node* neww) {
+ if (old == neww) return 0; // nothing to do
+ uint nrep = 0;
+ for (uint i = 0; i < len(); i++) {
+ if (in(i) == old) {
+ if (i < req())
+ set_req(i, neww);
+ else
+ set_prec(i, neww);
+ nrep++;
+ }
+ }
+ return nrep;
+}
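+// Illustrative usage (a sketch, not a caller in this change): rewire every
+// occurrence of one def on a use, counting how many edges were switched:
+//   int nrep = use->replace_edge(old_def, new_def);
+//   // nrep == 0 means 'use' never had old_def as an input at all
+// Both required and precedence edges are handled, as the loop above shows.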
+
+//-------------------------disconnect_inputs-----------------------------------
+// NULL out all inputs to eliminate incoming Def-Use edges.
+// Return the number of edges between 'n' and 'this'
+int Node::disconnect_inputs(Node *n) {
+ int edges_to_n = 0;
+
+ uint cnt = req();
+ for( uint i = 0; i < cnt; ++i ) {
+ if( in(i) == 0 ) continue;
+ if( in(i) == n ) ++edges_to_n;
+ set_req(i, NULL);
+ }
+ // Remove precedence edges if any exist
+ // Note: Safepoints may have precedence edges, even during parsing
+ if( (req() != len()) && (in(req()) != NULL) ) {
+ uint max = len();
+ for( uint i = 0; i < max; ++i ) {
+ if( in(i) == 0 ) continue;
+ if( in(i) == n ) ++edges_to_n;
+ set_prec(i, NULL);
+ }
+ }
+
+ // Node::destruct requires all out edges be deleted first
+ // debug_only(destruct();) // no reuse benefit expected
+ return edges_to_n;
+}
+
+//-----------------------------uncast---------------------------------------
+// %%% Temporary, until we sort out CheckCastPP vs. CastPP.
+// Strip away casting. (It is depth-limited.)
+Node* Node::uncast() const {
+ // Should be inline:
+ //return is_ConstraintCast() ? uncast_helper(this) : (Node*) this;
+ if (is_ConstraintCast() ||
+ (is_Type() && req() == 2 && Opcode() == Op_CheckCastPP))
+ return uncast_helper(this);
+ else
+ return (Node*) this;
+}
+
+//---------------------------uncast_helper-------------------------------------
+Node* Node::uncast_helper(const Node* p) {
+ uint max_depth = 3;
+ for (uint i = 0; i < max_depth; i++) {
+ if (p == NULL || p->req() != 2) {
+ break;
+ } else if (p->is_ConstraintCast()) {
+ p = p->in(1);
+ } else if (p->is_Type() && p->Opcode() == Op_CheckCastPP) {
+ p = p->in(1);
+ } else {
+ break;
+ }
+ }
+ return (Node*) p;
+}
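+// Illustrative example (hypothetical chain): if p was built as
+//   CheckCastPP -> CastPP -> base
+// then p->uncast() walks through both casts and returns 'base'; a chain
+// deeper than max_depth (3) is only partially stripped.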
+
+//------------------------------add_prec---------------------------------------
+// Add a new precedence input. Precedence inputs are unordered, with
+// duplicates removed and NULLs packed down at the end.
+void Node::add_prec( Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+
+ // Check for NULL at end
+ if( _cnt >= _max || in(_max-1) )
+ grow( _max+1 );
+
+ // Find a precedence edge to move
+ uint i = _cnt;
+ while( in(i) != NULL ) i++;
+ _in[i] = n; // Stuff prec edge over NULL
+ if ( n != NULL) n->add_out((Node *)this); // Add mirror edge
+}
+
+//------------------------------rm_prec----------------------------------------
+// Remove a precedence input. Precedence inputs are unordered, with
+// duplicates removed and NULLs packed down at the end.
+void Node::rm_prec( uint j ) {
+
+ // Find end of precedence list to pack NULLs
+ uint i;
+ for( i=j; i<_max; i++ )
+ if( !_in[i] ) // Find the NULL at end of prec edge list
+ break;
+ if (_in[j] != NULL) _in[j]->del_out((Node *)this);
+ _in[j] = _in[--i]; // Move last element over removed guy
+ _in[i] = NULL; // NULL out last element
+}
+
+//------------------------------size_of----------------------------------------
+uint Node::size_of() const { return sizeof(*this); }
+
+//------------------------------ideal_reg--------------------------------------
+uint Node::ideal_reg() const { return 0; }
+
+//------------------------------jvms-------------------------------------------
+JVMState* Node::jvms() const { return NULL; }
+
+#ifdef ASSERT
+//------------------------------verify_jvms------------------------------------
+bool Node::verify_jvms(const JVMState* using_jvms) const {
+ for (JVMState* jvms = this->jvms(); jvms != NULL; jvms = jvms->caller()) {
+ if (jvms == using_jvms) return true;
+ }
+ return false;
+}
+
+//------------------------------init_NodeProperty------------------------------
+void Node::init_NodeProperty() {
+ assert(_max_classes <= max_jushort, "too many NodeProperty classes");
+ assert(_max_flags <= max_jushort, "too many NodeProperty flags");
+}
+#endif
+
+//------------------------------format-----------------------------------------
+// Print as assembly
+void Node::format( PhaseRegAlloc *, outputStream *st ) const {}
+//------------------------------emit-------------------------------------------
+// Emit bytes starting at parameter 'ptr'.
+void Node::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {}
+//------------------------------size-------------------------------------------
+// Size of instruction in bytes
+uint Node::size(PhaseRegAlloc *ra_) const { return 0; }
+
+//------------------------------CFG Construction-------------------------------
+// Nodes that end basic blocks, e.g. IfTrue/IfFalse, JumpProjNode, Root,
+// Goto and Return.
+const Node *Node::is_block_proj() const { return 0; }
+
+// Minimum guaranteed type
+const Type *Node::bottom_type() const { return Type::BOTTOM; }
+
+
+//------------------------------raise_bottom_type------------------------------
+// Refine the recorded worst-case Type output for this Node.
+void Node::raise_bottom_type(const Type* new_type) {
+ if (is_Type()) {
+ TypeNode *n = this->as_Type();
+ if (VerifyAliases) {
+ assert(new_type->higher_equal(n->type()), "new type must refine old type");
+ }
+ n->set_type(new_type);
+ } else if (is_Load()) {
+ LoadNode *n = this->as_Load();
+ if (VerifyAliases) {
+ assert(new_type->higher_equal(n->type()), "new type must refine old type");
+ }
+ n->set_type(new_type);
+ }
+}
+
+//------------------------------Identity---------------------------------------
+// Return a node that the given node is equivalent to.
+Node *Node::Identity( PhaseTransform * ) {
+ return this; // Default to no identities
+}
+
+//------------------------------Value------------------------------------------
+// Compute a new Type for a node using the Type of the inputs.
+const Type *Node::Value( PhaseTransform * ) const {
+ return bottom_type(); // Default to worst-case Type
+}
+
+//------------------------------Ideal------------------------------------------
+//
+// 'Idealize' the graph rooted at this Node.
+//
+// In order to be efficient and flexible there are some subtle invariants
+// these Ideal calls need to hold. Running with '+VerifyIterativeGVN' checks
+// these invariants, although it's too slow to have on by default. If you are
+// hacking an Ideal call, be sure to test with +VerifyIterativeGVN!
+//
+// The Ideal call may almost arbitrarily reshape the graph rooted at the 'this'
+// pointer. If ANY change is made, it must return the root of the reshaped
+// graph - even if the root is the same Node. Example: swapping the inputs
+// to an AddINode gives the same answer and same root, but you still have to
+// return the 'this' pointer instead of NULL.
+//
+// You cannot return an OLD Node, except for the 'this' pointer. Use the
+// Identity call to return an old Node; basically, if Identity can find
+// another Node, have the Ideal call make no change and return NULL.
+// Example: AddINode::Ideal must check for add of zero; in this case it
+// returns NULL instead of doing any graph reshaping.
+//
+// You cannot modify any old Nodes except for the 'this' pointer. Due to
+// sharing there may be other users of the old Nodes relying on their current
+// semantics. Modifying them will break the other users.
+// Example: when reshape "(X+3)+4" into "X+7" you must leave the Node for
+// "X+3" unchanged in case it is shared.
+//
+// If you modify the 'this' pointer's inputs, you must use 'set_req' with
+// def-use info. If you are making a new Node (either as the new root or
+// some new internal piece) you must NOT use set_req with def-use info.
+// You can make a new Node with either 'new' or 'clone'. In either case,
+// def-use info is (correctly) not generated.
+// Example: reshape "(X+3)+4" into "X+7":
+// set_req(1,in(1)->in(1) /* grab X */, du /* must use DU on 'this' */);
+// set_req(2,phase->intcon(7),du);
+// return this;
+// Example: reshape "X*4" into "X<<1"
+// return new (C,3) LShiftINode( in(1), phase->intcon(1) );
+//
+// You must call 'phase->transform(X)' on any new Nodes X you make, except
+// for the returned root node. Example: reshape "X*31" into "(X<<5)-X".
+// Node *shift=phase->transform(new(C,3)LShiftINode(in(1),phase->intcon(5)));
+// return new (C,3) SubINode(shift, in(1));
+//
+// When making a Node for a constant use 'phase->makecon' or 'phase->intcon'.
+// These forms are faster than 'phase->transform(new (C,1) ConNode())' and Do
+// The Right Thing with def-use info.
+//
+// You cannot bury the 'this' Node inside of a graph reshape. If the reshaped
+// graph uses the 'this' Node it must be the root. If you want a Node with
+// the same Opcode as the 'this' pointer use 'clone'.
+//
+Node *Node::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return NULL; // Default to being Ideal already
+}
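+// A minimal sketch (not from the original sources) of an Ideal method that
+// follows the contract spelled out above.  "FooNode" and the particular
+// reshaping are hypothetical; only the calling conventions are the documented ones.
+//   Node *FooNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+//     if (in(2)->find_int_type() == NULL)
+//       return NULL;                 // no change: return NULL, never an old Node
+//     // New interior Nodes must be transformed before use; def-use info is
+//     // maintained only on 'this', via set_req.
+//     Node *shift = phase->transform(new (phase->C, 3) LShiftINode(in(1), phase->intcon(1)));
+//     set_req(1, shift);
+//     return this;                   // any change must return the (possibly same) root
+//   }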
+
+// Some nodes have specific Ideal subgraph transformations only if they are
+// unique users of specific nodes. Such nodes should be put on the IGVN worklist
+// for the transformations to happen.
+bool Node::has_special_unique_user() const {
+ assert(outcnt() == 1, "match only for unique out");
+ Node* n = unique_out();
+ int op = Opcode();
+ if( this->is_Store() ) {
+ // Condition for back-to-back stores folding.
+ return n->Opcode() == op && n->in(MemNode::Memory) == this;
+ } else if( op == Op_AddL ) {
+ // Condition for convL2I(addL(x,y)) ==> addI(convL2I(x),convL2I(y))
+ return n->Opcode() == Op_ConvL2I && n->in(1) == this;
+ } else if( op == Op_SubI || op == Op_SubL ) {
+ // Condition for subI(x,subI(y,z)) ==> subI(addI(x,z),y)
+ return n->Opcode() == op && n->in(2) == this;
+ }
+ return false;
+};
+
+//------------------------------remove_dead_region-----------------------------
+// This control node is dead. Follow the subgraph below it making everything
+// using it dead as well. This will happen normally via the usual IterGVN
+// worklist but this call is more efficient. Do not update use-def info
+// inside the dead region, just at the borders.
+static bool kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
+ // Con's are a popular node to re-hit in the hash table again.
+ if( dead->is_Con() ) return false;
+
+ // Can't put ResourceMark here since igvn->_worklist uses the same arena
+ // for verify pass with +VerifyOpto and we add/remove elements in it here.
+ Node_List nstack(Thread::current()->resource_area());
+
+ Node *top = igvn->C->top();
+ bool progress = false;
+ nstack.push(dead);
+
+ while (nstack.size() > 0) {
+ dead = nstack.pop();
+ if (dead->outcnt() > 0) {
+ // Keep dead node on stack until all uses are processed.
+ nstack.push(dead);
+ // For all Users of the Dead... ;-)
+ for (DUIterator_Last kmin, k = dead->last_outs(kmin); k >= kmin; ) {
+ Node* use = dead->last_out(k);
+ igvn->hash_delete(use); // Yank from hash table prior to mod
+ if (use->in(0) == dead) { // Found another dead node
+ assert (!use->is_Con(), "Control for Con node should be Root node.");
+ use->set_req(0, top); // Cut dead edge to prevent processing
+ nstack.push(use); // the dead node again.
+ } else { // Else found a not-dead user
+ for (uint j = 1; j < use->req(); j++) {
+ if (use->in(j) == dead) { // Turn all dead inputs into TOP
+ use->set_req(j, top);
+ }
+ }
+ igvn->_worklist.push(use);
+ }
+ // Refresh the iterator, since any number of kills might have happened.
+ k = dead->last_outs(kmin);
+ }
+ } else { // (dead->outcnt() == 0)
+ // Done with outputs.
+ igvn->hash_delete(dead);
+ igvn->_worklist.remove(dead);
+ igvn->set_type(dead, Type::TOP);
+ if (dead->is_macro()) {
+ igvn->C->remove_macro_node(dead);
+ }
+ // Kill all inputs to the dead guy
+ for (uint i=0; i < dead->req(); i++) {
+ Node *n = dead->in(i); // Get input to dead guy
+ if (n != NULL && !n->is_top()) { // Input is valid?
+ progress = true;
+ dead->set_req(i, top); // Smash input away
+ if (n->outcnt() == 0) { // Input also goes dead?
+ if (!n->is_Con())
+ nstack.push(n); // Clear it out as well
+ } else if (n->outcnt() == 1 &&
+ n->has_special_unique_user()) {
+ igvn->add_users_to_worklist( n );
+ } else if (n->outcnt() <= 2 && n->is_Store()) {
+ // Push store's uses on worklist to enable folding optimization for
+ // store/store and store/load to the same address.
+ // The restriction (outcnt() <= 2) is the same as in set_req_X()
+ // and remove_globally_dead_node().
+ igvn->add_users_to_worklist( n );
+ }
+ }
+ }
+ } // (dead->outcnt() == 0)
+ } // while (nstack.size() > 0) for outputs
+ return progress;
+}
+
+//------------------------------remove_dead_region-----------------------------
+bool Node::remove_dead_region(PhaseGVN *phase, bool can_reshape) {
+ Node *n = in(0);
+ if( !n ) return false;
+ // Lost control into this guy? I.e., it became unreachable?
+ // Aggressively kill all unreachable code.
+ if (can_reshape && n->is_top()) {
+ return kill_dead_code(this, phase->is_IterGVN());
+ }
+
+ if( n->is_Region() && n->as_Region()->is_copy() ) {
+ Node *m = n->nonnull_req();
+ set_req(0, m);
+ return true;
+ }
+ return false;
+}
+
+//------------------------------Ideal_DU_postCCP-------------------------------
+// Idealize graph, using DU info. Must clone result into new-space
+Node *Node::Ideal_DU_postCCP( PhaseCCP * ) {
+ return NULL; // Default to no change
+}
+
+//------------------------------hash-------------------------------------------
+// Hash function over Nodes.
+uint Node::hash() const {
+ uint sum = 0;
+ for( uint i=0; i<_cnt; i++ ) // Add in all inputs
+ sum = (sum<<1)-(uintptr_t)in(i); // Ignore embedded NULLs
+ return (sum>>2) + _cnt + Opcode();
+}
+
+//------------------------------cmp--------------------------------------------
+// Compare special parts of simple Nodes
+uint Node::cmp( const Node &n ) const {
+ return 1; // Must be same
+}
+
+//------------------------------rematerialize-----------------------------------
+// Should we clone rather than spill this instruction?
+bool Node::rematerialize() const {
+ if ( is_Mach() )
+ return this->as_Mach()->rematerialize();
+ else
+ return (_flags & Flag_rematerialize) != 0;
+}
+
+//------------------------------needs_anti_dependence_check---------------------
+// Nodes which use memory without consuming it, hence need antidependences.
+bool Node::needs_anti_dependence_check() const {
+ if( req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0 )
+ return false;
+ else
+ return in(1)->bottom_type()->has_memory();
+}
+
+
+// Get an integer constant from a ConNode (or CastIINode).
+// Return a default value if there is no apparent constant here.
+const TypeInt* Node::find_int_type() const {
+ if (this->is_Type()) {
+ return this->as_Type()->type()->isa_int();
+ } else if (this->is_Con()) {
+ assert(is_Mach(), "should be ConNode(TypeNode) or else a MachNode");
+ return this->bottom_type()->isa_int();
+ }
+ return NULL;
+}
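+// Illustrative usage (a sketch): probe an input for a known integer constant
+// before deciding to fold on it:
+//   const TypeInt *ti = n->in(2)->find_int_type();
+//   if (ti != NULL && ti->is_con()) {
+//     jint con = ti->get_con();   // safe: the input is a compile-time constant
+//   }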
+
+// Get a pointer constant from a ConstNode.
+// Returns the constant if it is a pointer ConstNode
+intptr_t Node::get_ptr() const {
+ assert( Opcode() == Op_ConP, "" );
+ return ((ConPNode*)this)->type()->is_ptr()->get_con();
+}
+
+// Get a long constant from a ConNode.
+// Return a default value if there is no apparent constant here.
+const TypeLong* Node::find_long_type() const {
+ if (this->is_Type()) {
+ return this->as_Type()->type()->isa_long();
+ } else if (this->is_Con()) {
+ assert(is_Mach(), "should be ConNode(TypeNode) or else a MachNode");
+ return this->bottom_type()->isa_long();
+ }
+ return NULL;
+}
+
+// Get a double constant from a ConstNode.
+// Returns the constant if it is a double ConstNode
+jdouble Node::getd() const {
+ assert( Opcode() == Op_ConD, "" );
+ return ((ConDNode*)this)->type()->is_double_constant()->getd();
+}
+
+// Get a float constant from a ConstNode.
+// Returns the constant if it is a float ConstNode
+jfloat Node::getf() const {
+ assert( Opcode() == Op_ConF, "" );
+ return ((ConFNode*)this)->type()->is_float_constant()->getf();
+}
+
+#ifndef PRODUCT
+
+//----------------------------NotANode----------------------------------------
+// Used in debugging code to avoid walking across dead or uninitialized edges.
+static inline bool NotANode(const Node* n) {
+ if (n == NULL) return true;
+ if (((intptr_t)n & 1) != 0) return true; // uninitialized, etc.
+ if (*(address*)n == badAddress) return true; // killed by Node::destruct
+ return false;
+}
+
+
+//------------------------------find------------------------------------------
+// Find a neighbor of this Node with the given _idx
+// If idx is negative, find its absolute value, following both _in and _out.
+static void find_recur( Node* &result, Node *n, int idx, bool only_ctrl,
+ VectorSet &old_space, VectorSet &new_space ) {
+ int node_idx = (idx >= 0) ? idx : -idx;
+ if (NotANode(n)) return; // Gracefully handle NULL, -1, 0xabababab, etc.
+ // Contained in new_space or old_space?
+ VectorSet *v = Compile::current()->node_arena()->contains(n) ? &new_space : &old_space;
+ if( v->test(n->_idx) ) return;
+ if( (int)n->_idx == node_idx
+ debug_only(|| n->debug_idx() == node_idx) ) {
+ if (result != NULL)
+ tty->print("find: " INTPTR_FORMAT " and " INTPTR_FORMAT " both have idx==%d\n",
+ (uintptr_t)result, (uintptr_t)n, node_idx);
+ result = n;
+ }
+ v->set(n->_idx);
+ for( uint i=0; i<n->len(); i++ ) {
+ if( only_ctrl && !(n->is_Region()) && (n->Opcode() != Op_Root) && (i != TypeFunc::Control) ) continue;
+ find_recur( result, n->in(i), idx, only_ctrl, old_space, new_space );
+ }
+ // Search along forward edges also:
+ if (idx < 0 && !only_ctrl) {
+ for( uint j=0; j<n->outcnt(); j++ ) {
+ find_recur( result, n->raw_out(j), idx, only_ctrl, old_space, new_space );
+ }
+ }
+#ifdef ASSERT
+ // Search along debug_orig edges last:
+ for (Node* orig = n->debug_orig(); orig != NULL; orig = orig->debug_orig()) {
+ if (NotANode(orig)) break;
+ find_recur( result, orig, idx, only_ctrl, old_space, new_space );
+ }
+#endif //ASSERT
+}
+
+// call this from debugger:
+Node* find_node(Node* n, int idx) {
+ return n->find(idx);
+}
+
+//------------------------------find-------------------------------------------
+Node* Node::find(int idx) const {
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet old_space(area), new_space(area);
+ Node* result = NULL;
+ find_recur( result, (Node*) this, idx, false, old_space, new_space );
+ return result;
+}
+
+//------------------------------find_ctrl--------------------------------------
+// Find an ancestor to this node in the control history with given _idx
+Node* Node::find_ctrl(int idx) const {
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet old_space(area), new_space(area);
+ Node* result = NULL;
+ find_recur( result, (Node*) this, idx, true, old_space, new_space );
+ return result;
+}
+#endif
+
+
+
+#ifndef PRODUCT
+int Node::_in_dump_cnt = 0;
+
+// -----------------------------Name-------------------------------------------
+extern const char *NodeClassNames[];
+const char *Node::Name() const { return NodeClassNames[Opcode()]; }
+
+static bool is_disconnected(const Node* n) {
+ for (uint i = 0; i < n->req(); i++) {
+ if (n->in(i) != NULL) return false;
+ }
+ return true;
+}
+
+#ifdef ASSERT
+static void dump_orig(Node* orig) {
+ Compile* C = Compile::current();
+ if (NotANode(orig)) orig = NULL;
+ if (orig != NULL && !C->node_arena()->contains(orig)) orig = NULL;
+ if (orig == NULL) return;
+ tty->print(" !orig=");
+ Node* fast = orig->debug_orig(); // tortoise & hare algorithm to detect loops
+ if (NotANode(fast)) fast = NULL;
+ while (orig != NULL) {
+ bool discon = is_disconnected(orig); // if discon, print [123] else 123
+ if (discon) tty->print("[");
+ if (!Compile::current()->node_arena()->contains(orig))
+ tty->print("o");
+ tty->print("%d", orig->_idx);
+ if (discon) tty->print("]");
+ orig = orig->debug_orig();
+ if (NotANode(orig)) orig = NULL;
+ if (orig != NULL && !C->node_arena()->contains(orig)) orig = NULL;
+ if (orig != NULL) tty->print(",");
+ if (fast != NULL) {
+ // Step fast twice for each single step of orig:
+ fast = fast->debug_orig();
+ if (NotANode(fast)) fast = NULL;
+ if (fast != NULL && fast != orig) {
+ fast = fast->debug_orig();
+ if (NotANode(fast)) fast = NULL;
+ }
+ if (fast == orig) {
+ tty->print("...");
+ break;
+ }
+ }
+ }
+}
+
+void Node::set_debug_orig(Node* orig) {
+ _debug_orig = orig;
+ if (BreakAtNode == 0) return;
+ if (NotANode(orig)) orig = NULL;
+ int trip = 10;
+ while (orig != NULL) {
+ if (orig->debug_idx() == BreakAtNode || (int)orig->_idx == BreakAtNode) {
+ tty->print_cr("BreakAtNode: _idx=%d _debug_idx=%d orig._idx=%d orig._debug_idx=%d",
+ this->_idx, this->debug_idx(), orig->_idx, orig->debug_idx());
+ BREAKPOINT;
+ }
+ orig = orig->debug_orig();
+ if (NotANode(orig)) orig = NULL;
+ if (trip-- <= 0) break;
+ }
+}
+#endif //ASSERT
+
+//------------------------------dump------------------------------------------
+// Dump a Node
+void Node::dump() const {
+ Compile* C = Compile::current();
+ bool is_new = C->node_arena()->contains(this);
+ _in_dump_cnt++;
+ tty->print("%c%d\t%s\t=== ",
+ is_new ? ' ' : 'o', _idx, Name());
+
+ // Dump the required and precedence inputs
+ dump_req();
+ dump_prec();
+ // Dump the outputs
+ dump_out();
+
+ if (is_disconnected(this)) {
+#ifdef ASSERT
+ tty->print(" [%d]",debug_idx());
+ dump_orig(debug_orig());
+#endif
+ tty->cr();
+ _in_dump_cnt--;
+ return; // don't process dead nodes
+ }
+
+ // Dump node-specific info
+ dump_spec(tty);
+#ifdef ASSERT
+ // Dump the non-reset _debug_idx
+ if( Verbose && WizardMode ) {
+ tty->print(" [%d]",debug_idx());
+ }
+#endif
+
+ const Type *t = bottom_type();
+
+ if (t != NULL && (t->isa_instptr() || t->isa_klassptr())) {
+ const TypeInstPtr *toop = t->isa_instptr();
+ const TypeKlassPtr *tkls = t->isa_klassptr();
+ ciKlass* klass = toop ? toop->klass() : (tkls ? tkls->klass() : NULL );
+ if( klass && klass->is_loaded() && klass->is_interface() ) {
+ tty->print(" Interface:");
+ } else if( toop ) {
+ tty->print(" Oop:");
+ } else if( tkls ) {
+ tty->print(" Klass:");
+ }
+ t->dump();
+ } else if( t == Type::MEMORY ) {
+ tty->print(" Memory:");
+ MemNode::dump_adr_type(this, adr_type(), tty);
+ } else if( Verbose || WizardMode ) {
+ tty->print(" Type:");
+ if( t ) {
+ t->dump();
+ } else {
+ tty->print("no type");
+ }
+ }
+ if (is_new) {
+ debug_only(dump_orig(debug_orig()));
+ Node_Notes* nn = C->node_notes_at(_idx);
+ if (nn != NULL && !nn->is_clear()) {
+ if (nn->jvms() != NULL) {
+ tty->print(" !jvms:");
+ nn->jvms()->dump_spec(tty);
+ }
+ }
+ }
+ tty->cr();
+ _in_dump_cnt--;
+}
+
+//------------------------------dump_req--------------------------------------
+void Node::dump_req() const {
+ // Dump the required input edges
+ for (uint i = 0; i < req(); i++) { // For all required inputs
+ Node* d = in(i);
+ if (d == NULL) {
+ tty->print("_ ");
+ } else if (NotANode(d)) {
+ tty->print("NotANode "); // uninitialized, sentinel, garbage, etc.
+ } else {
+ tty->print("%c%d ", Compile::current()->node_arena()->contains(d) ? ' ' : 'o', d->_idx);
+ }
+ }
+}
+
+
+//------------------------------dump_prec-------------------------------------
+void Node::dump_prec() const {
+ // Dump the precedence edges
+ int any_prec = 0;
+ for (uint i = req(); i < len(); i++) { // For all precedence inputs
+ Node* p = in(i);
+ if (p != NULL) {
+ if( !any_prec++ ) tty->print(" |");
+ if (NotANode(p)) { tty->print("NotANode "); continue; }
+ tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+ }
+ }
+}
+
+//------------------------------dump_out--------------------------------------
+void Node::dump_out() const {
+ // Delimit the output edges
+ tty->print(" [[");
+ // Dump the output edges
+ for (uint i = 0; i < _outcnt; i++) { // For all outputs
+ Node* u = _out[i];
+ if (u == NULL) {
+ tty->print("_ ");
+ } else if (NotANode(u)) {
+ tty->print("NotANode ");
+ } else {
+ tty->print("%c%d ", Compile::current()->node_arena()->contains(u) ? ' ' : 'o', u->_idx);
+ }
+ }
+ tty->print("]] ");
+}
+
+//------------------------------dump_nodes-------------------------------------
+
+// Helper class for dump_nodes. Wraps an old and new VectorSet.
+class OldNewVectorSet : public StackObj {
+ Arena* _node_arena;
+ VectorSet _old_vset, _new_vset;
+ VectorSet* select(Node* n) {
+ return _node_arena->contains(n) ? &_new_vset : &_old_vset;
+ }
+ public:
+ OldNewVectorSet(Arena* node_arena, ResourceArea* area) :
+ _node_arena(node_arena),
+ _old_vset(area), _new_vset(area) {}
+
+ void set(Node* n) { select(n)->set(n->_idx); }
+ bool test_set(Node* n) { return select(n)->test_set(n->_idx) != 0; }
+ bool test(Node* n) { return select(n)->test(n->_idx) != 0; }
+ void del(Node* n) { (*select(n)) >>= n->_idx; }
+};
+
+
+static void dump_nodes(const Node* start, int d, bool only_ctrl) {
+ Node* s = (Node*)start; // remove const
+ if (NotANode(s)) return;
+
+ Compile* C = Compile::current();
+ ResourceArea *area = Thread::current()->resource_area();
+ Node_Stack stack(area, MIN2((uint)ABS(d), C->unique() >> 1));
+ OldNewVectorSet visited(C->node_arena(), area);
+ OldNewVectorSet on_stack(C->node_arena(), area);
+
+ visited.set(s);
+ on_stack.set(s);
+ stack.push(s, 0);
+ if (d < 0) s->dump();
+
+ // Do a depth first walk over edges
+ while (stack.is_nonempty()) {
+ Node* tp = stack.node();
+ uint idx = stack.index();
+ uint limit = d > 0 ? tp->len() : tp->outcnt();
+ if (idx >= limit) {
+ // no more arcs to visit
+ if (d > 0) tp->dump();
+ on_stack.del(tp);
+ stack.pop();
+ } else {
+ // process the "idx"th arc
+ stack.set_index(idx + 1);
+ Node* n = d > 0 ? tp->in(idx) : tp->raw_out(idx);
+
+ if (NotANode(n)) continue;
+ // do not recurse through top or the root (would reach unrelated stuff)
+ if (n->is_Root() || n->is_top()) continue;
+ if (only_ctrl && !n->is_CFG()) continue;
+
+ if (!visited.test_set(n)) { // forward arc
+ // Limit depth
+ if (stack.size() < (uint)ABS(d)) {
+ if (d < 0) n->dump();
+ stack.push(n, 0);
+ on_stack.set(n);
+ }
+ } else { // back or cross arc
+ if (on_stack.test(n)) { // back arc
+ // print loop if there are no phis or regions in the mix
+ bool found_loop_breaker = false;
+ int k;
+ for (k = stack.size() - 1; k >= 0; k--) {
+ Node* m = stack.node_at(k);
+ if (m->is_Phi() || m->is_Region() || m->is_Root() || m->is_Start()) {
+ found_loop_breaker = true;
+ break;
+ }
+ if (m == n) // Found loop head
+ break;
+ }
+ assert(k >= 0, "n must be on stack");
+
+ if (!found_loop_breaker) {
+ tty->print("# %s LOOP FOUND:", only_ctrl ? "CONTROL" : "DATA");
+ for (int i = stack.size() - 1; i >= k; i--) {
+ Node* m = stack.node_at(i);
+ bool mnew = C->node_arena()->contains(m);
+ tty->print(" %s%d:%s", (mnew? "": "o"), m->_idx, m->Name());
+ if (i != 0) tty->print(d > 0? " <-": " ->");
+ }
+ tty->cr();
+ }
+ }
+ }
+ }
+ }
+}
+
+//------------------------------dump-------------------------------------------
+void Node::dump(int d) const {
+ dump_nodes(this, d, false);
+}
+
+//------------------------------dump_ctrl--------------------------------------
+// Dump a Node's control history to depth
+void Node::dump_ctrl(int d) const {
+ dump_nodes(this, d, true);
+}
+
+// VERIFICATION CODE
+// For each input edge to a node (ie - for each Use-Def edge), verify that
+// there is a corresponding Def-Use edge.
+//------------------------------verify_edges-----------------------------------
+void Node::verify_edges(Unique_Node_List &visited) {
+ uint i, j, idx;
+ int cnt;
+ Node *n;
+
+ // Recursive termination test
+ if (visited.member(this)) return;
+ visited.push(this);
+
+ // Walk over all input edges, checking for correspondence
+ for( i = 0; i < len(); i++ ) {
+ n = in(i);
+ if (n != NULL && !n->is_top()) {
+ // Count instances of (Node *)this
+ cnt = 0;
+ for (idx = 0; idx < n->_outcnt; idx++ ) {
+ if (n->_out[idx] == (Node *)this) cnt++;
+ }
+ assert( cnt > 0,"Failed to find Def-Use edge." );
+ // Check for duplicate edges
+ // walk the input array downcounting the input edges to n
+ for( j = 0; j < len(); j++ ) {
+ if( in(j) == n ) cnt--;
+ }
+ assert( cnt == 0,"Mismatched edge count.");
+ } else if (n == NULL) {
+ assert(i >= req() || i == 0 || is_Region() || is_Phi(), "only regions or phis have null data edges");
+ } else {
+ assert(n->is_top(), "sanity");
+ // Nothing to check.
+ }
+ }
+ // Recursive walk over all input edges
+ for( i = 0; i < len(); i++ ) {
+ n = in(i);
+ if( n != NULL )
+ in(i)->verify_edges(visited);
+ }
+}
+
+//------------------------------verify_recur-----------------------------------
+static const Node *unique_top = NULL;
+
+void Node::verify_recur(const Node *n, int verify_depth,
+ VectorSet &old_space, VectorSet &new_space) {
+ if ( verify_depth == 0 ) return;
+ if (verify_depth > 0) --verify_depth;
+
+ Compile* C = Compile::current();
+
+ // Contained in new_space or old_space?
+ VectorSet *v = C->node_arena()->contains(n) ? &new_space : &old_space;
+ // Check for visited in the proper space. Numberings are not unique
+ // across spaces, so we need a separate VectorSet for each space.
+ if( v->test_set(n->_idx) ) return;
+
+ if (n->is_Con() && n->bottom_type() == Type::TOP) {
+ if (C->cached_top_node() == NULL)
+ C->set_cached_top_node((Node*)n);
+ assert(C->cached_top_node() == n, "TOP node must be unique");
+ }
+
+ for( uint i = 0; i < n->len(); i++ ) {
+ Node *x = n->in(i);
+ if (!x || x->is_top()) continue;
+
+ // Verify my input has a def-use edge to me
+ if (true /*VerifyDefUse*/) {
+ // Count use-def edges from n to x
+ int cnt = 0;
+ for( uint j = 0; j < n->len(); j++ )
+ if( n->in(j) == x )
+ cnt++;
+ // Count def-use edges from x to n
+ uint max = x->_outcnt;
+ for( uint k = 0; k < max; k++ )
+ if (x->_out[k] == n)
+ cnt--;
+ assert( cnt == 0, "mismatched def-use edge counts" );
+ }
+
+ verify_recur(x, verify_depth, old_space, new_space);
+ }
+
+}
+
+//------------------------------verify-----------------------------------------
+// Check Def-Use info for my subgraph
+void Node::verify() const {
+ Compile* C = Compile::current();
+ Node* old_top = C->cached_top_node();
+ ResourceMark rm;
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet old_space(area), new_space(area);
+ verify_recur(this, -1, old_space, new_space);
+ C->set_cached_top_node(old_top);
+}
+#endif
+
+
+//------------------------------walk-------------------------------------------
+// Graph walk, with both pre-order and post-order functions
+void Node::walk(NFunc pre, NFunc post, void *env) {
+ VectorSet visited(Thread::current()->resource_area()); // Setup for local walk
+ walk_(pre, post, env, visited);
+}
+
+void Node::walk_(NFunc pre, NFunc post, void *env, VectorSet &visited) {
+ if( visited.test_set(_idx) ) return;
+ pre(*this,env); // Call the pre-order walk function
+ for( uint i=0; i<_max; i++ )
+ if( in(i) ) // Input exists and is not walked?
+ in(i)->walk_(pre,post,env,visited); // Walk it with pre & post functions
+ post(*this,env); // Call the post-order walk function
+}
+
+void Node::nop(Node &, void*) {}
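+// A minimal illustrative sketch (not part of the original source) of driving
+// the walk: pass a counting pre-function matching NFunc and use Node::nop as
+// the post-function.
+//   static void count_node(Node&, void* env) { ++*(uint*)env; }
+//   uint cnt = 0;
+//   root->walk(count_node, Node::nop, &cnt);  // counts nodes reachable via inputs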
+
+//------------------------------Registers--------------------------------------
+// Do we Match on this edge index or not? Generally false for Control
+// and true for everything else. Weird for calls & returns.
+uint Node::match_edge(uint idx) const {
+ return idx; // True for other than index 0 (control)
+}
+
+// Register classes are defined for specific machines
+const RegMask &Node::out_RegMask() const {
+ ShouldNotCallThis();
+ return *(new RegMask());
+}
+
+const RegMask &Node::in_RegMask(uint) const {
+ ShouldNotCallThis();
+ return *(new RegMask());
+}
+
+//=============================================================================
+//-----------------------------------------------------------------------------
+void Node_Array::reset( Arena *new_arena ) {
+ _a->Afree(_nodes,_max*sizeof(Node*));
+ _max = 0;
+ _nodes = NULL;
+ _a = new_arena;
+}
+
+//------------------------------clear------------------------------------------
+// Clear all entries in _nodes to NULL but keep storage
+void Node_Array::clear() {
+ Copy::zero_to_bytes( _nodes, _max*sizeof(Node*) );
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::grow( uint i ) {
+ if( !_max ) {
+ _max = 1;
+ _nodes = (Node**)_a->Amalloc( _max * sizeof(Node*) );
+ _nodes[0] = NULL;
+ }
+ uint old = _max;
+ while( i >= _max ) _max <<= 1; // Double to fit
+ _nodes = (Node**)_a->Arealloc( _nodes, old*sizeof(Node*),_max*sizeof(Node*));
+ Copy::zero_to_bytes( &_nodes[old], (_max-old)*sizeof(Node*) );
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::insert( uint i, Node *n ) {
+ if( _nodes[_max-1] ) grow(_max); // Get more space if full
+ Copy::conjoint_words_to_higher((HeapWord*)&_nodes[i], (HeapWord*)&_nodes[i+1], ((_max-i-1)*sizeof(Node*)));
+ _nodes[i] = n;
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::remove( uint i ) {
+ Copy::conjoint_words_to_lower((HeapWord*)&_nodes[i+1], (HeapWord*)&_nodes[i], ((_max-i-1)*sizeof(Node*)));
+ _nodes[_max-1] = NULL;
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::sort( C_sort_func_t func) {
+ qsort( _nodes, _max, sizeof( Node* ), func );
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::dump() const {
+#ifndef PRODUCT
+ for( uint i = 0; i < _max; i++ ) {
+ Node *nn = _nodes[i];
+ if( nn != NULL ) {
+ tty->print("%5d--> ",i); nn->dump();
+ }
+ }
+#endif
+}
+
+//--------------------------is_iteratively_computed------------------------------
+// Operation appears to be iteratively computed (such as an induction variable)
+// It is possible for this operation to return false for a loop-varying
+// value, if it appears (by local graph inspection) to be computed by a simple conditional.
+bool Node::is_iteratively_computed() {
+ if (ideal_reg()) { // does operation have a result register?
+ for (uint i = 1; i < req(); i++) {
+ Node* n = in(i);
+ if (n != NULL && n->is_Phi()) {
+ for (uint j = 1; j < n->req(); j++) {
+ if (n->in(j) == this) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//--------------------------find_similar------------------------------
+// Return a node with opcode "opc" and same inputs as "this" if one can
+// be found; otherwise return NULL.
+Node* Node::find_similar(int opc) {
+ if (req() >= 2) {
+ Node* def = in(1);
+ if (def && def->outcnt() >= 2) {
+ for (DUIterator_Fast dmax, i = def->fast_outs(dmax); i < dmax; i++) {
+ Node* use = def->fast_out(i);
+ if (use->Opcode() == opc &&
+ use->req() == req()) {
+ uint j;
+ for (j = 0; j < use->req(); j++) {
+ if (use->in(j) != in(j)) {
+ break;
+ }
+ }
+ if (j == use->req()) {
+ return use;
+ }
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+
+//--------------------------unique_ctrl_out------------------------------
+// Return the unique control out if only one. Null if none or more than one.
+Node* Node::unique_ctrl_out() {
+ Node* found = NULL;
+ for (uint i = 0; i < outcnt(); i++) {
+ Node* use = raw_out(i);
+ if (use->is_CFG() && use != this) {
+ if (found != NULL) return NULL;
+ found = use;
+ }
+ }
+ return found;
+}
+
+//=============================================================================
+//------------------------------yank-------------------------------------------
+// Find and remove
+void Node_List::yank( Node *n ) {
+ uint i;
+ for( i = 0; i < _cnt; i++ )
+ if( _nodes[i] == n )
+ break;
+
+ if( i < _cnt )
+ _nodes[i] = _nodes[--_cnt];
+}
+
+//------------------------------dump-------------------------------------------
+void Node_List::dump() const {
+#ifndef PRODUCT
+ for( uint i = 0; i < _cnt; i++ )
+ if( _nodes[i] ) {
+ tty->print("%5d--> ",i);
+ _nodes[i]->dump();
+ }
+#endif
+}
+
+//=============================================================================
+//------------------------------remove-----------------------------------------
+void Unique_Node_List::remove( Node *n ) {
+ if( _in_worklist[n->_idx] ) {
+ for( uint i = 0; i < size(); i++ )
+ if( _nodes[i] == n ) {
+ map(i,Node_List::pop());
+ _in_worklist >>= n->_idx;
+ return;
+ }
+ ShouldNotReachHere();
+ }
+}
+
+//-----------------------remove_useless_nodes----------------------------------
+// Remove useless nodes from worklist
+void Unique_Node_List::remove_useless_nodes(VectorSet &useful) {
+
+ for( uint i = 0; i < size(); ++i ) {
+ Node *n = at(i);
+ assert( n != NULL, "Did not expect null entries in worklist");
+ if( ! useful.test(n->_idx) ) {
+ _in_worklist >>= n->_idx;
+ map(i,Node_List::pop());
+ // Node *replacement = Node_List::pop();
+ // if( i != size() ) { // Check if removing last entry
+ // _nodes[i] = replacement;
+ // }
+ --i; // Visit popped node
+ // If it was last entry, loop terminates since size() was also reduced
+ }
+ }
+}
+
+//=============================================================================
+void Node_Stack::grow() {
+ size_t old_top = pointer_delta(_inode_top,_inodes,sizeof(INode)); // save _top
+ size_t old_max = pointer_delta(_inode_max,_inodes,sizeof(INode));
+ size_t max = old_max << 1; // max * 2
+ _inodes = REALLOC_ARENA_ARRAY(_a, INode, _inodes, old_max, max);
+ _inode_max = _inodes + max;
+ _inode_top = _inodes + old_top; // restore _top
+}
+
+//=============================================================================
+uint TypeNode::size_of() const { return sizeof(*this); }
+#ifndef PRODUCT
+void TypeNode::dump_spec(outputStream *st) const {
+ if( !Verbose && !WizardMode ) {
+ // standard dump does this in Verbose and WizardMode
+ st->print(" #"); _type->dump_on(st);
+ }
+}
+#endif
+uint TypeNode::hash() const {
+ return Node::hash() + _type->hash();
+}
+uint TypeNode::cmp( const Node &n ) const
+{ return !Type::cmp( _type, ((TypeNode&)n)._type ); }
+const Type *TypeNode::bottom_type() const { return _type; }
+const Type *TypeNode::Value( PhaseTransform * ) const { return _type; }
+
+//------------------------------ideal_reg--------------------------------------
+uint TypeNode::ideal_reg() const {
+ return Matcher::base2reg[_type->base()];
+}
diff --git a/src/share/vm/opto/node.hpp b/src/share/vm/opto/node.hpp
new file mode 100644
index 000000000..f93562c09
--- /dev/null
+++ b/src/share/vm/opto/node.hpp
@@ -0,0 +1,1492 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+
+class AbstractLockNode;
+class AddNode;
+class AddPNode;
+class AliasInfo;
+class AllocateArrayNode;
+class AllocateNode;
+class Block;
+class Block_Array;
+class BoolNode;
+class BoxLockNode;
+class CMoveNode;
+class CallDynamicJavaNode;
+class CallJavaNode;
+class CallLeafNode;
+class CallNode;
+class CallRuntimeNode;
+class CallStaticJavaNode;
+class CatchNode;
+class CatchProjNode;
+class CheckCastPPNode;
+class CmpNode;
+class CodeBuffer;
+class ConstraintCastNode;
+class ConNode;
+class CountedLoopNode;
+class CountedLoopEndNode;
+class FastLockNode;
+class FastUnlockNode;
+class IfNode;
+class InitializeNode;
+class JVMState;
+class JumpNode;
+class JumpProjNode;
+class LoadNode;
+class LoadStoreNode;
+class LockNode;
+class LoopNode;
+class MachCallDynamicJavaNode;
+class MachCallJavaNode;
+class MachCallLeafNode;
+class MachCallNode;
+class MachCallRuntimeNode;
+class MachCallStaticJavaNode;
+class MachIfNode;
+class MachNode;
+class MachNullCheckNode;
+class MachReturnNode;
+class MachSafePointNode;
+class MachSpillCopyNode;
+class MachTempNode;
+class Matcher;
+class MemBarNode;
+class MemNode;
+class MergeMemNode;
+class MulNode;
+class MultiNode;
+class MultiBranchNode;
+class NeverBranchNode;
+class Node;
+class Node_Array;
+class Node_List;
+class Node_Stack;
+class NullCheckNode;
+class OopMap;
+class PCTableNode;
+class PhaseCCP;
+class PhaseGVN;
+class PhaseIterGVN;
+class PhaseRegAlloc;
+class PhaseTransform;
+class PhaseValues;
+class PhiNode;
+class Pipeline;
+class ProjNode;
+class RegMask;
+class RegionNode;
+class RootNode;
+class SafePointNode;
+class StartNode;
+class State;
+class StoreNode;
+class SubNode;
+class Type;
+class TypeNode;
+class UnlockNode;
+class VectorSet;
+class IfTrueNode;
+class IfFalseNode;
+typedef void (*NFunc)(Node&,void*);
+extern "C" {
+ typedef int (*C_sort_func_t)(const void *, const void *);
+}
+
+// The type of all node counts and indexes.
+// It must hold at least 16 bits, but must also be fast to load and store.
+// This type, if less than 32 bits, could limit the number of possible nodes.
+// (To make this type platform-specific, move to globalDefinitions_xxx.hpp.)
+typedef unsigned int node_idx_t;
+
+
+#ifndef OPTO_DU_ITERATOR_ASSERT
+#ifdef ASSERT
+#define OPTO_DU_ITERATOR_ASSERT 1
+#else
+#define OPTO_DU_ITERATOR_ASSERT 0
+#endif
+#endif //OPTO_DU_ITERATOR_ASSERT
+
+#if OPTO_DU_ITERATOR_ASSERT
+class DUIterator;
+class DUIterator_Fast;
+class DUIterator_Last;
+#else
+typedef uint DUIterator;
+typedef Node** DUIterator_Fast;
+typedef Node** DUIterator_Last;
+#endif
+
+// Node Sentinel
+#define NodeSentinel (Node*)-1
+
+// Unknown count frequency
+#define COUNT_UNKNOWN (-1.0f)
+
+//------------------------------Node-------------------------------------------
+// Nodes define actions in the program. They create values, which have types.
+// They are both vertices in a directed graph and program primitives. Nodes
+// are labeled; the label is the "opcode", the primitive function in the lambda
+// calculus sense that gives meaning to the Node. Node inputs are ordered (so
+// that "a-b" is different from "b-a"). The inputs to a Node are the inputs to
+// the Node's function. These inputs also define a Type equation for the Node.
+// Solving these Type equations amounts to doing dataflow analysis.
+// Control and data are uniformly represented in the graph. Finally, Nodes
+// have a unique dense integer index which is used to index into side arrays
+// whenever I have phase-specific information.
+
+class Node {
+ // Lots of restrictions on cloning Nodes
+ Node(const Node&); // not defined; linker error to use these
+ Node &operator=(const Node &rhs);
+
+public:
+ friend class Compile;
+ #if OPTO_DU_ITERATOR_ASSERT
+ friend class DUIterator_Common;
+ friend class DUIterator;
+ friend class DUIterator_Fast;
+ friend class DUIterator_Last;
+ #endif
+
+ // Because Nodes come and go, I define an Arena of Node structures to pull
+ // from. This should allow fast access to node creation & deletion. This
+ // field is a local cache of a value defined in some "program fragment" of
+ // which these Nodes are just a part.
+
+ // New Operator that takes a Compile pointer; this will eventually
+ // be the "new" New operator.
+ inline void* operator new( size_t x, Compile* C) {
+ Node* n = (Node*)C->node_arena()->Amalloc_D(x);
+#ifdef ASSERT
+ n->_in = (Node**)n; // magic cookie for assertion check
+#endif
+ n->_out = (Node**)C;
+ return (void*)n;
+ }
+
+ // New Operator that takes a Compile pointer; this will eventually
+ // be the "new" New operator.
+ inline void* operator new( size_t x, Compile* C, int y) {
+ Node* n = (Node*)C->node_arena()->Amalloc_D(x + y*sizeof(void*));
+ n->_in = (Node**)(((char*)n) + x);
+#ifdef ASSERT
+ n->_in[y-1] = n; // magic cookie for assertion check
+#endif
+ n->_out = (Node**)C;
+ return (void*)n;
+ }
+
+ // Delete is a NOP
+ void operator delete( void *ptr ) {}
+ // Fancy destructor; eagerly attempt to reclaim Node numberings and storage
+ void destruct();
+
+ // Create a new Node. Required is the number of inputs required for
+ // semantic correctness.
+ Node( uint required );
+
+ // Create a new Node with given input edges.
+ // This version requires use of the "edge-count" new.
+ // E.g. new (C,3) FooNode( C, NULL, left, right );
+ Node( Node *n0 );
+ Node( Node *n0, Node *n1 );
+ Node( Node *n0, Node *n1, Node *n2 );
+ Node( Node *n0, Node *n1, Node *n2, Node *n3 );
+ Node( Node *n0, Node *n1, Node *n2, Node *n3, Node *n4 );
+ Node( Node *n0, Node *n1, Node *n2, Node *n3, Node *n4, Node *n5 );
+ Node( Node *n0, Node *n1, Node *n2, Node *n3,
+ Node *n4, Node *n5, Node *n6 );
+
+ // Clone an inherited Node given only the base Node type.
+ Node* clone() const;
+
+ // Clone a Node, immediately supplying one or two new edges.
+ // The first and second arguments, if non-null, replace in(1) and in(2),
+ // respectively.
+ Node* clone_with_data_edge(Node* in1, Node* in2 = NULL) const {
+ Node* nn = clone();
+ if (in1 != NULL) nn->set_req(1, in1);
+ if (in2 != NULL) nn->set_req(2, in2);
+ return nn;
+ }
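+ // For example (illustrative only; 'add' and 'new_left' are hypothetical names):
+ //   Node* nn = add->clone_with_data_edge(new_left);  // copy of 'add' with in(1) replaced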
+
+private:
+ // Shared setup for the above constructors.
+ // Handles all interactions with Compile::current.
+ // Puts initial values in all Node fields except _idx.
+ // Returns the initial value for _idx, which cannot
+ // be initialized by assignment.
+ inline int Init(int req, Compile* C);
+
+//----------------- input edge handling
+protected:
+ friend class PhaseCFG; // Access to address of _in array elements
+ Node **_in; // Array of use-def references to Nodes
+ Node **_out; // Array of def-use references to Nodes
+
+ // Input edges are split into two categories. Required edges are required
+ // for semantic correctness; order is important and NULLs are allowed.
+ // Precedence edges are used to help determine execution order and are
+ // added, e.g., for scheduling purposes. They are unordered and not
+ // duplicated; they have no embedded NULLs. Edges from 0 to _cnt-1
+ // are required, from _cnt to _max-1 are precedence edges.
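+ // For example (illustrative), a node with _cnt == 3 and _max == 5 has
+ // required inputs in(0)..in(2), any of which may be NULL, and up to two
+ // precedence inputs in(3) and in(4).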
+ node_idx_t _cnt; // Total number of required Node inputs.
+
+ node_idx_t _max; // Actual length of input array.
+
+ // Output edges are an unordered list of def-use edges which exactly
+ // correspond to required input edges which point from other nodes
+ // to this one. Thus the count of the output edges is the number of
+ // users of this node.
+ node_idx_t _outcnt; // Total number of Node outputs.
+
+ node_idx_t _outmax; // Actual length of output array.
+
+ // Grow the actual input array to the next larger power-of-2 bigger than len.
+ void grow( uint len );
+ // Grow the output array to the next larger power-of-2 bigger than len.
+ void out_grow( uint len );
+
+ public:
+ // Each Node is assigned a unique small/dense number. This number is used
+ // to index into auxiliary arrays of data and bitvectors.
+ // It is declared const to defend against inadvertent assignment,
+ // since it is used by clients as a naked field.
+ const node_idx_t _idx;
+
+ // Get the (read-only) number of input edges
+ uint req() const { return _cnt; }
+ uint len() const { return _max; }
+ // Get the (read-only) number of output edges
+ uint outcnt() const { return _outcnt; }
+
+#if OPTO_DU_ITERATOR_ASSERT
+ // Iterate over the out-edges of this node. Deletions are illegal.
+ inline DUIterator outs() const;
+ // Use this when the out array might have changed to suppress asserts.
+ inline DUIterator& refresh_out_pos(DUIterator& i) const;
+ // Does the node have an out at this position? (Used for iteration.)
+ inline bool has_out(DUIterator& i) const;
+ inline Node* out(DUIterator& i) const;
+ // Iterate over the out-edges of this node. All changes are illegal.
+ inline DUIterator_Fast fast_outs(DUIterator_Fast& max) const;
+ inline Node* fast_out(DUIterator_Fast& i) const;
+ // Iterate over the out-edges of this node, deleting one at a time.
+ inline DUIterator_Last last_outs(DUIterator_Last& min) const;
+ inline Node* last_out(DUIterator_Last& i) const;
+ // The inline bodies of all these methods are after the iterator definitions.
+#else
+ // Iterate over the out-edges of this node. Deletions are illegal.
+ // This iteration uses integral indexes, to decouple from array reallocations.
+ DUIterator outs() const { return 0; }
+ // Use this when the out array might have changed to suppress asserts.
+ DUIterator refresh_out_pos(DUIterator i) const { return i; }
+
+ // Reference to the i'th output Node. Error if out of bounds.
+ Node* out(DUIterator i) const { assert(i < _outcnt, "oob"); return _out[i]; }
+ // Does the node have an out at this position? (Used for iteration.)
+ bool has_out(DUIterator i) const { return i < _outcnt; }
+
+ // Iterate over the out-edges of this node. All changes are illegal.
+ // This iteration uses a pointer internal to the out array.
+ DUIterator_Fast fast_outs(DUIterator_Fast& max) const {
+ Node** out = _out;
+ // Assign a limit pointer to the reference argument:
+ max = out + (ptrdiff_t)_outcnt;
+ // Return the base pointer:
+ return out;
+ }
+ Node* fast_out(DUIterator_Fast i) const { return *i; }
+ // Iterate over the out-edges of this node, deleting one at a time.
+ // This iteration uses a pointer internal to the out array.
+ DUIterator_Last last_outs(DUIterator_Last& min) const {
+ Node** out = _out;
+ // Assign a limit pointer to the reference argument:
+ min = out;
+ // Return the pointer to the start of the iteration:
+ return out + (ptrdiff_t)_outcnt - 1;
+ }
+ Node* last_out(DUIterator_Last i) const { return *i; }
+#endif
+
+ // Reference to the i'th input Node. Error if out of bounds.
+ Node* in(uint i) const { assert(i < _max,"oob"); return _in[i]; }
+ // Reference to the i'th output Node. Error if out of bounds.
+ // Use this accessor sparingly. We are going to try to use iterators instead.
+ Node* raw_out(uint i) const { assert(i < _outcnt,"oob"); return _out[i]; }
+ // Return the unique out edge.
+ Node* unique_out() const { assert(_outcnt==1,"not unique"); return _out[0]; }
+ // Delete out edge at position 'i' by moving last out edge to position 'i'
+ void raw_del_out(uint i) {
+ assert(i < _outcnt,"oob");
+ assert(_outcnt > 0,"oob");
+ #if OPTO_DU_ITERATOR_ASSERT
+ // Record that a change happened here.
+ debug_only(_last_del = _out[i]; ++_del_tick);
+ #endif
+ _out[i] = _out[--_outcnt];
+ // Smash the old edge so it can't be used accidentally.
+ debug_only(_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef);
+ }
+
+#ifdef ASSERT
+ bool is_dead() const;
+#define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead()))
+#endif
+
+ // Set a required input edge, also updates corresponding output edge
+ void add_req( Node *n ); // Append a NEW required input
+ void add_req_batch( Node* n, uint m ); // Append m NEW required inputs (all n).
+ void del_req( uint idx ); // Delete required edge & compact
+ void ins_req( uint i, Node *n ); // Insert a NEW required input
+ void set_req( uint i, Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+ assert( i < _cnt, "oob");
+ assert( !VerifyHashTableKeys || _hash_lock == 0,
+ "remove node from hash table before modifying it");
+ Node** p = &_in[i]; // cache this._in, across the del_out call
+ if (*p != NULL) (*p)->del_out((Node *)this);
+ (*p) = n;
+ if (n != NULL) n->add_out((Node *)this);
+ }
+ // Light version of set_req() to init inputs after node creation.
+ void init_req( uint i, Node *n ) {
+ assert( i == 0 && this == n ||
+ is_not_dead(n), "can not use dead node");
+ assert( i < _cnt, "oob");
+ assert( !VerifyHashTableKeys || _hash_lock == 0,
+ "remove node from hash table before modifying it");
+ assert( _in[i] == NULL, "sanity");
+ _in[i] = n;
+ if (n != NULL) n->add_out((Node *)this);
+ }
+ // Find first occurrence of n among my edges:
+ int find_edge(Node* n);
+ int replace_edge(Node* old, Node* neww);
+ // NULL out all inputs to eliminate incoming Def-Use edges.
+ // Return the number of edges between 'n' and 'this'
+ int disconnect_inputs(Node *n);
+
+ // Quickly, return true if and only if I am Compile::current()->top().
+ bool is_top() const {
+ assert((this == (Node*) Compile::current()->top()) == (_out == NULL), "");
+ return (_out == NULL);
+ }
+ // Reaffirm invariants for is_top. (Only from Compile::set_cached_top_node.)
+ void setup_is_top();
+
+ // Strip away casting. (It is depth-limited.)
+ Node* uncast() const;
+
+private:
+ static Node* uncast_helper(const Node* n);
+
+ // Add an output edge to the end of the list
+ void add_out( Node *n ) {
+ if (is_top()) return;
+ if( _outcnt == _outmax ) out_grow(_outcnt);
+ _out[_outcnt++] = n;
+ }
+ // Delete an output edge
+ void del_out( Node *n ) {
+ if (is_top()) return;
+ Node** outp = &_out[_outcnt];
+ // Find and remove n
+ do {
+ assert(outp > _out, "Missing Def-Use edge");
+ } while (*--outp != n);
+ *outp = _out[--_outcnt];
+ // Smash the old edge so it can't be used accidentally.
+ debug_only(_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef);
+ // Record that a change happened here.
+ #if OPTO_DU_ITERATOR_ASSERT
+ debug_only(_last_del = n; ++_del_tick);
+ #endif
+ }
+
+public:
+ // Globally replace this node by a given new node, updating all uses.
+ void replace_by(Node* new_node);
+ void set_req_X( uint i, Node *n, PhaseIterGVN *igvn );
+ // Find the one non-null required input. RegionNode only
+ Node *nonnull_req() const;
+ // Add or remove precedence edges
+ void add_prec( Node *n );
+ void rm_prec( uint i );
+ void set_prec( uint i, Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+ assert( i >= _cnt, "not a precedence edge");
+ if (_in[i] != NULL) _in[i]->del_out((Node *)this);
+ _in[i] = n;
+ if (n != NULL) n->add_out((Node *)this);
+ }
+ // Set this node's index, used by cisc_version to replace current node
+ void set_idx(uint new_idx) {
+ const node_idx_t* ref = &_idx;
+ *(node_idx_t*)ref = new_idx;
+ }
+ // Swap input edge order. (Edge indexes i1 and i2 are usually 1 and 2.)
+ void swap_edges(uint i1, uint i2) {
+ debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH);
+ // Def-Use info is unchanged
+ Node* n1 = in(i1);
+ Node* n2 = in(i2);
+ _in[i1] = n2;
+ _in[i2] = n1;
+ // If this node is in the hash table, make sure it doesn't need a rehash.
+ assert(check_hash == NO_HASH || check_hash == hash(), "edge swap must preserve hash code");
+ }
+
+ // Iterators over input Nodes for a Node X are written as:
+ // for( i = 0; i < X.req(); i++ ) ... X[i] ...
+ // NOTE: Required edges can contain embedded NULL pointers.
+
+//----------------- Other Node Properties
+
+ // Generate class id for some ideal nodes to avoid virtual query
+ // methods is_<Node>().
+ // Class id is the set of bits corresponding to the node class and all its
+ // super classes, so that queries for super classes are also valid.
+ // Subclasses of the same super class have a different assigned bit
+ // (the third parameter in the macro DEFINE_CLASS_ID).
+ // Classes with deeper hierarchy are declared first.
+ // Classes with the same hierarchy depth are sorted by usage frequency.
+ //
+ // The query method masks the bits to cut off bits of subclasses
+ // and then compare the result with the class id
+ // (see the macro DEFINE_CLASS_QUERY below).
+ //
+ // Class_MachCall=30, ClassMask_MachCall=31
+ // 12 8 4 0
+ // 0 0 0 0 0 0 0 0 1 1 1 1 0
+ // | | | |
+ // | | | Bit_Mach=2
+ // | | Bit_MachReturn=4
+ // | Bit_MachSafePoint=8
+ // Bit_MachCall=16
+ //
+ // Class_CountedLoop=56, ClassMask_CountedLoop=63
+ // 12 8 4 0
+ // 0 0 0 0 0 0 0 1 1 1 0 0 0
+ // | | |
+ // | | Bit_Region=8
+ // | Bit_Loop=16
+ // Bit_CountedLoop=32
+
+ #define DEFINE_CLASS_ID(cl, supcl, subn) \
+ Bit_##cl = (Class_##supcl == 0) ? 1 << subn : (Bit_##supcl) << (1 + subn) , \
+ Class_##cl = Class_##supcl + Bit_##cl , \
+ ClassMask_##cl = ((Bit_##cl << 1) - 1) ,
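+ // For example (illustrative), DEFINE_CLASS_ID(Mach, Node, 1) yields
+ //   Bit_Mach = 1 << 1 = 2, Class_Mach = Class_Node + Bit_Mach = 2,
+ //   ClassMask_Mach = (Bit_Mach << 1) - 1 = 3,
+ // matching the Bit_Mach=2 row in the diagram above.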
+
+ // This enum is used only for C2 ideal and mach nodes with is_<node>() methods
+ // so that its values fit into 16 bits.
+ enum NodeClasses {
+ Bit_Node = 0x0000,
+ Class_Node = 0x0000,
+ ClassMask_Node = 0xFFFF,
+
+ DEFINE_CLASS_ID(Multi, Node, 0)
+ DEFINE_CLASS_ID(SafePoint, Multi, 0)
+ DEFINE_CLASS_ID(Call, SafePoint, 0)
+ DEFINE_CLASS_ID(CallJava, Call, 0)
+ DEFINE_CLASS_ID(CallStaticJava, CallJava, 0)
+ DEFINE_CLASS_ID(CallDynamicJava, CallJava, 1)
+ DEFINE_CLASS_ID(CallRuntime, Call, 1)
+ DEFINE_CLASS_ID(CallLeaf, CallRuntime, 0)
+ DEFINE_CLASS_ID(Allocate, Call, 2)
+ DEFINE_CLASS_ID(AllocateArray, Allocate, 0)
+ DEFINE_CLASS_ID(AbstractLock, Call, 3)
+ DEFINE_CLASS_ID(Lock, AbstractLock, 0)
+ DEFINE_CLASS_ID(Unlock, AbstractLock, 1)
+ DEFINE_CLASS_ID(MultiBranch, Multi, 1)
+ DEFINE_CLASS_ID(PCTable, MultiBranch, 0)
+ DEFINE_CLASS_ID(Catch, PCTable, 0)
+ DEFINE_CLASS_ID(Jump, PCTable, 1)
+ DEFINE_CLASS_ID(If, MultiBranch, 1)
+ DEFINE_CLASS_ID(CountedLoopEnd, If, 0)
+ DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
+ DEFINE_CLASS_ID(Start, Multi, 2)
+ DEFINE_CLASS_ID(MemBar, Multi, 3)
+ DEFINE_CLASS_ID(Initialize, MemBar, 0)
+
+ DEFINE_CLASS_ID(Mach, Node, 1)
+ DEFINE_CLASS_ID(MachReturn, Mach, 0)
+ DEFINE_CLASS_ID(MachSafePoint, MachReturn, 0)
+ DEFINE_CLASS_ID(MachCall, MachSafePoint, 0)
+ DEFINE_CLASS_ID(MachCallJava, MachCall, 0)
+ DEFINE_CLASS_ID(MachCallStaticJava, MachCallJava, 0)
+ DEFINE_CLASS_ID(MachCallDynamicJava, MachCallJava, 1)
+ DEFINE_CLASS_ID(MachCallRuntime, MachCall, 1)
+ DEFINE_CLASS_ID(MachCallLeaf, MachCallRuntime, 0)
+ DEFINE_CLASS_ID(MachSpillCopy, Mach, 1)
+ DEFINE_CLASS_ID(MachNullCheck, Mach, 2)
+ DEFINE_CLASS_ID(MachIf, Mach, 3)
+ DEFINE_CLASS_ID(MachTemp, Mach, 4)
+
+ DEFINE_CLASS_ID(Proj, Node, 2)
+ DEFINE_CLASS_ID(CatchProj, Proj, 0)
+ DEFINE_CLASS_ID(JumpProj, Proj, 1)
+ DEFINE_CLASS_ID(IfTrue, Proj, 2)
+ DEFINE_CLASS_ID(IfFalse, Proj, 3)
+
+ DEFINE_CLASS_ID(Region, Node, 3)
+ DEFINE_CLASS_ID(Loop, Region, 0)
+ DEFINE_CLASS_ID(Root, Loop, 0)
+ DEFINE_CLASS_ID(CountedLoop, Loop, 1)
+
+ DEFINE_CLASS_ID(Sub, Node, 4)
+ DEFINE_CLASS_ID(Cmp, Sub, 0)
+ DEFINE_CLASS_ID(FastLock, Cmp, 0)
+ DEFINE_CLASS_ID(FastUnlock, Cmp, 1)
+
+ DEFINE_CLASS_ID(Type, Node, 5)
+ DEFINE_CLASS_ID(Phi, Type, 0)
+ DEFINE_CLASS_ID(ConstraintCast, Type, 1)
+ DEFINE_CLASS_ID(CheckCastPP, Type, 2)
+ DEFINE_CLASS_ID(CMove, Type, 3)
+
+ DEFINE_CLASS_ID(Mem, Node, 6)
+ DEFINE_CLASS_ID(Load, Mem, 0)
+ DEFINE_CLASS_ID(Store, Mem, 1)
+ DEFINE_CLASS_ID(LoadStore, Mem, 2)
+
+ DEFINE_CLASS_ID(MergeMem, Node, 7)
+ DEFINE_CLASS_ID(Bool, Node, 8)
+ DEFINE_CLASS_ID(AddP, Node, 9)
+ DEFINE_CLASS_ID(BoxLock, Node, 10)
+ DEFINE_CLASS_ID(Add, Node, 11)
+ DEFINE_CLASS_ID(Mul, Node, 12)
+
+ _max_classes = ClassMask_Mul
+ };
+ #undef DEFINE_CLASS_ID
+
+ // Flags are sorted by usage frequency.
+ enum NodeFlags {
+ Flag_is_Copy = 0x01, // should be first bit to avoid shift
+ Flag_is_Call = Flag_is_Copy << 1,
+ Flag_rematerialize = Flag_is_Call << 1,
+ Flag_needs_anti_dependence_check = Flag_rematerialize << 1,
+ Flag_is_macro = Flag_needs_anti_dependence_check << 1,
+ Flag_is_Con = Flag_is_macro << 1,
+ Flag_is_cisc_alternate = Flag_is_Con << 1,
+ Flag_is_Branch = Flag_is_cisc_alternate << 1,
+ Flag_is_block_start = Flag_is_Branch << 1,
+ Flag_is_Goto = Flag_is_block_start << 1,
+ Flag_is_dead_loop_safe = Flag_is_Goto << 1,
+ Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1,
+ Flag_is_safepoint_node = Flag_may_be_short_branch << 1,
+ Flag_is_pc_relative = Flag_is_safepoint_node << 1,
+ Flag_is_Vector = Flag_is_pc_relative << 1,
+ _max_flags = (Flag_is_Vector << 1) - 1 // allow flags combination
+ };
+
+private:
+ jushort _class_id;
+ jushort _flags;
+
+protected:
+ // These methods should be called from constructors only.
+ void init_class_id(jushort c) {
+ assert(c <= _max_classes, "invalid node class");
+ _class_id = c; // cast out const
+ }
+ void init_flags(jushort fl) {
+ assert(fl <= _max_flags, "invalid node flag");
+ _flags |= fl;
+ }
+ void clear_flag(jushort fl) {
+ assert(fl <= _max_flags, "invalid node flag");
+ _flags &= ~fl;
+ }
+
+public:
+ const jushort class_id() const { return _class_id; }
+
+ const jushort flags() const { return _flags; }
+
+ // Return a dense integer opcode number
+ virtual int Opcode() const;
+
+ // Virtual inherited Node size
+ virtual uint size_of() const;
+
+ // Other interesting Node properties
+
+ // Special case: is_Call() returns true for both CallNode and MachCallNode.
+ bool is_Call() const {
+ return (_flags & Flag_is_Call) != 0;
+ }
+
+ CallNode *as_Call() const { // Only for CallNode (not for MachCallNode)
+ assert((_class_id & ClassMask_Call) == Class_Call, "invalid node class");
+ return (CallNode*)this;
+ }
+
+ #define DEFINE_CLASS_QUERY(type) \
+ bool is_##type() const { \
+ return ((_class_id & ClassMask_##type) == Class_##type); \
+ } \
+ type##Node *as_##type() const { \
+ assert(is_##type(), "invalid node class"); \
+ return (type##Node*)this; \
+ }
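+ // For example (illustrative), DEFINE_CLASS_QUERY(Region) expands to:
+ //   bool is_Region() const { return (_class_id & ClassMask_Region) == Class_Region; }
+ //   RegionNode* as_Region() const { assert(is_Region(), "invalid node class"); return (RegionNode*)this; }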
+
+ DEFINE_CLASS_QUERY(AbstractLock)
+ DEFINE_CLASS_QUERY(Add)
+ DEFINE_CLASS_QUERY(AddP)
+ DEFINE_CLASS_QUERY(Allocate)
+ DEFINE_CLASS_QUERY(AllocateArray)
+ DEFINE_CLASS_QUERY(Bool)
+ DEFINE_CLASS_QUERY(BoxLock)
+ DEFINE_CLASS_QUERY(CallDynamicJava)
+ DEFINE_CLASS_QUERY(CallJava)
+ DEFINE_CLASS_QUERY(CallLeaf)
+ DEFINE_CLASS_QUERY(CallRuntime)
+ DEFINE_CLASS_QUERY(CallStaticJava)
+ DEFINE_CLASS_QUERY(Catch)
+ DEFINE_CLASS_QUERY(CatchProj)
+ DEFINE_CLASS_QUERY(CheckCastPP)
+ DEFINE_CLASS_QUERY(ConstraintCast)
+ DEFINE_CLASS_QUERY(CMove)
+ DEFINE_CLASS_QUERY(Cmp)
+ DEFINE_CLASS_QUERY(CountedLoop)
+ DEFINE_CLASS_QUERY(CountedLoopEnd)
+ DEFINE_CLASS_QUERY(FastLock)
+ DEFINE_CLASS_QUERY(FastUnlock)
+ DEFINE_CLASS_QUERY(If)
+ DEFINE_CLASS_QUERY(IfFalse)
+ DEFINE_CLASS_QUERY(IfTrue)
+ DEFINE_CLASS_QUERY(Initialize)
+ DEFINE_CLASS_QUERY(Jump)
+ DEFINE_CLASS_QUERY(JumpProj)
+ DEFINE_CLASS_QUERY(Load)
+ DEFINE_CLASS_QUERY(LoadStore)
+ DEFINE_CLASS_QUERY(Lock)
+ DEFINE_CLASS_QUERY(Loop)
+ DEFINE_CLASS_QUERY(Mach)
+ DEFINE_CLASS_QUERY(MachCall)
+ DEFINE_CLASS_QUERY(MachCallDynamicJava)
+ DEFINE_CLASS_QUERY(MachCallJava)
+ DEFINE_CLASS_QUERY(MachCallLeaf)
+ DEFINE_CLASS_QUERY(MachCallRuntime)
+ DEFINE_CLASS_QUERY(MachCallStaticJava)
+ DEFINE_CLASS_QUERY(MachIf)
+ DEFINE_CLASS_QUERY(MachNullCheck)
+ DEFINE_CLASS_QUERY(MachReturn)
+ DEFINE_CLASS_QUERY(MachSafePoint)
+ DEFINE_CLASS_QUERY(MachSpillCopy)
+ DEFINE_CLASS_QUERY(MachTemp)
+ DEFINE_CLASS_QUERY(Mem)
+ DEFINE_CLASS_QUERY(MemBar)
+ DEFINE_CLASS_QUERY(MergeMem)
+ DEFINE_CLASS_QUERY(Mul)
+ DEFINE_CLASS_QUERY(Multi)
+ DEFINE_CLASS_QUERY(MultiBranch)
+ DEFINE_CLASS_QUERY(PCTable)
+ DEFINE_CLASS_QUERY(Phi)
+ DEFINE_CLASS_QUERY(Proj)
+ DEFINE_CLASS_QUERY(Region)
+ DEFINE_CLASS_QUERY(Root)
+ DEFINE_CLASS_QUERY(SafePoint)
+ DEFINE_CLASS_QUERY(Start)
+ DEFINE_CLASS_QUERY(Store)
+ DEFINE_CLASS_QUERY(Sub)
+ DEFINE_CLASS_QUERY(Type)
+ DEFINE_CLASS_QUERY(Unlock)
+
+ #undef DEFINE_CLASS_QUERY
+
+ // duplicate of is_MachSpillCopy()
+ bool is_SpillCopy () const {
+ return ((_class_id & ClassMask_MachSpillCopy) == Class_MachSpillCopy);
+ }
+
+ bool is_Con () const { return (_flags & Flag_is_Con) != 0; }
+ bool is_Goto() const { return (_flags & Flag_is_Goto) != 0; }
+ // A data node which is safe to leave in a dead loop during IGVN optimization.
+ bool is_dead_loop_safe() const {
+ return is_Phi() || is_Proj() ||
+ (_flags & (Flag_is_dead_loop_safe | Flag_is_Con)) != 0;
+ }
+
+ // is_Copy() returns copied edge index (0 or 1)
+ uint is_Copy() const { return (_flags & Flag_is_Copy); }
+
+ virtual bool is_CFG() const { return false; }
+
+ // If this node is control-dependent on a test, can it be
+ // rerouted to a dominating equivalent test? This is usually
+ // true of non-CFG nodes, but can be false for operations which
+ // depend for their correct sequencing on more than one test.
+ // (In that case, hoisting to a dominating test may silently
+ // skip some other important test.)
+ virtual bool depends_only_on_test() const { assert(!is_CFG(), ""); return true; };
+
+ // defined for MachNodes that match 'If' | 'Goto' | 'CountedLoopEnd'
+ bool is_Branch() const { return (_flags & Flag_is_Branch) != 0; }
+
+ // When building basic blocks, I need to have a notion of block beginning
+ // Nodes, next block selector Nodes (block enders), and next block
+ // projections. These calls need to work on their machine equivalents. The
+ // Ideal beginning Nodes are RootNode, RegionNode and StartNode.
+ bool is_block_start() const {
+ if ( is_Region() )
+ return this == (const Node*)in(0);
+ else
+ return (_flags & Flag_is_block_start) != 0;
+ }
+
+ // The Ideal control projection Nodes are IfTrue/IfFalse, JumpProjNode, Root,
+ // Goto and Return. This call also returns the block ending Node.
+ virtual const Node *is_block_proj() const;
+
+ // The node is a "macro" node which needs to be expanded before matching
+ bool is_macro() const { return (_flags & Flag_is_macro) != 0; }
+
+ // Value is a vector of primitive values
+ bool is_Vector() const { return (_flags & Flag_is_Vector) != 0; }
+
+//----------------- Optimization
+
+ // Get the worst-case Type output for this Node.
+ virtual const class Type *bottom_type() const;
+
+ // If we find a better type for a node, try to record it permanently.
+ // Return true if this node actually changed.
+ // Be sure to do the hash_delete game in the "rehash" variant.
+ void raise_bottom_type(const Type* new_type);
+
+ // Get the address type with which this node uses and/or defs memory,
+ // or NULL if none. The address type is conservatively wide.
+ // Returns non-null for calls, membars, loads, stores, etc.
+ // Returns TypePtr::BOTTOM if the node touches memory "broadly".
+ virtual const class TypePtr *adr_type() const { return NULL; }
+
+ // Return an existing node which computes the same function as this node.
+ // The optimistic combined algorithm requires this to return a Node which
+ // is a small number of steps away (e.g., one of my inputs).
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // Return the set of values this Node can take on at runtime.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Return a node which is more "ideal" than the current node.
+ // The invariants on this call are subtle. If in doubt, read the
+ // treatise in node.cpp above the default implementation AND TEST WITH
+ // +VerifyIterativeGVN!
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Some nodes have specific Ideal subgraph transformations only if they are
+ // unique users of specific nodes. Such nodes should be put on IGVN worklist
+ // for the transformations to happen.
+ bool has_special_unique_user() const;
+
+protected:
+ bool remove_dead_region(PhaseGVN *phase, bool can_reshape);
+public:
+
+ // Idealize graph, using DU info. Done after constant propagation
+ virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );
+
+ // See if there is valid pipeline info
+ static const Pipeline *pipeline_class();
+ virtual const Pipeline *pipeline() const;
+
+ // Compute the latency from the def to this instruction of the ith input node
+ uint latency(uint i);
+
+ // Hash & compare functions, for pessimistic value numbering
+
+ // If the hash function returns the special sentinel value NO_HASH,
+ // the node is guaranteed never to compare equal to any other node.
+ // If we accidentally generate a hash with value NO_HASH, the node
+ // won't go into the table and we'll lose a little optimization.
+ enum { NO_HASH = 0 };
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+
+ // Operation appears to be iteratively computed (such as an induction variable)
+ // It is possible for this operation to return false for a loop-varying
+ // value, if it appears (by local graph inspection) to be computed by a simple conditional.
+ bool is_iteratively_computed();
+
+ // Determine if a node is a Counted loop induction variable.
+ // The method is defined in loopnode.cpp.
+ const Node* is_loop_iv() const;
+
+ // Return a node with opcode "opc" and same inputs as "this" if one can
+ // be found; otherwise return NULL.
+ Node* find_similar(int opc);
+
+ // Return the unique control out if only one. Null if none or more than one.
+ Node* unique_ctrl_out();
+
+//----------------- Code Generation
+
+ // Ideal register class for Matching. Zero means unmatched instruction
+ // (these are cloned instead of converted to machine nodes).
+ virtual uint ideal_reg() const;
+
+ static const uint NotAMachineReg; // must be > max. machine register
+
+ // Do we Match on this edge index or not? Generally false for Control
+ // and true for everything else. Weird for calls & returns.
+ virtual uint match_edge(uint idx) const;
+
+ // Register class output is returned in
+ virtual const RegMask &out_RegMask() const;
+ // Register class input is expected in
+ virtual const RegMask &in_RegMask(uint) const;
+ // Should we clone rather than spill this instruction?
+ bool rematerialize() const;
+
+ // Return JVM State Object if this Node carries debug info, or NULL otherwise
+ virtual JVMState* jvms() const;
+
+ // Print as assembly
+ virtual void format( PhaseRegAlloc *, outputStream* st = tty ) const;
+ // Emit bytes starting at parameter 'ptr'
+ // Bump 'ptr' by the number of output bytes
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ // Size of instruction in bytes
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+ // Convenience function to extract an integer constant from a node.
+ // If it is not an integer constant (either Con, CastII, or Mach),
+ // return value_if_unknown.
+ jint find_int_con(jint value_if_unknown) const {
+ const TypeInt* t = find_int_type();
+ return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
+ }
+ // Return the constant, knowing it is an integer constant already
+ jint get_int() const {
+ const TypeInt* t = find_int_type();
+ guarantee(t != NULL, "must be con");
+ return t->get_con();
+ }
+ // Here's where the work is done. Can produce non-constant int types too.
+ const TypeInt* find_int_type() const;
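+ // For example (illustrative; 'n' is a hypothetical node):
+ //   jint shift = n->in(2)->find_int_con(-1);
+ //   if (shift == -1) { /* in(2) is not a known integer constant */ }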
+
+ // Same thing for long (and intptr_t, via type.hpp):
+ jlong get_long() const {
+ const TypeLong* t = find_long_type();
+ guarantee(t != NULL, "must be con");
+ return t->get_con();
+ }
+ jlong find_long_con(jint value_if_unknown) const {
+ const TypeLong* t = find_long_type();
+ return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
+ }
+ const TypeLong* find_long_type() const;
+
+ // These guys are called by code generated by ADLC:
+ intptr_t get_ptr() const;
+ jdouble getd() const;
+ jfloat getf() const;
+
+ // Nodes which are pinned into basic blocks
+ virtual bool pinned() const { return false; }
+
+ // Nodes which use memory without consuming it, hence need antidependences
+ // More specifically, needs_anti_dependence_check returns true iff the node
+ // (a) does a load, and (b) does not perform a store (except perhaps to a
+ // stack slot or some other unaliased location).
+ bool needs_anti_dependence_check() const;
+
+ // Return which operand this instruction may cisc-spill. In other words,
+ // return operand position that can convert from reg to memory access
+ virtual int cisc_operand() const { return AdlcVMDeps::Not_cisc_spillable; }
+ bool is_cisc_alternate() const { return (_flags & Flag_is_cisc_alternate) != 0; }
+
+//----------------- Graph walking
+public:
+ // Walk and apply member functions recursively.
+ // Supplied (this) pointer is root.
+ void walk(NFunc pre, NFunc post, void *env);
+ static void nop(Node &, void*); // Dummy empty function
+ static void packregion( Node &n, void* );
+private:
+ void walk_(NFunc pre, NFunc post, void *env, VectorSet &visited);
+
+//----------------- Printing, etc
+public:
+#ifndef PRODUCT
+ Node* find(int idx) const; // Search the graph for the given idx.
+ Node* find_ctrl(int idx) const; // Search control ancestors for the given idx.
+ void dump() const; // Print this node,
+ void dump(int depth) const; // Print this node, recursively to depth d
+ void dump_ctrl(int depth) const; // Print control nodes, to depth d
+ virtual void dump_req() const; // Print required-edge info
+ virtual void dump_prec() const; // Print precedence-edge info
+ virtual void dump_out() const; // Print the output edge info
+ virtual void dump_spec(outputStream *st) const {}; // Print per-node info
+ void verify_edges(Unique_Node_List &visited); // Verify bi-directional edges
+ void verify() const; // Check Def-Use info for my subgraph
+ static void verify_recur(const Node *n, int verify_depth, VectorSet &old_space, VectorSet &new_space);
+
+ // This call defines a class-unique string used to identify class instances
+ virtual const char *Name() const;
+
+ void dump_format(PhaseRegAlloc *ra) const; // debug access to MachNode::format(...)
+ // RegMask Print Functions
+ void dump_in_regmask(int idx) { in_RegMask(idx).dump(); }
+ void dump_out_regmask() { out_RegMask().dump(); }
+ static int _in_dump_cnt;
+ static bool in_dump() { return _in_dump_cnt > 0; }
+ void fast_dump() const {
+ tty->print("%4d: %-17s", _idx, Name());
+ for (uint i = 0; i < len(); i++)
+ if (in(i))
+ tty->print(" %4d", in(i)->_idx);
+ else
+ tty->print(" NULL");
+ tty->print("\n");
+ }
+#endif
+#ifdef ASSERT
+ void verify_construction();
+ bool verify_jvms(const JVMState* jvms) const;
+ int _debug_idx; // Unique value assigned to every node.
+ int debug_idx() const { return _debug_idx; }
+ void set_debug_idx( int debug_idx ) { _debug_idx = debug_idx; }
+
+ Node* _debug_orig; // Original version of this, if any.
+ Node* debug_orig() const { return _debug_orig; }
+ void set_debug_orig(Node* orig); // _debug_orig = orig
+
+ int _hash_lock; // Barrier to modifications of nodes in the hash table
+ void enter_hash_lock() { ++_hash_lock; assert(_hash_lock < 99, "in too many hash tables?"); }
+ void exit_hash_lock() { --_hash_lock; assert(_hash_lock >= 0, "mispaired hash locks"); }
+
+ static void init_NodeProperty();
+
+ #if OPTO_DU_ITERATOR_ASSERT
+ const Node* _last_del; // The last deleted node.
+ uint _del_tick; // Bumped when a deletion happens.
+ #endif
+#endif
+};
+
+//-----------------------------------------------------------------------------
+// Iterators over DU info, and associated Node functions.
+
+#if OPTO_DU_ITERATOR_ASSERT
+
+// Common code for assertion checking on DU iterators.
+class DUIterator_Common VALUE_OBJ_CLASS_SPEC {
+#ifdef ASSERT
+ protected:
+ bool _vdui; // cached value of VerifyDUIterators
+ const Node* _node; // the node containing the _out array
+ uint _outcnt; // cached node->_outcnt
+ uint _del_tick; // cached node->_del_tick
+ Node* _last; // last value produced by the iterator
+
+ void sample(const Node* node); // used by c'tor to set up for verifies
+ void verify(const Node* node, bool at_end_ok = false);
+ void verify_resync();
+ void reset(const DUIterator_Common& that);
+
+// The VDUI_ONLY macro protects code conditionalized on VerifyDUIterators
+ #define I_VDUI_ONLY(i,x) { if ((i)._vdui) { x; } }
+#else
+ #define I_VDUI_ONLY(i,x) { }
+#endif //ASSERT
+};
+
+#define VDUI_ONLY(x) I_VDUI_ONLY(*this, x)
+
+// Default DU iterator. Allows appends onto the out array.
+// Allows deletion from the out array only at the current point.
+// Usage:
+// for (DUIterator i = x->outs(); x->has_out(i); i++) {
+// Node* y = x->out(i);
+// ...
+// }
+// Compiles in product mode to an unsigned integer index, which indexes
+// onto a repeatedly reloaded base pointer of x->_out. The loop predicate
+// also reloads x->_outcnt. If you delete, you must perform "--i" just
+// before continuing the loop. You must delete only the last-produced
+// edge. You must delete only a single copy of the last-produced edge,
+// or else you must delete all copies at once (the first time the edge
+// is produced by the iterator).
+class DUIterator : public DUIterator_Common {
+ friend class Node;
+
+ // This is the index which provides the product-mode behavior.
+ // Whatever the product-mode version of the system does to the
+ // DUI index is done to this index. All other fields in
+ // this class are used only for assertion checking.
+ uint _idx;
+
+ #ifdef ASSERT
+ uint _refresh_tick; // Records the refresh activity.
+
+ void sample(const Node* node); // Initialize _refresh_tick etc.
+ void verify(const Node* node, bool at_end_ok = false);
+ void verify_increment(); // Verify an increment operation.
+ void verify_resync(); // Verify that we can back up over a deletion.
+ void verify_finish(); // Verify that the loop terminated properly.
+ void refresh(); // Resample verification info.
+ void reset(const DUIterator& that); // Resample after assignment.
+ #endif
+
+ DUIterator(const Node* node, int dummy_to_avoid_conversion)
+ { _idx = 0; debug_only(sample(node)); }
+
+ public:
+ // initialize to garbage; clear _vdui to disable asserts
+ DUIterator()
+ { /*initialize to garbage*/ debug_only(_vdui = false); }
+
+ void operator++(int dummy_to_specify_postfix_op)
+ { _idx++; VDUI_ONLY(verify_increment()); }
+
+ void operator--()
+ { VDUI_ONLY(verify_resync()); --_idx; }
+
+ ~DUIterator()
+ { VDUI_ONLY(verify_finish()); }
+
+ void operator=(const DUIterator& that)
+ { _idx = that._idx; debug_only(reset(that)); }
+};
+
+DUIterator Node::outs() const
+ { return DUIterator(this, 0); }
+DUIterator& Node::refresh_out_pos(DUIterator& i) const
+ { I_VDUI_ONLY(i, i.refresh()); return i; }
+bool Node::has_out(DUIterator& i) const
+ { I_VDUI_ONLY(i, i.verify(this,true));return i._idx < _outcnt; }
+Node* Node::out(DUIterator& i) const
+ { I_VDUI_ONLY(i, i.verify(this)); return debug_only(i._last=) _out[i._idx]; }
+
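+// A minimal sketch of the deletion contract described above (illustrative
+// only; should_rewire(), z and the input position j are placeholders):
+//
+//   for (DUIterator i = x->outs(); x->has_out(i); i++) {
+//     Node* y = x->out(i);
+//     if (should_rewire(y)) {
+//       y->set_req(j, z);  // here y->in(j) == x, so one x->y out edge goes away
+//       --i;               // back up over the just-deleted edge
+//     }
+//   }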
+
+// Faster DU iterator. Disallows insertions into the out array.
+// Allows deletion from the out array only at the current point.
+// Usage:
+// for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
+// Node* y = x->fast_out(i);
+// ...
+// }
+// Compiles in product mode to raw Node** pointer arithmetic, with
+// no reloading of pointers from the original node x. If you delete,
+// you must perform "--i; --imax" just before continuing the loop.
+// If you delete multiple copies of the same edge, you must decrement
+// imax, but not i, multiple times: "--i, imax -= num_edges".
+class DUIterator_Fast : public DUIterator_Common {
+ friend class Node;
+ friend class DUIterator_Last;
+
+ // This is the pointer which provides the product-mode behavior.
+ // Whatever the product-mode version of the system does to the
+ // DUI pointer is done to this pointer. All other fields in
+ // this class are used only for assertion checking.
+ Node** _outp;
+
+ #ifdef ASSERT
+ void verify(const Node* node, bool at_end_ok = false);
+ void verify_limit();
+ void verify_resync();
+ void verify_relimit(uint n);
+ void reset(const DUIterator_Fast& that);
+ #endif
+
+ // Note: offset must be signed, since -1 is sometimes passed
+ DUIterator_Fast(const Node* node, ptrdiff_t offset)
+ { _outp = node->_out + offset; debug_only(sample(node)); }
+
+ public:
+ // initialize to garbage; clear _vdui to disable asserts
+ DUIterator_Fast()
+ { /*initialize to garbage*/ debug_only(_vdui = false); }
+
+ void operator++(int dummy_to_specify_postfix_op)
+ { _outp++; VDUI_ONLY(verify(_node, true)); }
+
+ void operator--()
+ { VDUI_ONLY(verify_resync()); --_outp; }
+
+ void operator-=(uint n) // applied to the limit only
+ { _outp -= n; VDUI_ONLY(verify_relimit(n)); }
+
+ bool operator<(DUIterator_Fast& limit) {
+ I_VDUI_ONLY(*this, this->verify(_node, true));
+ I_VDUI_ONLY(limit, limit.verify_limit());
+ return _outp < limit._outp;
+ }
+
+ void operator=(const DUIterator_Fast& that)
+ { _outp = that._outp; debug_only(reset(that)); }
+};
+
+DUIterator_Fast Node::fast_outs(DUIterator_Fast& imax) const {
+ // Assign a limit pointer to the reference argument:
+ imax = DUIterator_Fast(this, (ptrdiff_t)_outcnt);
+ // Return the base pointer:
+ return DUIterator_Fast(this, 0);
+}
+Node* Node::fast_out(DUIterator_Fast& i) const {
+ I_VDUI_ONLY(i, i.verify(this));
+ return debug_only(i._last=) *i._outp;
+}
+
+
+// Faster DU iterator. Requires each successive edge to be removed.
+// Does not allow insertion of any edges.
+// Usage:
+// for (DUIterator_Last imin, i = x->last_outs(imin); i >= imin; i -= num_edges) {
+// Node* y = x->last_out(i);
+// ...
+// }
+// Compiles in product mode to raw Node** pointer arithmetic, with
+// no reloading of pointers from the original node x.
+class DUIterator_Last : private DUIterator_Fast {
+ friend class Node;
+
+ #ifdef ASSERT
+ void verify(const Node* node, bool at_end_ok = false);
+ void verify_limit();
+ void verify_step(uint num_edges);
+ #endif
+
+ // Note: offset must be signed, since -1 is sometimes passed
+ DUIterator_Last(const Node* node, ptrdiff_t offset)
+ : DUIterator_Fast(node, offset) { }
+
+ void operator++(int dummy_to_specify_postfix_op) {} // do not use
+ void operator<(int) {} // do not use
+
+ public:
+ DUIterator_Last() { }
+ // initialize to garbage
+
+ void operator--()
+ { _outp--; VDUI_ONLY(verify_step(1)); }
+
+ void operator-=(uint n)
+ { _outp -= n; VDUI_ONLY(verify_step(n)); }
+
+ bool operator>=(DUIterator_Last& limit) {
+ I_VDUI_ONLY(*this, this->verify(_node, true));
+ I_VDUI_ONLY(limit, limit.verify_limit());
+ return _outp >= limit._outp;
+ }
+
+ void operator=(const DUIterator_Last& that)
+ { DUIterator_Fast::operator=(that); }
+};
+
+DUIterator_Last Node::last_outs(DUIterator_Last& imin) const {
+ // Assign a limit pointer to the reference argument:
+ imin = DUIterator_Last(this, 0);
+ // Return the initial pointer:
+ return DUIterator_Last(this, (ptrdiff_t)_outcnt - 1);
+}
+Node* Node::last_out(DUIterator_Last& i) const {
+ I_VDUI_ONLY(i, i.verify(this));
+ return debug_only(i._last=) *i._outp;
+}
+
+#endif //OPTO_DU_ITERATOR_ASSERT
+
+#undef I_VDUI_ONLY
+#undef VDUI_ONLY
+
+
+//-----------------------------------------------------------------------------
+// Map dense integer indices to Nodes. Uses classic doubling-array trick.
+// Abstractly provides an infinite array of Node*'s, initialized to NULL.
+// Note that the constructor just zeros things, and since I use Arena
+// allocation I do not need a destructor to reclaim storage.
+class Node_Array : public ResourceObj {
+protected:
+ Arena *_a; // Arena to allocate in
+ uint _max;
+ Node **_nodes;
+ void grow( uint i ); // Grow array node to fit
+public:
+ Node_Array(Arena *a) : _a(a), _max(OptoNodeListSize) {
+ _nodes = NEW_ARENA_ARRAY( a, Node *, OptoNodeListSize );
+ for( int i = 0; i < OptoNodeListSize; i++ ) {
+ _nodes[i] = NULL;
+ }
+ }
+
+ Node_Array(Node_Array *na) : _a(na->_a), _max(na->_max), _nodes(na->_nodes) {}
+ Node *operator[] ( uint i ) const // Lookup, or NULL for not mapped
+ { return (i<_max) ? _nodes[i] : (Node*)NULL; }
+ Node *at( uint i ) const { assert(i<_max,"oob"); return _nodes[i]; }
+ Node **adr() { return _nodes; }
+ // Extend the mapping: index i maps to Node *n.
+ void map( uint i, Node *n ) { if( i>=_max ) grow(i); _nodes[i] = n; }
+ void insert( uint i, Node *n );
+ void remove( uint i ); // Remove, preserving order
+ void sort( C_sort_func_t func);
+ void reset( Arena *new_a ); // Zap mapping to empty; reclaim storage
+ void clear(); // Set all entries to NULL, keep storage
+ uint Size() const { return _max; }
+ void dump() const;
+};
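+
+// A minimal usage sketch (illustrative only; 'n' is a placeholder node):
+// stores beyond the current capacity silently grow the backing array, and
+// reads of never-mapped indices return NULL.
+//
+//   Node_Array map(Thread::current()->resource_area());
+//   map.map(n->_idx, n);         // grows if n->_idx >= current max
+//   Node* same = map[n->_idx];   // == n
+//   Node* none = map[12345];     // NULL unless something was mapped there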
+
+class Node_List : public Node_Array {
+ uint _cnt;
+public:
+ Node_List() : Node_Array(Thread::current()->resource_area()), _cnt(0) {}
+ Node_List(Arena *a) : Node_Array(a), _cnt(0) {}
+ void insert( uint i, Node *n ) { Node_Array::insert(i,n); _cnt++; }
+ void remove( uint i ) { Node_Array::remove(i); _cnt--; }
+ void push( Node *b ) { map(_cnt++,b); }
+ void yank( Node *n ); // Find and remove
+ Node *pop() { return _nodes[--_cnt]; }
+ Node *rpop() { Node *b = _nodes[0]; _nodes[0]=_nodes[--_cnt]; return b;}
+ void clear() { _cnt = 0; Node_Array::clear(); } // retain storage
+ uint size() const { return _cnt; }
+ void dump() const;
+};
+
+//------------------------------Unique_Node_List-------------------------------
+class Unique_Node_List : public Node_List {
+ VectorSet _in_worklist;
+ uint _clock_index; // Index in list where to pop from next
+public:
+ Unique_Node_List() : Node_List(), _in_worklist(Thread::current()->resource_area()), _clock_index(0) {}
+ Unique_Node_List(Arena *a) : Node_List(a), _in_worklist(a), _clock_index(0) {}
+
+ void remove( Node *n );
+ bool member( Node *n ) { return _in_worklist.test(n->_idx) != 0; }
+ VectorSet &member_set(){ return _in_worklist; }
+
+ void push( Node *b ) {
+ if( !_in_worklist.test_set(b->_idx) )
+ Node_List::push(b);
+ }
+ Node *pop() {
+ if( _clock_index >= size() ) _clock_index = 0;
+ Node *b = at(_clock_index);
+ map( _clock_index++, Node_List::pop());
+ _in_worklist >>= b->_idx;
+ return b;
+ }
+ Node *remove( uint i ) {
+ Node *b = Node_List::at(i);
+ _in_worklist >>= b->_idx;
+ map(i,Node_List::pop());
+ return b;
+ }
+ void yank( Node *n ) { _in_worklist >>= n->_idx; Node_List::yank(n); }
+ void clear() {
+ _in_worklist.Clear(); // Discards storage but grows automatically
+ Node_List::clear();
+ _clock_index = 0;
+ }
+
+ // Used after parsing to remove useless nodes before Iterative GVN
+ void remove_useless_nodes(VectorSet &useful);
+
+#ifndef PRODUCT
+ void print_set() const { _in_worklist.print(); }
+#endif
+};
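+
+// A minimal worklist sketch (illustrative only; 'root' and visit() are
+// placeholders). The VectorSet filters duplicate pushes, and pop() cycles
+// through the list via _clock_index instead of always taking the tail.
+//
+//   Unique_Node_List worklist;
+//   worklist.push(root);
+//   while (worklist.size() > 0) {
+//     Node* n = worklist.pop();
+//     visit(n);
+//     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++)
+//       worklist.push(n->fast_out(i));  // nodes already queued are not re-added
+//   }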
+
+// Inline definition of Compile::record_for_igvn must be deferred to this point.
+inline void Compile::record_for_igvn(Node* n) {
+ _for_igvn->push(n);
+ record_for_escape_analysis(n);
+}
+
+//------------------------------Node_Stack-------------------------------------
+class Node_Stack {
+protected:
+ struct INode {
+ Node *node; // Processed node
+ uint indx; // Index of next node's child
+ };
+ INode *_inode_top; // tos, stack grows up
+ INode *_inode_max; // End of _inodes == _inodes + _max
+ INode *_inodes; // Array storage for the stack
+ Arena *_a; // Arena to allocate in
+ void grow();
+public:
+ Node_Stack(int size) {
+ size_t max = (size > OptoNodeListSize) ? size : OptoNodeListSize;
+ _a = Thread::current()->resource_area();
+ _inodes = NEW_ARENA_ARRAY( _a, INode, max );
+ _inode_max = _inodes + max;
+ _inode_top = _inodes - 1; // stack is empty
+ }
+
+ Node_Stack(Arena *a, int size) : _a(a) {
+ size_t max = (size > OptoNodeListSize) ? size : OptoNodeListSize;
+ _inodes = NEW_ARENA_ARRAY( _a, INode, max );
+ _inode_max = _inodes + max;
+ _inode_top = _inodes - 1; // stack is empty
+ }
+
+ void pop() {
+ assert(_inode_top >= _inodes, "node stack underflow");
+ --_inode_top;
+ }
+ void push(Node *n, uint i) {
+ ++_inode_top;
+ if (_inode_top >= _inode_max) grow();
+ INode *top = _inode_top; // optimization
+ top->node = n;
+ top->indx = i;
+ }
+ Node *node() const {
+ return _inode_top->node;
+ }
+ Node* node_at(uint i) const {
+ assert(_inodes + i <= _inode_top, "in range");
+ return _inodes[i].node;
+ }
+ uint index() const {
+ return _inode_top->indx;
+ }
+ void set_node(Node *n) {
+ _inode_top->node = n;
+ }
+ void set_index(uint i) {
+ _inode_top->indx = i;
+ }
+ uint size_max() const { return (uint)pointer_delta(_inode_max, _inodes, sizeof(INode)); } // Max size
+ uint size() const { return (uint)pointer_delta(_inode_top, _inodes, sizeof(INode)) + 1; } // Current size
+ bool is_nonempty() const { return (_inode_top >= _inodes); }
+ bool is_empty() const { return (_inode_top < _inodes); }
+ void clear() { _inode_top = _inodes - 1; } // retain storage
+};
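+
+// A minimal sketch of the usual iterative-walk pattern (illustrative only;
+// 'root' is a placeholder): each stack entry pairs a node with the index of
+// its next input to visit, so a depth-first traversal needs no recursion.
+//
+//   Node_Stack stack(16);
+//   stack.push(root, 0);
+//   while (stack.is_nonempty()) {
+//     Node* n = stack.node();
+//     uint  i = stack.index();
+//     if (i < n->req()) {
+//       stack.set_index(i + 1);
+//       Node* m = n->in(i);
+//       if (m != NULL)  stack.push(m, 0);  // descend into the i-th input
+//     } else {
+//       stack.pop();                       // all inputs of n have been visited
+//     }
+//   }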
+
+
+//-----------------------------Node_Notes--------------------------------------
+// Debugging or profiling annotations loosely and sparsely associated
+// with some nodes. See Compile::node_notes_at for the accessor.
+class Node_Notes VALUE_OBJ_CLASS_SPEC {
+ JVMState* _jvms;
+
+public:
+ Node_Notes(JVMState* jvms = NULL) {
+ _jvms = jvms;
+ }
+
+ JVMState* jvms() { return _jvms; }
+ void set_jvms(JVMState* x) { _jvms = x; }
+
+ // True if there is nothing here.
+ bool is_clear() {
+ return (_jvms == NULL);
+ }
+
+ // Make there be nothing here.
+ void clear() {
+ _jvms = NULL;
+ }
+
+ // Make a new, clean node notes.
+ static Node_Notes* make(Compile* C) {
+ Node_Notes* nn = NEW_ARENA_ARRAY(C->comp_arena(), Node_Notes, 1);
+ nn->clear();
+ return nn;
+ }
+
+ Node_Notes* clone(Compile* C) {
+ Node_Notes* nn = NEW_ARENA_ARRAY(C->comp_arena(), Node_Notes, 1);
+ (*nn) = (*this);
+ return nn;
+ }
+
+ // Absorb any information from source.
+ bool update_from(Node_Notes* source) {
+ bool changed = false;
+ if (source != NULL) {
+ if (source->jvms() != NULL) {
+ set_jvms(source->jvms());
+ changed = true;
+ }
+ }
+ return changed;
+ }
+};
+
+// Inlined accessors for Compile::node_notes that require the preceding class:
+inline Node_Notes*
+Compile::locate_node_notes(GrowableArray<Node_Notes*>* arr,
+ int idx, bool can_grow) {
+ assert(idx >= 0, "oob");
+ int block_idx = (idx >> _log2_node_notes_block_size);
+ int grow_by = (block_idx - (arr == NULL? 0: arr->length()));
+ if (grow_by >= 0) {
+ if (!can_grow) return NULL;
+ grow_node_notes(arr, grow_by + 1);
+ }
+ // (Every element of arr is a sub-array of length _node_notes_block_size.)
+ return arr->at(block_idx) + (idx & (_node_notes_block_size-1));
+}
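+// (For example, if the block size were 256, so _log2_node_notes_block_size == 8,
+// then idx 1000 would live in sub-array arr->at(3) at offset 1000 & 255 == 232.
+// The actual block size is defined in compile.hpp; 256 is only illustrative.)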
+
+inline bool
+Compile::set_node_notes_at(int idx, Node_Notes* value) {
+ if (value == NULL || value->is_clear())
+ return false; // nothing to write => write nothing
+ Node_Notes* loc = locate_node_notes(_node_note_array, idx, true);
+ assert(loc != NULL, "");
+ return loc->update_from(value);
+}
+
+
+//------------------------------TypeNode---------------------------------------
+// Node with a Type constant.
+class TypeNode : public Node {
+protected:
+ virtual uint hash() const; // Check the type
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+ const Type* const _type;
+public:
+ void set_type(const Type* t) {
+ assert(t != NULL, "sanity");
+ debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH);
+ *(const Type**)&_type = t; // cast away const-ness
+ // If this node is in the hash table, make sure it doesn't need a rehash.
+ assert(check_hash == NO_HASH || check_hash == hash(), "type change must preserve hash code");
+ }
+ const Type* type() const { assert(_type != NULL, "sanity"); return _type; };
+ TypeNode( const Type *t, uint required ) : Node(required), _type(t) {
+ init_class_id(Class_Type);
+ }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const;
+ virtual uint ideal_reg() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
diff --git a/src/share/vm/opto/opcodes.cpp b/src/share/vm/opto/opcodes.cpp
new file mode 100644
index 000000000..533cff06c
--- /dev/null
+++ b/src/share/vm/opto/opcodes.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// ----------------------------------------------------------------------------
+// Build a table of class names as strings. Used both for debugging printouts
+// and in the ADL machine descriptions.
+#define macro(x) #x,
+const char *NodeClassNames[] = {
+ "Node",
+ "Set",
+ "RegI",
+ "RegP",
+ "RegF",
+ "RegD",
+ "RegL",
+ "RegFlags",
+ "_last_machine_leaf",
+#include "classes.hpp"
+ "_last_class_name",
+};
+#undef macro
diff --git a/src/share/vm/opto/opcodes.hpp b/src/share/vm/opto/opcodes.hpp
new file mode 100644
index 000000000..7c3e38a15
--- /dev/null
+++ b/src/share/vm/opto/opcodes.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Build a big enum of class names to give them dense integer indices
+#define macro(x) Op_##x,
+enum Opcodes {
+ Op_Node = 0,
+ macro(Set) // Instruction selection match rule
+ macro(RegI) // Machine integer register
+ macro(RegP) // Machine pointer register
+ macro(RegF) // Machine float register
+ macro(RegD) // Machine double register
+ macro(RegL) // Machine long register
+ macro(RegFlags) // Machine flags register
+ _last_machine_leaf, // Split between regular opcodes and machine
+#include "classes.hpp"
+ _last_opcode
+};
+#undef macro
+
+// Table of names, indexed by Opcode
+extern const char *NodeClassNames[];
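+
+// For example (illustrative only), the dense opcode of a node indexes
+// directly into this table:
+//
+//   const char* name = NodeClassNames[n->Opcode()];  // e.g. "AddI" for an AddINode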
diff --git a/src/share/vm/opto/optoreg.hpp b/src/share/vm/opto/optoreg.hpp
new file mode 100644
index 000000000..68a2df2cd
--- /dev/null
+++ b/src/share/vm/opto/optoreg.hpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2006-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//------------------------------OptoReg----------------------------------------
+// We eventually need Registers for the Real World. Registers are essentially
+// non-SSA names. A Register is represented as a number. Non-regular values
+// (e.g., Control, Memory, I/O) use the Special register. The actual machine
+// registers (as described in the ADL file for a machine) start at zero.
+// Stack-slots (spill locations) start at the next Chunk past the last machine
+// register.
+//
+// Note that stack spill-slots are treated as a very large register set.
+// They have all the correct properties for a Register: not aliased (uniquely
+// named). There is some simple mapping from a stack-slot register number
+// to the actual location on the stack; this mapping depends on the calling
+// conventions and is described in the ADL.
+//
+// Note that Name is not an enum. The C++ standard defines the range of an
+// enum as the range of the smallest bit-field that can represent all of the
+// enumerators declared in it. The result of assigning a value to an enum is
+// undefined if the value is outside that valid range. OptoReg::Name is
+// therefore typedef'ed as int, because it needs to be able to represent spill-slots.
+//
+class OptoReg VALUE_OBJ_CLASS_SPEC {
+
+ friend class C2Compiler;
+ public:
+ typedef int Name;
+ enum {
+ // Chunk 0
+ Physical = AdlcVMDeps::Physical, // Start of physical regs
+ // A few oddballs at the edge of the world
+ Special = -2, // All special (not allocated) values
+ Bad = -1 // Not a register
+ };
+
+ private:
+
+ static const VMReg opto2vm[REG_COUNT];
+ static Name vm2opto[ConcreteRegisterImpl::number_of_registers];
+
+ public:
+
+ // Stack pointer register
+ static OptoReg::Name c_frame_pointer;
+
+
+
+ // Increment a register number. As in:
+ // "for ( OptoReg::Name i; i=Control; i = add(i,1) ) ..."
+ static Name add( Name x, int y ) { return Name(x+y); }
+
+ // (We would like to have an operator+ for RegName, but it is not
+ // a class, so this would be illegal in C++.)
+
+ static void dump( int );
+
+ // Get the stack slot number of an OptoReg::Name
+ static unsigned int reg2stack( OptoReg::Name r) {
+ assert( r >= stack0(), " must be");
+ return r - stack0();
+ }
+
+ // convert a stack slot number into an OptoReg::Name
+ static OptoReg::Name stack2reg( int idx) {
+ return Name(stack0() + idx);
+ }
+
+ static bool is_stack(Name n) {
+ return n >= stack0();
+ }
+
+ static bool is_valid(Name n) {
+ return (n != Bad);
+ }
+
+ static bool is_reg(Name n) {
+ return is_valid(n) && !is_stack(n);
+ }
+
+ static VMReg as_VMReg(OptoReg::Name n) {
+ if (is_reg(n)) {
+ // Must use table, it'd be nice if Bad was indexable...
+ return opto2vm[n];
+ } else {
+ assert(!is_stack(n), "must un warp");
+ return VMRegImpl::Bad();
+ }
+ }
+
+ // Can un-warp a stack slot or convert a register or Bad
+ static VMReg as_VMReg(OptoReg::Name n, int frame_size, int arg_count) {
+ if (is_reg(n)) {
+ // Must use table, it'd be nice if Bad was indexable...
+ return opto2vm[n];
+ } else if (is_stack(n)) {
+ int stack_slot = reg2stack(n);
+ if (stack_slot < arg_count) {
+ return VMRegImpl::stack2reg(stack_slot + frame_size);
+ }
+ return VMRegImpl::stack2reg(stack_slot - arg_count);
+ // return return VMRegImpl::stack2reg(reg2stack(OptoReg::add(n, -arg_count)));
+ } else {
+ return VMRegImpl::Bad();
+ }
+ }
+
+ static OptoReg::Name as_OptoReg(VMReg r) {
+ if (r->is_stack()) {
+ assert(false, "must warp");
+ return stack2reg(r->reg2stack());
+ } else if (r->is_valid()) {
+ // Must use table, it'd be nice if Bad was indexable...
+ return vm2opto[r->value()];
+ } else {
+ return Bad;
+ }
+ }
+
+ static OptoReg::Name stack0() {
+ return VMRegImpl::stack0->value();
+ }
+
+ static const char* regname(OptoReg::Name n) {
+ return as_VMReg(n)->name();
+ }
+
+};
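+
+// A minimal sketch of the numbering scheme (illustrative only): machine
+// registers lie below stack0() and spill slots at or above it, so the two
+// conversions below are inverses on the stack-slot range.
+//
+//   OptoReg::Name slot5 = OptoReg::stack2reg(5);        // fifth spill slot
+//   assert(OptoReg::is_stack(slot5), "past the last machine register");
+//   assert(OptoReg::reg2stack(slot5) == 5, "round trip");
+//   assert(!OptoReg::is_reg(slot5) && OptoReg::is_valid(slot5), "");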
+
+//---------------------------OptoRegPair-------------------------------------------
+// Pairs of 32-bit registers for the allocator.
+// This class is very similar to VMRegPair. C2 only interfaces with VMRegPair
+// via the calling convention code, which is shared between the compilers.
+// Since C2 uses OptoRegs for register allocation, it is more efficient to use
+// OptoRegPair internally for nodes that can contain a pair of OptoRegs rather
+// than use VMRegPair and continually convert back and forth. So normally
+// C2 takes in a VMRegPair from the calling convention code, immediately
+// converts it to an OptoRegPair, and stays in the OptoReg world. The only
+// conversion between OptoRegs and VMRegs is for debug info and oopMaps. This
+// is not a high-bandwidth spot and so it is not an issue.
+// Note that one other consequence of staying in the OptoReg world with OptoRegPairs
+// is that there are "physical" OptoRegs that are not representable in the VMReg
+// world, notably flags. [ But by design there is "space" in the VMReg world
+// for such registers; they just may not be concrete. ] So if we were to use VMRegPair,
+// then the VMReg world would have to have a representation for these registers
+// so that OptoReg->VMReg->OptoReg would reproduce the original OptoReg. As it
+// stands, if you convert a flag (condition code) to a VMReg you will get VMRegImpl::Bad,
+// and converting that back will return OptoReg::Bad, losing the identity of the OptoReg.
+
+class OptoRegPair {
+private:
+ short _second;
+ short _first;
+public:
+ void set_bad ( ) { _second = OptoReg::Bad; _first = OptoReg::Bad; }
+ void set1 ( OptoReg::Name n ) { _second = OptoReg::Bad; _first = n; }
+ void set2 ( OptoReg::Name n ) { _second = n + 1; _first = n; }
+ void set_pair( OptoReg::Name second, OptoReg::Name first ) { _second= second; _first= first; }
+ void set_ptr ( OptoReg::Name ptr ) {
+#ifdef _LP64
+ _second = ptr+1;
+#else
+ _second = OptoReg::Bad;
+#endif
+ _first = ptr;
+ }
+
+ OptoReg::Name second() const { return _second; }
+ OptoReg::Name first() const { return _first; }
+ OptoRegPair(OptoReg::Name second, OptoReg::Name first) { _second = second; _first = first; }
+ OptoRegPair(OptoReg::Name f) { _second = OptoReg::Bad; _first = f; }
+ OptoRegPair() { _second = OptoReg::Bad; _first = OptoReg::Bad; }
+};
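+
+// A minimal usage sketch (illustrative only; 'lo' is a placeholder register
+// name): set1() records a single 32-bit value, set2() a 64-bit value held in
+// two adjacent registers, and set_ptr() chooses between them based on _LP64.
+//
+//   OptoRegPair p;
+//   p.set2(lo);                  // first() == lo, second() == lo + 1
+//   assert(p.second() == p.first() + 1, "adjacent halves");
+//   p.set1(lo);                  // second() == OptoReg::Bad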
diff --git a/src/share/vm/opto/output.cpp b/src/share/vm/opto/output.cpp
new file mode 100644
index 000000000..7868ec237
--- /dev/null
+++ b/src/share/vm/opto/output.cpp
@@ -0,0 +1,2680 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_output.cpp.incl"
+
+extern uint size_java_to_interp();
+extern uint reloc_java_to_interp();
+extern uint size_exception_handler();
+extern uint size_deopt_handler();
+
+#ifndef PRODUCT
+#define DEBUG_ARG(x) , x
+#else
+#define DEBUG_ARG(x)
+#endif
+
+extern int emit_exception_handler(CodeBuffer &cbuf);
+extern int emit_deopt_handler(CodeBuffer &cbuf);
+
+//------------------------------Output-----------------------------------------
+// Convert Nodes to instruction bits and pass off to the VM
+void Compile::Output() {
+ // RootNode goes
+ assert( _cfg->_broot->_nodes.size() == 0, "" );
+
+ // Initialize the space for the BufferBlob used to find and verify
+ // instruction size in MachNode::emit_size()
+ init_scratch_buffer_blob();
+
+ // Make sure I can find the Start Node
+ Block_Array& bbs = _cfg->_bbs;
+ Block *entry = _cfg->_blocks[1];
+ Block *broot = _cfg->_broot;
+
+ const StartNode *start = entry->_nodes[0]->as_Start();
+
+ // Replace StartNode with prolog
+ MachPrologNode *prolog = new (this) MachPrologNode();
+ entry->_nodes.map( 0, prolog );
+ bbs.map( prolog->_idx, entry );
+ bbs.map( start->_idx, NULL ); // start is no longer in any block
+
+ // Virtual methods need an unverified entry point
+
+ if( is_osr_compilation() ) {
+ if( PoisonOSREntry ) {
+ // TODO: Should use a ShouldNotReachHereNode...
+ _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
+ }
+ } else {
+ if( _method && !_method->flags().is_static() ) {
+ // Insert unvalidated entry point
+ _cfg->insert( broot, 0, new (this) MachUEPNode() );
+ }
+
+ }
+
+
+ // Break before main entry point
+ if( (_method && _method->break_at_execute())
+#ifndef PRODUCT
+ ||(OptoBreakpoint && is_method_compilation())
+ ||(OptoBreakpointOSR && is_osr_compilation())
+ ||(OptoBreakpointC2R && !_method)
+#endif
+ ) {
+ // checking for _method means that OptoBreakpoint does not apply to
+ // runtime stubs or frame converters
+ _cfg->insert( entry, 1, new (this) MachBreakpointNode() );
+ }
+
+ // Insert epilogs before every return
+ for( uint i=0; i<_cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+ if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
+ Node *m = b->end();
+ if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
+ MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
+ b->add_inst( epilog );
+ bbs.map(epilog->_idx, b);
+ //_regalloc->set_bad(epilog->_idx); // Already initialized this way.
+ }
+ }
+ }
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
+# endif
+
+ ScheduleAndBundle();
+
+#ifndef PRODUCT
+ if (trace_opto_output()) {
+ tty->print("\n---- After ScheduleAndBundle ----\n");
+ for (uint i = 0; i < _cfg->_num_blocks; i++) {
+ tty->print("\nBB#%03d:\n", i);
+ Block *bb = _cfg->_blocks[i];
+ for (uint j = 0; j < bb->_nodes.size(); j++) {
+ Node *n = bb->_nodes[j];
+ OptoReg::Name reg = _regalloc->get_reg_first(n);
+ tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
+ n->dump();
+ }
+ }
+ }
+#endif
+
+ if (failing()) return;
+
+ BuildOopMaps();
+
+ if (failing()) return;
+
+ Fill_buffer();
+}
+
+bool Compile::need_stack_bang(int frame_size_in_bytes) const {
+ // Determine if we need to generate a stack overflow check.
+ // Do it if the method is not a stub function and
+ // has java calls or has frame size > vm_page_size/8.
+ return (stub_function() == NULL &&
+ (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3));
+}
+
+bool Compile::need_register_stack_bang() const {
+ // Determine if we need to generate a register stack overflow check.
+ // This is only used on architectures which have split register
+ // and memory stacks (i.e., IA64).
+ // Bang if the method is not a stub function and has java calls
+ return (stub_function() == NULL && has_java_calls());
+}
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+
+
+// In order to catch compiler oop-map bugs, we have implemented
+// a debugging mode called ZapDeadCompiledLocals.
+// This mode causes the compiler to insert a call to a runtime routine,
+// "zap_dead_locals", right before each place in compiled code
+// that could potentially be a gc-point (i.e., a safepoint or oop map point).
+// The runtime routine checks that locations mapped as oops are really
+// oops, that locations mapped as values do not look like oops,
+// and that locations mapped as dead are not used later
+// (by zapping them to an invalid address).
+
+int Compile::_CompiledZap_count = 0;
+
+void Compile::Insert_zap_nodes() {
+ bool skip = false;
+
+
+ // Dink with static counts because code without the extra
+ // runtime calls is MUCH faster for debugging purposes
+
+ if ( CompileZapFirst == 0 ) ; // nothing special
+ else if ( CompileZapFirst > CompiledZap_count() ) skip = true;
+ else if ( CompileZapFirst == CompiledZap_count() )
+ warning("starting zap compilation after skipping");
+
+ if ( CompileZapLast == -1 ) ; // nothing special
+ else if ( CompileZapLast < CompiledZap_count() ) skip = true;
+ else if ( CompileZapLast == CompiledZap_count() )
+ warning("about to compile last zap");
+
+ ++_CompiledZap_count; // counts skipped zaps, too
+
+ if ( skip ) return;
+
+
+ if ( _method == NULL )
+ return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care
+
+ // Insert call to zap runtime stub before every node with an oop map
+ for( uint i=0; i<_cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+ for ( uint j = 0; j < b->_nodes.size(); ++j ) {
+ Node *n = b->_nodes[j];
+
+ // Determine whether we should insert a zap-a-lot node in the output.
+ // We do that for all nodes that have oopmap info, except for calls
+ // to allocation. Allocation calls pass in the old top-of-eden pointer
+ // and expect the C code to reset it. Hence, there can be no safepoints between
+ // the inlined allocation and the call to new_Java, etc.
+ // We also cannot zap monitor calls, as they must hold the microlock
+ // during the call to Zap, which also wants to grab the microlock.
+ bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL);
+ if ( insert ) { // it is MachSafePoint
+ if ( !n->is_MachCall() ) {
+ insert = false;
+ } else if ( n->is_MachCall() ) {
+ MachCallNode* call = n->as_MachCall();
+ if (call->entry_point() == OptoRuntime::new_instance_Java() ||
+ call->entry_point() == OptoRuntime::new_array_Java() ||
+ call->entry_point() == OptoRuntime::multianewarray2_Java() ||
+ call->entry_point() == OptoRuntime::multianewarray3_Java() ||
+ call->entry_point() == OptoRuntime::multianewarray4_Java() ||
+ call->entry_point() == OptoRuntime::multianewarray5_Java() ||
+ call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
+ call->entry_point() == OptoRuntime::complete_monitor_locking_Java()
+ ) {
+ insert = false;
+ }
+ }
+ if (insert) {
+ Node *zap = call_zap_node(n->as_MachSafePoint(), i);
+ b->_nodes.insert( j, zap );
+ _cfg->_bbs.map( zap->_idx, b );
+ ++j;
+ }
+ }
+ }
+ }
+}
+
+
+Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
+ const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type();
+ CallStaticJavaNode* ideal_node =
+ new (this, tf->domain()->cnt()) CallStaticJavaNode( tf,
+ OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
+ "call zap dead locals stub", 0, TypePtr::BOTTOM);
+ // We need to copy the OopMap from the site we're zapping at.
+ // We have to make a copy, because the zap site might not be
+ // a call site, and zap_dead is a call site.
+ OopMap* clone = node_to_check->oop_map()->deep_copy();
+
+ // Add the cloned OopMap to the zap node
+ ideal_node->set_oop_map(clone);
+ return _matcher->match_sfpt(ideal_node);
+}
+
+//------------------------------is_node_getting_a_safepoint--------------------
+bool Compile::is_node_getting_a_safepoint( Node* n) {
+ // This code duplicates the logic prior to the call of add_safepoint
+ // below in this file.
+ if( n->is_MachSafePoint() ) return true;
+ return false;
+}
+
+# endif // ENABLE_ZAP_DEAD_LOCALS
+
+//------------------------------compute_loop_first_inst_sizes------------------
+// Compute the size of the first NumberOfLoopInstrToAlign instructions at the
+// head of a loop. When aligning a loop we need to provide enough instructions
+// in the cpu's fetch buffer to feed the decoders. The loop alignment can be
+// avoided if there are already enough instructions in the fetch buffer at the head of the loop.
+// By default, the size is set to 999999 by Block's constructor so that
+// a loop will be aligned if the size is not reset here.
+//
+// Note: Mach instructions could contain several HW instructions
+// so the size is estimated only.
+//
+void Compile::compute_loop_first_inst_sizes() {
+ // The next condition is used to gate the loop alignment optimization.
+ // Don't align a loop if there are enough instructions at the head of the loop
+ // or the alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
+ // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
+ // equal to 11 bytes, which is the size of the largest address NOP instruction.
+ if( MaxLoopPad < OptoLoopAlignment-1 ) {
+ uint last_block = _cfg->_num_blocks-1;
+ for( uint i=1; i <= last_block; i++ ) {
+ Block *b = _cfg->_blocks[i];
+ // Check the first loop's block which requires an alignment.
+ if( b->head()->is_Loop() &&
+ b->code_alignment() > (uint)relocInfo::addr_unit() ) {
+ uint sum_size = 0;
+ uint inst_cnt = NumberOfLoopInstrToAlign;
+ inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt,
+ _regalloc);
+ // Check the next fallthrough block if first loop's block does not have
+ // enough instructions.
+ if( inst_cnt > 0 && i < last_block ) {
+ // First, check if the first loop's block contains whole loop.
+ // LoopNode::LoopBackControl == 2.
+ Block *bx = _cfg->_bbs[b->pred(2)->_idx];
+ // Skip connector blocks (with limit in case of irreducible loops).
+ int search_limit = 16;
+ while( bx->is_connector() && search_limit-- > 0) {
+ bx = _cfg->_bbs[bx->pred(1)->_idx];
+ }
+ if( bx != b ) { // loop body is in several blocks.
+ Block *nb = NULL;
+ while( inst_cnt > 0 && i < last_block && nb != bx &&
+ !_cfg->_blocks[i+1]->head()->is_Loop() ) {
+ i++;
+ nb = _cfg->_blocks[i];
+ inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt,
+ _regalloc);
+ } // while( inst_cnt > 0 && i < last_block )
+ } // if( bx != b )
+ } // if( inst_cnt > 0 && i < last_block )
+ b->set_first_inst_size(sum_size);
+ } // if( b->head()->is_Loop() )
+ } // for( i <= last_block )
+ } // if( MaxLoopPad < OptoLoopAlignment-1 )
+}
+
+//----------------------Shorten_branches---------------------------------------
+// The architecture description provides short branch variants for some long
+// branch instructions. Replace eligible long branches with short branches.
+void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size) {
+
+ // fill in the nop array for bundling computations
+ MachNode *_nop_list[Bundle::_nop_count];
+ Bundle::initialize_nops(_nop_list, this);
+
+ // ------------------
+ // Compute size of each block, method size, and relocation information size
+ uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
+ uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
+ DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
+ blk_starts[0] = 0;
+
+ // Initialize the sizes to 0
+ code_size = 0; // Size in bytes of generated code
+ stub_size = 0; // Size in bytes of all stub entries
+ // Size in bytes of all relocation entries, including those in local stubs.
+ // Start with 2-bytes of reloc info for the unvalidated entry point
+ reloc_size = 1; // Number of relocation entries
+ const_size = 0; // size of fp constants in words
+
+ // Make three passes. The first computes pessimistic blk_starts,
+ // relative jmp_end, reloc_size and const_size information.
+ // The second performs short branch substitution using the pessimistic
+ // sizing. The third inserts nops where needed.
+
+ Node *nj; // tmp
+
+ // Step one, perform a pessimistic sizing pass.
+ uint i;
+ uint min_offset_from_last_call = 1; // init to a positive value
+ uint nop_size = (new (this) MachNopNode())->size(_regalloc);
+ for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
+ Block *b = _cfg->_blocks[i];
+
+ // Sum all instruction sizes to compute block size
+ uint last_inst = b->_nodes.size();
+ uint blk_size = 0;
+ for( uint j = 0; j<last_inst; j++ ) {
+ nj = b->_nodes[j];
+ uint inst_size = nj->size(_regalloc);
+ blk_size += inst_size;
+ // Handle machine instruction nodes
+ if( nj->is_Mach() ) {
+ MachNode *mach = nj->as_Mach();
+ blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
+ reloc_size += mach->reloc();
+ const_size += mach->const_size();
+ if( mach->is_MachCall() ) {
+ MachCallNode *mcall = mach->as_MachCall();
+ // This destination address is NOT PC-relative
+
+ mcall->method_set((intptr_t)mcall->entry_point());
+
+ if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
+ stub_size += size_java_to_interp();
+ reloc_size += reloc_java_to_interp();
+ }
+ } else if (mach->is_MachSafePoint()) {
+ // If call/safepoint are adjacent, account for possible
+ // nop to disambiguate the two safepoints.
+ if (min_offset_from_last_call == 0) {
+ blk_size += nop_size;
+ }
+ }
+ }
+ min_offset_from_last_call += inst_size;
+ // Remember end of call offset
+ if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
+ min_offset_from_last_call = 0;
+ }
+ }
+
+ // During short branch replacement, we store the relative (to blk_starts)
+ // end of jump in jmp_end, rather than the absolute end of jump. This
+ // is so that we do not need to recompute sizes of all nodes when we compute
+ // correct blk_starts in our next sizing pass.
+ jmp_end[i] = blk_size;
+ DEBUG_ONLY( jmp_target[i] = 0; )
+
+ // When the next block starts a loop, we may insert pad NOP
+ // instructions. Since we cannot know our future alignment,
+ // assume the worst.
+ if( i<_cfg->_num_blocks-1 ) {
+ Block *nb = _cfg->_blocks[i+1];
+ int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
+ if( max_loop_pad > 0 ) {
+ assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
+ blk_size += max_loop_pad;
+ }
+ }
+
+ // Save block size; update total method size
+ blk_starts[i+1] = blk_starts[i]+blk_size;
+ }
+
+ // Step two, replace eligible long jumps.
+
+ // Note: this will only get the long branches within short branch
+ // range. Another pass might detect more branches that became
+ // candidates because the shortening in the first pass exposed
+ // more opportunities. Unfortunately, this would require
+ // recomputing the starting and ending positions for the blocks
+ for( i=0; i<_cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+
+ int j;
+ // Find the branch; ignore trailing NOPs.
+ for( j = b->_nodes.size()-1; j>=0; j-- ) {
+ nj = b->_nodes[j];
+ if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con )
+ break;
+ }
+
+ if (j >= 0) {
+ if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) {
+ MachNode *mach = nj->as_Mach();
+ // This requires the TRUE branch target be in succs[0]
+ uint bnum = b->non_connector_successor(0)->_pre_order;
+ uintptr_t target = blk_starts[bnum];
+ if( mach->is_pc_relative() ) {
+ int offset = target-(blk_starts[i] + jmp_end[i]);
+ if (_matcher->is_short_branch_offset(offset)) {
+ // We've got a winner. Replace this branch.
+ MachNode *replacement = mach->short_branch_version(this);
+ b->_nodes.map(j, replacement);
+
+ // Update the jmp_end size to save time in our
+ // next pass.
+ jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc));
+ DEBUG_ONLY( jmp_target[i] = bnum; );
+ }
+ } else {
+#ifndef PRODUCT
+ mach->dump(3);
+#endif
+ Unimplemented();
+ }
+ }
+ }
+ }
+
+ // Compute the size of first NumberOfLoopInstrToAlign instructions at head
+ // of a loop. It is used to determine the padding for loop alignment.
+ compute_loop_first_inst_sizes();
+
+ // Step 3, compute the offsets of all the labels
+ uint last_call_adr = max_uint;
+ for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
+ // copy the offset of the beginning to the corresponding label
+ assert(labels[i].is_unused(), "cannot patch at this point");
+ labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS);
+
+ // insert padding for any instructions that need it
+ Block *b = _cfg->_blocks[i];
+ uint last_inst = b->_nodes.size();
+ uint adr = blk_starts[i];
+ for( uint j = 0; j<last_inst; j++ ) {
+ nj = b->_nodes[j];
+ if( nj->is_Mach() ) {
+ int padding = nj->as_Mach()->compute_padding(adr);
+ // If call/safepoint are adjacent insert a nop (5010568)
+ if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() &&
+ adr == last_call_adr ) {
+ padding = nop_size;
+ }
+ if(padding > 0) {
+ assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
+ int nops_cnt = padding / nop_size;
+ MachNode *nop = new (this) MachNopNode(nops_cnt);
+ b->_nodes.insert(j++, nop);
+ _cfg->_bbs.map( nop->_idx, b );
+ adr += padding;
+ last_inst++;
+ }
+ }
+ adr += nj->size(_regalloc);
+
+ // Remember end of call offset
+ if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
+ last_call_adr = adr;
+ }
+ }
+
+ if ( i != _cfg->_num_blocks-1) {
+ // Get the size of the block
+ uint blk_size = adr - blk_starts[i];
+
+ // When the next block starts a loop, we may insert pad NOP
+ // instructions.
+ Block *nb = _cfg->_blocks[i+1];
+ int current_offset = blk_starts[i] + blk_size;
+ current_offset += nb->alignment_padding(current_offset);
+ // Save block size; update total method size
+ blk_starts[i+1] = current_offset;
+ }
+ }
+
+#ifdef ASSERT
+ for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
+ if( jmp_target[i] != 0 ) {
+ int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]);
+ if (!_matcher->is_short_branch_offset(offset)) {
+ tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]);
+ }
+ assert(_matcher->is_short_branch_offset(offset), "Displacement too large for short jmp");
+ }
+ }
+#endif
+
+ // ------------------
+ // Compute size for code buffer
+ code_size = blk_starts[i-1] + jmp_end[i-1];
+
+ // Relocation records
+ reloc_size += 1; // Relo entry for exception handler
+
+ // Adjust reloc_size to number of record of relocation info
+ // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
+ // a relocation index.
+ // The CodeBuffer will expand the locs array if this estimate is too low.
+ reloc_size *= 10 / sizeof(relocInfo);
+
+ // Adjust const_size to number of bytes
+ const_size *= 2*jintSize; // both float and double take two words per entry
+
+}
+
+//------------------------------FillLocArray-----------------------------------
+// Create a bit of debug info and append it to the array. The mapping is from
+// Java local or expression stack to constant, register or stack-slot. For
+// doubles, insert 2 mappings and return 1 (to tell the caller that the next
+// entry has been taken care of and caller should skip it).
+static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
+ // This should never have accepted Bad before
+ assert(OptoReg::is_valid(regnum), "location must be valid");
+ return (OptoReg::is_reg(regnum))
+ ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
+ : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
+}
+
+void Compile::FillLocArray( int idx, Node *local, GrowableArray<ScopeValue*> *array ) {
+ assert( local, "use _top instead of null" );
+ if (array->length() != idx) {
+ assert(array->length() == idx + 1, "Unexpected array count");
+ // Old functionality:
+ // return
+ // New functionality:
+ // Assert if the local is not top. In product mode let the new node
+ // override the old entry.
+ assert(local == top(), "LocArray collision");
+ if (local == top()) {
+ return;
+ }
+ array->pop();
+ }
+ const Type *t = local->bottom_type();
+
+ // Grab the register number for the local
+ OptoReg::Name regnum = _regalloc->get_reg_first(local);
+ if( OptoReg::is_valid(regnum) ) {// Got a register/stack?
+ // Record the double as two float registers.
+ // The register mask for such a value always specifies two adjacent
+ // float registers, with the lower register number even.
+ // Normally, the allocation of high and low words to these registers
+ // is irrelevant, because nearly all operations on register pairs
+ // (e.g., StoreD) treat them as a single unit.
+ // Here, we assume in addition that the words in these two registers
+ // stored "naturally" (by operations like StoreD and double stores
+ // within the interpreter) such that the lower-numbered register
+ // is written to the lower memory address. This may seem like
+ // a machine dependency, but it is not--it is a requirement on
+ // the author of the <arch>.ad file to ensure that, for every
+ // even/odd double-register pair to which a double may be allocated,
+ // the word in the even single-register is stored to the first
+ // memory word. (Note that register numbers are completely
+ // arbitrary, and are not tied to any machine-level encodings.)
+#ifdef _LP64
+ if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) {
+ array->append(new ConstantIntValue(0));
+ array->append(new_loc_value( _regalloc, regnum, Location::dbl ));
+ } else if ( t->base() == Type::Long ) {
+ array->append(new ConstantIntValue(0));
+ array->append(new_loc_value( _regalloc, regnum, Location::lng ));
+ } else if ( t->base() == Type::RawPtr ) {
+ // jsr/ret return address which must be restored into the full
+ // width 64-bit stack slot.
+ array->append(new_loc_value( _regalloc, regnum, Location::lng ));
+ }
+#else //_LP64
+#ifdef SPARC
+ if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
+ // For SPARC we have to swap high and low words for
+ // long values stored in a single-register (g0-g7).
+ array->append(new_loc_value( _regalloc, regnum , Location::normal ));
+ array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
+ } else
+#endif //SPARC
+ if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) {
+ // Repack the double/long as two jints.
+ // The convention the interpreter uses is that the second local
+ // holds the first raw word of the native double representation.
+ // This is actually reasonable, since locals and stack arrays
+ // grow downwards in all implementations.
+ // (If, on some machine, the interpreter's Java locals or stack
+ // were to grow upwards, the embedded doubles would be word-swapped.)
+ array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
+ array->append(new_loc_value( _regalloc, regnum , Location::normal ));
+ }
+#endif //_LP64
+ else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
+ OptoReg::is_reg(regnum) ) {
+ array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double
+ ? Location::float_in_dbl : Location::normal ));
+ } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
+ array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
+ ? Location::int_in_long : Location::normal ));
+ } else {
+ array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
+ }
+ return;
+ }
+
+ // No register. It must be constant data.
+ switch (t->base()) {
+ case Type::Half: // Second half of a double
+ ShouldNotReachHere(); // Caller should skip 2nd halves
+ break;
+ case Type::AnyPtr:
+ array->append(new ConstantOopWriteValue(NULL));
+ break;
+ case Type::AryPtr:
+ case Type::InstPtr:
+ case Type::KlassPtr: // fall through
+ array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->encoding()));
+ break;
+ case Type::Int:
+ array->append(new ConstantIntValue(t->is_int()->get_con()));
+ break;
+ case Type::RawPtr:
+ // A return address (T_ADDRESS).
+ assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
+#ifdef _LP64
+ // Must be restored to the full-width 64-bit stack slot.
+ array->append(new ConstantLongValue(t->is_ptr()->get_con()));
+#else
+ array->append(new ConstantIntValue(t->is_ptr()->get_con()));
+#endif
+ break;
+ case Type::FloatCon: {
+ float f = t->is_float_constant()->getf();
+ array->append(new ConstantIntValue(jint_cast(f)));
+ break;
+ }
+ case Type::DoubleCon: {
+ jdouble d = t->is_double_constant()->getd();
+#ifdef _LP64
+ array->append(new ConstantIntValue(0));
+ array->append(new ConstantDoubleValue(d));
+#else
+ // Repack the double as two jints.
+ // The convention the interpreter uses is that the second local
+ // holds the first raw word of the native double representation.
+ // This is actually reasonable, since locals and stack arrays
+ // grow downwards in all implementations.
+ // (If, on some machine, the interpreter's Java locals or stack
+ // were to grow upwards, the embedded doubles would be word-swapped.)
+ jint *dp = (jint*)&d;
+ array->append(new ConstantIntValue(dp[1]));
+ array->append(new ConstantIntValue(dp[0]));
+#endif
+ break;
+ }
+ case Type::Long: {
+ jlong d = t->is_long()->get_con();
+#ifdef _LP64
+ array->append(new ConstantIntValue(0));
+ array->append(new ConstantLongValue(d));
+#else
+ // Repack the long as two jints.
+ // The convention the interpreter uses is that the second local
+ // holds the first raw word of the native double representation.
+ // This is actually reasonable, since locals and stack arrays
+ // grow downwards in all implementations.
+ // (If, on some machine, the interpreter's Java locals or stack
+ // were to grow upwards, the embedded doubles would be word-swapped.)
+ jint *dp = (jint*)&d;
+ array->append(new ConstantIntValue(dp[1]));
+ array->append(new ConstantIntValue(dp[0]));
+#endif
+ break;
+ }
+ case Type::Top: // Add an illegal value here
+ array->append(new LocationValue(Location()));
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+}
+
+// Determine if this node starts a bundle
+bool Compile::starts_bundle(const Node *n) const {
+ return (_node_bundling_limit > n->_idx &&
+ _node_bundling_base[n->_idx].starts_bundle());
+}
+
+//--------------------------Process_OopMap_Node--------------------------------
+void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {
+
+ // Handle special safepoint nodes for synchronization
+ MachSafePointNode *sfn = mach->as_MachSafePoint();
+ MachCallNode *mcall;
+
+#ifdef ENABLE_ZAP_DEAD_LOCALS
+ assert( is_node_getting_a_safepoint(mach), "logic does not match; false negative");
+#endif
+
+ int safepoint_pc_offset = current_offset;
+
+ // Add the safepoint in the DebugInfoRecorder
+ if( !mach->is_MachCall() ) {
+ mcall = NULL;
+ debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map);
+ } else {
+ mcall = mach->as_MachCall();
+ safepoint_pc_offset += mcall->ret_addr_offset();
+ debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
+ }
+
+ // Loop over the JVMState list to add scope information
+ // Do not skip safepoints with a NULL method, they need monitor info
+ JVMState* youngest_jvms = sfn->jvms();
+ int max_depth = youngest_jvms->depth();
+
+ // Visit scopes from oldest to youngest.
+ for (int depth = 1; depth <= max_depth; depth++) {
+ JVMState* jvms = youngest_jvms->of_depth(depth);
+ int idx;
+ ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
+ // Safepoints that do not have method() set only provide oop-map and monitor info
+ // to support GC; these do not support deoptimization.
+ int num_locs = (method == NULL) ? 0 : jvms->loc_size();
+ int num_exps = (method == NULL) ? 0 : jvms->stk_size();
+ int num_mon = jvms->nof_monitors();
+ assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(),
+ "JVMS local count must match that of the method");
+
+ // Add Local and Expression Stack Information
+
+ // Insert locals into the locarray
+ GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs);
+ for( idx = 0; idx < num_locs; idx++ ) {
+ FillLocArray( idx, sfn->local(jvms, idx), locarray );
+ }
+
+ // Insert expression stack entries into the exparray
+ GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps);
+ for( idx = 0; idx < num_exps; idx++ ) {
+ FillLocArray( idx, sfn->stack(jvms, idx), exparray );
+ }
+
+ // Add in mappings of the monitors
+ assert( !method ||
+ !method->is_synchronized() ||
+ method->is_native() ||
+ num_mon > 0 ||
+ !GenerateSynchronizationCode,
+ "monitors must always exist for synchronized methods");
+
+ // Build the growable array of ScopeValues for exp stack
+ GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon);
+
+ // Loop over monitors and insert into array
+ for(idx = 0; idx < num_mon; idx++) {
+ // Grab the node that defines this monitor
+ Node* box_node;
+ Node* obj_node;
+ box_node = sfn->monitor_box(jvms, idx);
+ obj_node = sfn->monitor_obj(jvms, idx);
+
+ // Create ScopeValue for object
+ ScopeValue *scval = NULL;
+ if( !obj_node->is_Con() ) {
+ OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node);
+ scval = new_loc_value( _regalloc, obj_reg, Location::oop );
+ } else {
+ scval = new ConstantOopWriteValue(obj_node->bottom_type()->is_instptr()->const_oop()->encoding());
+ }
+
+ OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node);
+ monarray->append(new MonitorValue(scval, Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg))));
+ }
+
+ // Build first class objects to pass to scope
+ DebugToken *locvals = debug_info()->create_scope_values(locarray);
+ DebugToken *expvals = debug_info()->create_scope_values(exparray);
+ DebugToken *monvals = debug_info()->create_monitor_values(monarray);
+
+ // Make method available for all Safepoints
+ ciMethod* scope_method = method ? method : _method;
+ // Describe the scope here
+ assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
+ debug_info()->describe_scope(safepoint_pc_offset,scope_method,jvms->bci(),locvals,expvals,monvals);
+ } // End jvms loop
+
+ // Mark the end of the scope set.
+ debug_info()->end_safepoint(safepoint_pc_offset);
+}
+
+
+
+// A simplified version of Process_OopMap_Node, to handle non-safepoints.
+class NonSafepointEmitter {
+ Compile* C;
+ JVMState* _pending_jvms;
+ int _pending_offset;
+
+ void emit_non_safepoint();
+
+ public:
+ NonSafepointEmitter(Compile* compile) {
+ this->C = compile;
+ _pending_jvms = NULL;
+ _pending_offset = 0;
+ }
+
+ void observe_instruction(Node* n, int pc_offset) {
+ if (!C->debug_info()->recording_non_safepoints()) return;
+
+ Node_Notes* nn = C->node_notes_at(n->_idx);
+ if (nn == NULL || nn->jvms() == NULL) return;
+ if (_pending_jvms != NULL &&
+ _pending_jvms->same_calls_as(nn->jvms())) {
+ // Repeated JVMS? Stretch it up here.
+ _pending_offset = pc_offset;
+ } else {
+ if (_pending_jvms != NULL &&
+ _pending_offset < pc_offset) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
+ if (pc_offset > C->debug_info()->last_pc_offset()) {
+ // This is the only way _pending_jvms can become non-NULL:
+ _pending_jvms = nn->jvms();
+ _pending_offset = pc_offset;
+ }
+ }
+ }
+
+ // Stay out of the way of real safepoints:
+ void observe_safepoint(JVMState* jvms, int pc_offset) {
+ if (_pending_jvms != NULL &&
+ !_pending_jvms->same_calls_as(jvms) &&
+ _pending_offset < pc_offset) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
+ }
+
+ void flush_at_end() {
+ if (_pending_jvms != NULL) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
+ }
+};
+
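+// In effect, the emitter coalesces a run of consecutive instructions that
+// share the same JVMS into a single non-safepoint debug record at the pc of
+// the last instruction in the run; a change of JVMS or the end of the method
+// flushes the pending record, while a real safepoint supersedes it.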
+void NonSafepointEmitter::emit_non_safepoint() {
+ JVMState* youngest_jvms = _pending_jvms;
+ int pc_offset = _pending_offset;
+
+ // Clear it now:
+ _pending_jvms = NULL;
+
+ DebugInformationRecorder* debug_info = C->debug_info();
+ assert(debug_info->recording_non_safepoints(), "sanity");
+
+ debug_info->add_non_safepoint(pc_offset);
+ int max_depth = youngest_jvms->depth();
+
+ // Visit scopes from oldest to youngest.
+ for (int depth = 1; depth <= max_depth; depth++) {
+ JVMState* jvms = youngest_jvms->of_depth(depth);
+ ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
+ debug_info->describe_scope(pc_offset, method, jvms->bci());
+ }
+
+ // Mark the end of the scope set.
+ debug_info->end_non_safepoint(pc_offset);
+}
+
+
+
+// helper for Fill_buffer bailout logic
+static void turn_off_compiler(Compile* C) {
+ if (CodeCache::unallocated_capacity() >= CodeCacheMinimumFreeSpace*10) {
+ // Do not turn off compilation if a single giant method has
+ // blown the code cache size.
+ C->record_failure("excessive request to CodeCache");
+ } else {
+ UseInterpreter = true;
+ UseCompiler = false;
+ AlwaysCompileLoopMethods = false;
+ C->record_failure("CodeCache is full");
+ warning("CodeCache is full. Compiling has been disabled");
+ }
+}
+
+
+//------------------------------Fill_buffer------------------------------------
+void Compile::Fill_buffer() {
+
+ // Set the initially allocated size
+ int code_req = initial_code_capacity;
+ int locs_req = initial_locs_capacity;
+ int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
+ int const_req = initial_const_capacity;
+ bool labels_not_set = true;
+
+ int pad_req = NativeCall::instruction_size;
+ // The extra spacing after the code is necessary on some platforms.
+ // Sometimes we need to patch in a jump after the last instruction,
+ // if the nmethod has been deoptimized. (See 4932387, 4894843.)
+
+ uint i;
+ // Compute the byte offset where we can store the deopt pc.
+ if (fixed_slots() != 0) {
+ _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
+ }
+
+ // Compute prolog code size
+ _method_size = 0;
+ _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
+#ifdef IA64
+ if (save_argument_registers()) {
+ // 4815101: this is a stub with implicit and unknown precision fp args.
+ // The usual spill mechanism can only generate stfd's in this case, which
+ // doesn't work if the fp reg to spill contains a single-precision denorm.
+ // Instead, we hack around the normal spill mechanism using stfspill's and
+ // ldffill's in the MachProlog and MachEpilog emit methods. We allocate
+ // space here for the fp arg regs (f8-f15) we're going to thusly spill.
+ //
+ // If we ever implement 16-byte 'registers' == stack slots, we can
+ // get rid of this hack and have SpillCopy generate stfspill/ldffill
+ // instead of stfd/stfs/ldfd/ldfs.
+ _frame_slots += 8*(16/BytesPerInt);
+ }
+#endif
+ assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );
+
+ // Create an array of unused labels, one for each basic block
+ Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
+
+ for( i=0; i <= _cfg->_num_blocks; i++ ) {
+ blk_labels[i].init();
+ }
+
+ // If this machine supports different size branch offsets, then pre-compute
+ // the length of the blocks
+ if( _matcher->is_short_branch_offset(0) ) {
+ Shorten_branches(blk_labels, code_req, locs_req, stub_req, const_req);
+ labels_not_set = false;
+ }
+
+ // nmethod and CodeBuffer count stubs & constants as part of method's code.
+ int exception_handler_req = size_exception_handler();
+ int deopt_handler_req = size_deopt_handler();
+ exception_handler_req += MAX_stubs_size; // add marginal slop for handler
+ deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
+ stub_req += MAX_stubs_size; // ensure per-stub margin
+ code_req += MAX_inst_size; // ensure per-instruction margin
+ if (StressCodeBuffers)
+ code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
+ int total_req = code_req + pad_req + stub_req + exception_handler_req + deopt_handler_req + const_req;
+ CodeBuffer* cb = code_buffer();
+ cb->initialize(total_req, locs_req);
+
+ // Have we run out of code space?
+ if (cb->blob() == NULL) {
+ turn_off_compiler(this);
+ return;
+ }
+ // Configure the code buffer.
+ cb->initialize_consts_size(const_req);
+ cb->initialize_stubs_size(stub_req);
+ cb->initialize_oop_recorder(env()->oop_recorder());
+
+ // fill in the nop array for bundling computations
+ MachNode *_nop_list[Bundle::_nop_count];
+ Bundle::initialize_nops(_nop_list, this);
+
+ // Create oopmap set.
+ _oop_map_set = new OopMapSet();
+
+ // !!!!! This preserves old handling of oopmaps for now
+ debug_info()->set_oopmaps(_oop_map_set);
+
+ // Count and start of implicit null check instructions
+ uint inct_cnt = 0;
+ uint *inct_starts = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);
+
+ // Count and start of calls
+ uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);
+
+ uint return_offset = 0;
+ MachNode *nop = new (this) MachNopNode();
+
+ int previous_offset = 0;
+ int current_offset = 0;
+ int last_call_offset = -1;
+
+ // Create an array of node offsets, one for each node, if printing is enabled
+#ifndef PRODUCT
+ int *node_offsets = NULL;
+ uint node_offset_limit = unique();
+
+ if ( print_assembly() )
+ node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit);
+#endif
+
+ NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily
+
+ // ------------------
+ // Now fill in the code buffer
+ Node *delay_slot = NULL;
+
+ for( i=0; i < _cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+
+ Node *head = b->head();
+
+ // If this block needs to start aligned (i.e., can be reached other
+ // than by falling-thru from the previous block), then force the
+ // start of a new bundle.
+ if( Pipeline::requires_bundling() && starts_bundle(head) )
+ cb->flush_bundle(true);
+
+ // Define the label at the beginning of the basic block
+ if( labels_not_set )
+ MacroAssembler(cb).bind( blk_labels[b->_pre_order] );
+ else
+ assert( blk_labels[b->_pre_order].loc_pos() == cb->code_size(),
+ "label position does not match code offset" );
+
+ uint last_inst = b->_nodes.size();
+
+ // Emit block normally, except for last instruction.
+ // Emit means "dump code bits into code buffer".
+ for( uint j = 0; j<last_inst; j++ ) {
+
+ // Get the node
+ Node* n = b->_nodes[j];
+
+ // See if delay slots are supported
+ if (valid_bundle_info(n) &&
+ node_bundling(n)->used_in_unconditional_delay()) {
+ assert(delay_slot == NULL, "no use of delay slot node");
+ assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");
+
+ delay_slot = n;
+ continue;
+ }
+
+ // If this starts a new instruction group, then flush the current one
+ // (but allow split bundles)
+ if( Pipeline::requires_bundling() && starts_bundle(n) )
+ cb->flush_bundle(false);
+
+ // The following logic is duplicated in the code ifdeffed for
+ // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It
+ // should be factored out. Or maybe dispersed to the nodes?
+
+ // Special handling for SafePoint/Call Nodes
+ bool is_mcall = false;
+ if( n->is_Mach() ) {
+ MachNode *mach = n->as_Mach();
+ is_mcall = n->is_MachCall();
+ bool is_sfn = n->is_MachSafePoint();
+
+ // If this requires all previous instructions be flushed, then do so
+ if( is_sfn || is_mcall || mach->alignment_required() != 1) {
+ cb->flush_bundle(true);
+ current_offset = cb->code_size();
+ }
+
+ // align the instruction if necessary
+ int nop_size = nop->size(_regalloc);
+ int padding = mach->compute_padding(current_offset);
+ // Make sure safepoint node for polling is distinct from a call's
+ // return by adding a nop if needed.
+ if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) {
+ padding = nop_size;
+ }
+ assert( labels_not_set || padding == 0, "instruction should already be aligned");
+
+ if(padding > 0) {
+ assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
+ int nops_cnt = padding / nop_size;
+ MachNode *nop = new (this) MachNopNode(nops_cnt);
+ b->_nodes.insert(j++, nop);
+ last_inst++;
+ _cfg->_bbs.map( nop->_idx, b );
+ nop->emit(*cb, _regalloc);
+ cb->flush_bundle(true);
+ current_offset = cb->code_size();
+ }
+
+ // Remember the start of the last call in a basic block
+ if (is_mcall) {
+ MachCallNode *mcall = mach->as_MachCall();
+
+ // This destination address is NOT PC-relative
+ mcall->method_set((intptr_t)mcall->entry_point());
+
+ // Save the return address
+ call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
+
+ if (!mcall->is_safepoint_node()) {
+ is_mcall = false;
+ is_sfn = false;
+ }
+ }
+
+ // Because MachCall inherits from MachSafePoint, sfn is valid whenever mcall is
+ if( is_sfn || is_mcall ) {
+
+ // Handle special safepoint nodes for synchronization
+ if( !is_mcall ) {
+ MachSafePointNode *sfn = mach->as_MachSafePoint();
+ // !!!!! Stubs only need an oopmap right now, so bail out
+ if( sfn->jvms()->method() == NULL) {
+ // Write the oopmap directly to the code blob??!!
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive");
+# endif
+ continue;
+ }
+ } // End synchronization
+
+ non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
+ current_offset);
+ Process_OopMap_Node(mach, current_offset);
+ } // End if safepoint
+
+ // If this is a null check, then add the start of the previous instruction to the list
+ else if( mach->is_MachNullCheck() ) {
+ inct_starts[inct_cnt++] = previous_offset;
+ }
+
+ // If this is a branch, then fill in the label with the target BB's label
+ else if ( mach->is_Branch() ) {
+
+ if ( mach->ideal_Opcode() == Op_Jump ) {
+ for (uint h = 0; h < b->_num_succs; h++ ) {
+ Block* succs_block = b->_succs[h];
+ for (uint j = 1; j < succs_block->num_preds(); j++) {
+ Node* jpn = succs_block->pred(j);
+ if ( jpn->is_JumpProj() && jpn->in(0) == mach ) {
+ uint block_num = succs_block->non_connector()->_pre_order;
+ Label *blkLabel = &blk_labels[block_num];
+ mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
+ }
+ }
+ }
+ } else {
+ // For Branches
+ // This requires the TRUE branch target be in succs[0]
+ uint block_num = b->non_connector_successor(0)->_pre_order;
+ mach->label_set( blk_labels[block_num], block_num );
+ }
+ }
+
+#ifdef ASSERT
+ // Check that oop-store precedes the card-mark
+ else if( mach->ideal_Opcode() == Op_StoreCM ) {
+ uint storeCM_idx = j;
+ Node *oop_store = mach->in(mach->_cnt); // First precedence edge
+ assert( oop_store != NULL, "storeCM expects a precedence edge");
+ uint i4;
+ for( i4 = 0; i4 < last_inst; ++i4 ) {
+ if( b->_nodes[i4] == oop_store ) break;
+ }
+ // Note: This test can provide a false failure if other precedence
+ // edges have been added to the storeCMNode.
+ assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
+ }
+#endif
+
+ else if( !n->is_Proj() ) {
+ // Remember the beginning of the previous instruction, in case
+ // it's followed by a flag-kill and a null-check. Happens on
+ // Intel all the time, with add-to-memory kind of opcodes.
+ previous_offset = current_offset;
+ }
+ }
+
+ // Make sure there is sufficient space remaining, expanding the buffer if needed
+ cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
+ if (cb->blob() == NULL) {
+ turn_off_compiler(this);
+ return;
+ }
+
+ // Save the offset for the listing
+#ifndef PRODUCT
+ if( node_offsets && n->_idx < node_offset_limit )
+ node_offsets[n->_idx] = cb->code_size();
+#endif
+
+ // "Normal" instruction case
+ n->emit(*cb, _regalloc);
+ current_offset = cb->code_size();
+ non_safepoints.observe_instruction(n, current_offset);
+
+ // mcall is last "call" that can be a safepoint
+ // record it so we can see if a poll will directly follow it
+ // in which case we'll need a pad to make the PcDesc sites unique
+ // see 5010568. This can be slightly inaccurate but conservative
+ // in the case that return address is not actually at current_offset.
+ // This is a small price to pay.
+
+ if (is_mcall) {
+ last_call_offset = current_offset;
+ }
+
+ // See if this instruction has a delay slot
+ if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
+ assert(delay_slot != NULL, "expecting delay slot node");
+
+ // Back up 1 instruction
+ cb->set_code_end(
+ cb->code_end()-Pipeline::instr_unit_size());
+
+ // Save the offset for the listing
+#ifndef PRODUCT
+ if( node_offsets && delay_slot->_idx < node_offset_limit )
+ node_offsets[delay_slot->_idx] = cb->code_size();
+#endif
+
+ // Support a SafePoint in the delay slot
+ if( delay_slot->is_MachSafePoint() ) {
+ MachNode *mach = delay_slot->as_Mach();
+ // !!!!! Stubs only need an oopmap right now, so bail out
+ if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) {
+ // Write the oopmap directly to the code blob??!!
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive");
+# endif
+ delay_slot = NULL;
+ continue;
+ }
+
+ int adjusted_offset = current_offset - Pipeline::instr_unit_size();
+ non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
+ adjusted_offset);
+ // Generate an OopMap entry
+ Process_OopMap_Node(mach, adjusted_offset);
+ }
+
+ // Insert the delay slot instruction
+ delay_slot->emit(*cb, _regalloc);
+
+ // Don't reuse it
+ delay_slot = NULL;
+ }
+
+ } // End for all instructions in block
+
+ // If the next block _starts_ a loop, pad this block out to align
+ // the loop start a little. Helps prevent pipe stalls at loop starts
+ int nop_size = (new (this) MachNopNode())->size(_regalloc);
+ if( i<_cfg->_num_blocks-1 ) {
+ Block *nb = _cfg->_blocks[i+1];
+ uint padding = nb->alignment_padding(current_offset);
+ if( padding > 0 ) {
+ MachNode *nop = new (this) MachNopNode(padding / nop_size);
+ b->_nodes.insert( b->_nodes.size(), nop );
+ _cfg->_bbs.map( nop->_idx, b );
+ nop->emit(*cb, _regalloc);
+ current_offset = cb->code_size();
+ }
+ }
+
+ } // End of for all blocks
+
+ non_safepoints.flush_at_end();
+
+ // Offset too large?
+ if (failing()) return;
+
+ // Define a pseudo-label at the end of the code
+ MacroAssembler(cb).bind( blk_labels[_cfg->_num_blocks] );
+
+ // Compute the size of the first block
+ _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
+
+ assert(cb->code_size() < 500000, "method is unreasonably large");
+
+ // ------------------
+
+#ifndef PRODUCT
+ // Information on the size of the method, without the extraneous code
+ Scheduling::increment_method_size(cb->code_size());
+#endif
+
+ // ------------------
+ // Fill in exception table entries.
+ FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels);
+
+ // Only java methods have exception handlers and deopt handlers
+ if (_method) {
+ // Emit the exception handler code.
+ _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb));
+ // Emit the deopt handler code.
+ _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb));
+ }
+
+ // One last check for failed CodeBuffer::expand:
+ if (cb->blob() == NULL) {
+ turn_off_compiler(this);
+ return;
+ }
+
+#ifndef PRODUCT
+ // Dump the assembly code, including basic-block numbers
+ if (print_assembly()) {
+ ttyLocker ttyl; // keep the following output all in one block
+ if (!VMThread::should_terminate()) { // test this under the tty lock
+ // This output goes directly to the tty, not the compiler log.
+ // To enable tools to match it up with the compilation activity,
+ // be sure to tag this tty output with the compile ID.
+ if (xtty != NULL) {
+ xtty->head("opto_assembly compile_id='%d'%s", compile_id(),
+ is_osr_compilation() ? " compile_kind='osr'" :
+ "");
+ }
+ if (method() != NULL) {
+ method()->print_oop();
+ print_codes();
+ }
+ dump_asm(node_offsets, node_offset_limit);
+ if (xtty != NULL) {
+ xtty->tail("opto_assembly");
+ }
+ }
+ }
+#endif
+
+}
+
+void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) {
+ _inc_table.set_size(cnt);
+
+ uint inct_cnt = 0;
+ for( uint i=0; i<_cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+ Node *n = NULL;
+ int j;
+
+ // Find the branch; ignore trailing NOPs.
+ for( j = b->_nodes.size()-1; j>=0; j-- ) {
+ n = b->_nodes[j];
+ if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con )
+ break;
+ }
+
+ // If we didn't find anything, continue
+ if( j < 0 ) continue;
+
+ // Compute ExceptionHandlerTable subtable entry and add it
+ // (skip empty blocks)
+ if( n->is_Catch() ) {
+
+ // Get the offset of the return from the call
+ uint call_return = call_returns[b->_pre_order];
+#ifdef ASSERT
+ assert( call_return > 0, "no call seen for this basic block" );
+ while( b->_nodes[--j]->Opcode() == Op_MachProj ) ;
+ assert( b->_nodes[j]->is_Call(), "CatchProj must follow call" );
+#endif
+ // The last instruction is a CatchNode; find its CatchProjNodes
+ int nof_succs = b->_num_succs;
+ // allocate space
+ GrowableArray<intptr_t> handler_bcis(nof_succs);
+ GrowableArray<intptr_t> handler_pcos(nof_succs);
+ // iterate through all successors
+ for (int j = 0; j < nof_succs; j++) {
+ Block* s = b->_succs[j];
+ bool found_p = false;
+ for( uint k = 1; k < s->num_preds(); k++ ) {
+ Node *pk = s->pred(k);
+ if( pk->is_CatchProj() && pk->in(0) == n ) {
+ const CatchProjNode* p = pk->as_CatchProj();
+ found_p = true;
+ // add the corresponding handler bci & pco information
+ if( p->_con != CatchProjNode::fall_through_index ) {
+ // p leads to an exception handler (and is not fall through)
+ assert(s == _cfg->_blocks[s->_pre_order],"bad numbering");
+ // no duplicates, please
+ if( !handler_bcis.contains(p->handler_bci()) ) {
+ uint block_num = s->non_connector()->_pre_order;
+ handler_bcis.append(p->handler_bci());
+ handler_pcos.append(blk_labels[block_num].loc_pos());
+ }
+ }
+ }
+ }
+ assert(found_p, "no matching predecessor found");
+ // Note: Due to empty block removal, one block may have
+ // several CatchProj inputs, from the same Catch.
+ }
+
+ // Set the offset of the return from the call
+ _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos);
+ continue;
+ }
+
+ // Handle implicit null exception table updates
+ if( n->is_MachNullCheck() ) {
+ uint block_num = b->non_connector_successor(0)->_pre_order;
+ _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
+ continue;
+ }
+ } // End of for all blocks fill in exception table entries
+}
+
+// Static Variables
+#ifndef PRODUCT
+uint Scheduling::_total_nop_size = 0;
+uint Scheduling::_total_method_size = 0;
+uint Scheduling::_total_branches = 0;
+uint Scheduling::_total_unconditional_delays = 0;
+uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
+#endif
+
+// Initializer for class Scheduling
+
+Scheduling::Scheduling(Arena *arena, Compile &compile)
+ : _arena(arena),
+ _cfg(compile.cfg()),
+ _bbs(compile.cfg()->_bbs),
+ _regalloc(compile.regalloc()),
+ _reg_node(arena),
+ _bundle_instr_count(0),
+ _bundle_cycle_number(0),
+ _scheduled(arena),
+ _available(arena),
+ _next_node(NULL),
+ _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]),
+ _pinch_free_list(arena)
+#ifndef PRODUCT
+ , _branches(0)
+ , _unconditional_delays(0)
+#endif
+{
+ // Create a MachNopNode
+ _nop = new (&compile) MachNopNode();
+
+ // Now that the nops are in the array, save the count
+ // (but allow entries for the nops)
+ _node_bundling_limit = compile.unique();
+ uint node_max = _regalloc->node_regs_max_index();
+
+ compile.set_node_bundling_limit(_node_bundling_limit);
+
+ // This one is persistent within the Compile class
+ _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max);
+
+ // Allocate space for fixed-size arrays
+ _node_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
+ _uses = NEW_ARENA_ARRAY(arena, short, node_max);
+ _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
+
+ // Clear the arrays
+ memset(_node_bundling_base, 0, node_max * sizeof(Bundle));
+ memset(_node_latency, 0, node_max * sizeof(unsigned short));
+ memset(_uses, 0, node_max * sizeof(short));
+ memset(_current_latency, 0, node_max * sizeof(unsigned short));
+
+ // Clear the bundling information
+ memcpy(_bundle_use_elements,
+ Pipeline_Use::elaborated_elements,
+ sizeof(Pipeline_Use::elaborated_elements));
+
+ // Get the last node
+ Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1];
+
+ _next_node = bb->_nodes[bb->_nodes.size()-1];
+}
+
+#ifndef PRODUCT
+// Scheduling destructor
+Scheduling::~Scheduling() {
+ _total_branches += _branches;
+ _total_unconditional_delays += _unconditional_delays;
+}
+#endif
+
+// Step ahead "i" cycles
+void Scheduling::step(uint i) {
+
+ Bundle *bundle = node_bundling(_next_node);
+ bundle->set_starts_bundle();
+
+ // Update the bundle record, but leave the flags information alone
+ if (_bundle_instr_count > 0) {
+ bundle->set_instr_count(_bundle_instr_count);
+ bundle->set_resources_used(_bundle_use.resourcesUsed());
+ }
+
+ // Update the state information
+ _bundle_instr_count = 0;
+ _bundle_cycle_number += i;
+ _bundle_use.step(i);
+}
+
+void Scheduling::step_and_clear() {
+ Bundle *bundle = node_bundling(_next_node);
+ bundle->set_starts_bundle();
+
+ // Update the bundle record
+ if (_bundle_instr_count > 0) {
+ bundle->set_instr_count(_bundle_instr_count);
+ bundle->set_resources_used(_bundle_use.resourcesUsed());
+
+ _bundle_cycle_number += 1;
+ }
+
+ // Clear the bundling information
+ _bundle_instr_count = 0;
+ _bundle_use.reset();
+
+ memcpy(_bundle_use_elements,
+ Pipeline_Use::elaborated_elements,
+ sizeof(Pipeline_Use::elaborated_elements));
+}
+
+//------------------------------ScheduleAndBundle------------------------------
+// Perform instruction scheduling and bundling over the sequence of
+// instructions in backwards order.
+void Compile::ScheduleAndBundle() {
+
+ // Don't optimize this if it isn't a method
+ if (!_method)
+ return;
+
+ // Don't optimize this if scheduling is disabled
+ if (!do_scheduling())
+ return;
+
+ NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
+
+ // Create a data structure for all the scheduling information
+ Scheduling scheduling(Thread::current()->resource_area(), *this);
+
+ // Walk backwards over each basic block, computing the needed alignment
+ // Walk over all the basic blocks
+ scheduling.DoScheduling();
+}
+
+//------------------------------ComputeLocalLatenciesForward-------------------
+// Compute the latency of all the instructions. This is fairly simple,
+// because we already have a legal ordering. Walk over the instructions
+// from first to last, and compute the latency of the instruction based
+ // on the latency of the preceding instruction(s).
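+// A minimal worked example (with made-up latencies, not taken from any real
+// machine description): if the defs feeding a node carry latencies 3 and 2 and
+// the per-input latencies use->latency(k) are both 1, the node's latency
+// becomes max(1, 3+1, 2+1) = 4; a node with no inputs keeps the floor value 1.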
+void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# -> ComputeLocalLatenciesForward\n");
+#endif
+
+ // Walk over all the schedulable instructions
+ for( uint j=_bb_start; j < _bb_end; j++ ) {
+
+ // This is a kludge, forcing all latency calculations to start at 1.
+ // Used to allow latency 0 to force an instruction to the beginning
+ // of the bb
+ uint latency = 1;
+ Node *use = bb->_nodes[j];
+ uint nlen = use->len();
+
+ // Walk over all the inputs
+ for ( uint k=0; k < nlen; k++ ) {
+ Node *def = use->in(k);
+ if (!def)
+ continue;
+
+ uint l = _node_latency[def->_idx] + use->latency(k);
+ if (latency < l)
+ latency = l;
+ }
+
+ _node_latency[use->_idx] = latency;
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# latency %4d: ", latency);
+ use->dump();
+ }
+#endif
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# <- ComputeLocalLatenciesForward\n");
+#endif
+
+} // end ComputeLocalLatenciesForward
+
+// See if this node fits into the present instruction bundle
+bool Scheduling::NodeFitsInBundle(Node *n) {
+ uint n_idx = n->_idx;
+
+ // If this is the unconditional delay instruction, then it fits
+ if (n == _unconditional_delay_slot) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx);
+#endif
+ return (true);
+ }
+
+ // If the node cannot be scheduled this cycle, skip it
+ if (_current_latency[n_idx] > _bundle_cycle_number) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
+ n->_idx, _current_latency[n_idx], _bundle_cycle_number);
+#endif
+ return (false);
+ }
+
+ const Pipeline *node_pipeline = n->pipeline();
+
+ uint instruction_count = node_pipeline->instructionCount();
+ if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
+ instruction_count = 0;
+ else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
+ instruction_count++;
+
+ if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
+ n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
+#endif
+ return (false);
+ }
+
+ // Don't allow non-machine nodes to be handled this way
+ if (!n->is_Mach() && instruction_count == 0)
+ return (false);
+
+ // See if there is any overlap
+ uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse());
+
+ if (delay > 0) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx);
+#endif
+ return false;
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx);
+#endif
+
+ return true;
+}
+
+Node * Scheduling::ChooseNodeToBundle() {
+ uint siz = _available.size();
+
+ if (siz == 0) {
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# ChooseNodeToBundle: NULL\n");
+#endif
+ return (NULL);
+ }
+
+ // Fast path, if only 1 instruction in the bundle
+ if (siz == 1) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# ChooseNodeToBundle (only 1): ");
+ _available[0]->dump();
+ }
+#endif
+ return (_available[0]);
+ }
+
+ // Don't bother, if the bundle is already full
+ if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) {
+ for ( uint i = 0; i < siz; i++ ) {
+ Node *n = _available[i];
+
+ // Skip projections, we'll handle them another way
+ if (n->is_Proj())
+ continue;
+
+ // This presupposes that instructions are inserted into the
+ // available list in a legality order; i.e., instructions that
+ // must be inserted first are at the head of the list
+ if (NodeFitsInBundle(n)) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# ChooseNodeToBundle: ");
+ n->dump();
+ }
+#endif
+ return (n);
+ }
+ }
+ }
+
+ // Nothing fits in this bundle, choose the highest priority
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# ChooseNodeToBundle: ");
+ _available[0]->dump();
+ }
+#endif
+
+ return _available[0];
+}
+
+//------------------------------AddNodeToAvailableList-------------------------
+void Scheduling::AddNodeToAvailableList(Node *n) {
+ assert( !n->is_Proj(), "projections never directly made available" );
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# AddNodeToAvailableList: ");
+ n->dump();
+ }
+#endif
+
+ int latency = _current_latency[n->_idx];
+
+ // Insert in latency order (insertion sort)
+ uint i;
+ for ( i=0; i < _available.size(); i++ )
+ if (_current_latency[_available[i]->_idx] > latency)
+ break;
+
+ // Special Check for compares following branches
+ if( n->is_Mach() && _scheduled.size() > 0 ) {
+ int op = n->as_Mach()->ideal_Opcode();
+ Node *last = _scheduled[0];
+ if( last->is_MachIf() && last->in(1) == n &&
+ ( op == Op_CmpI ||
+ op == Op_CmpU ||
+ op == Op_CmpP ||
+ op == Op_CmpF ||
+ op == Op_CmpD ||
+ op == Op_CmpL ) ) {
+
+ // Recalculate position, moving to front of same latency
+ for ( i=0 ; i < _available.size(); i++ )
+ if (_current_latency[_available[i]->_idx] >= latency)
+ break;
+ }
+ }
+
+ // Insert the node in the available list
+ _available.insert(i, n);
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ dump_available();
+#endif
+}
+
+//------------------------------DecrementUseCounts-----------------------------
+void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
+ for ( uint i=0; i < n->len(); i++ ) {
+ Node *def = n->in(i);
+ if (!def) continue;
+ if( def->is_Proj() ) // If this is a machine projection, then
+ def = def->in(0); // propagate usage thru to the base instruction
+
+ if( _bbs[def->_idx] != bb ) // Ignore if not block-local
+ continue;
+
+ // Compute the latency
+ uint l = _bundle_cycle_number + n->latency(i);
+ if (_current_latency[def->_idx] < l)
+ _current_latency[def->_idx] = l;
+
+ // If this does not have uses then schedule it
+ if ((--_uses[def->_idx]) == 0)
+ AddNodeToAvailableList(def);
+ }
+}
+
+//------------------------------AddNodeToBundle--------------------------------
+void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# AddNodeToBundle: ");
+ n->dump();
+ }
+#endif
+
+ // Remove this from the available list
+ uint i;
+ for (i = 0; i < _available.size(); i++)
+ if (_available[i] == n)
+ break;
+ assert(i < _available.size(), "entry in _available list not found");
+ _available.remove(i);
+
+ // See if this fits in the current bundle
+ const Pipeline *node_pipeline = n->pipeline();
+ const Pipeline_Use& node_usage = node_pipeline->resourceUse();
+
+ // Check for instructions to be placed in the delay slot. We
+ // do this before we actually schedule the current instruction,
+ // because the delay slot follows the current instruction.
+ if (Pipeline::_branch_has_delay_slot &&
+ node_pipeline->hasBranchDelay() &&
+ !_unconditional_delay_slot) {
+
+ uint siz = _available.size();
+
+ // Conditional branches can support an instruction that
+ // is unconditionally executed and not dependent on the
+ // branch, OR a conditionally executed instruction if
+ // the branch is taken. In practice, this means that
+ // the first instruction at the branch target is
+ // copied to the delay slot, and the branch goes to
+ // the instruction after that at the branch target
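+ // (For context: this mirrors classic branch-delay-slot semantics, as on
+ // SPARC, where the instruction placed immediately after a branch is
+ // executed before control actually transfers, so an independent
+ // instruction can hide part of the branch latency.)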
+ if ( n->is_Mach() && n->is_Branch() ) {
+
+ assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
+ assert( !n->is_Catch(), "should not look for delay slot for Catch" );
+
+#ifndef PRODUCT
+ _branches++;
+#endif
+
+ // Look for at least 1 instruction on the available list
+ // that is not dependent on the branch
+ for (uint i = 0; i < siz; i++) {
+ Node *d = _available[i];
+ const Pipeline *avail_pipeline = d->pipeline();
+
+ // Don't allow safepoints in the branch shadow; that would
+ // cause a number of difficulties
+ if ( avail_pipeline->instructionCount() == 1 &&
+ !avail_pipeline->hasMultipleBundles() &&
+ !avail_pipeline->hasBranchDelay() &&
+ Pipeline::instr_has_unit_size() &&
+ d->size(_regalloc) == Pipeline::instr_unit_size() &&
+ NodeFitsInBundle(d) &&
+ !node_bundling(d)->used_in_delay()) {
+
+ if (d->is_Mach() && !d->is_MachSafePoint()) {
+ // A node that fits in the delay slot was found, so we need to
+ // set the appropriate bits in the bundle pipeline information so
+ // that it correctly indicates resource usage. Later, when we
+ // attempt to add this instruction to the bundle, we will skip
+ // setting the resource usage.
+ _unconditional_delay_slot = d;
+ node_bundling(n)->set_use_unconditional_delay();
+ node_bundling(d)->set_used_in_unconditional_delay();
+ _bundle_use.add_usage(avail_pipeline->resourceUse());
+ _current_latency[d->_idx] = _bundle_cycle_number;
+ _next_node = d;
+ ++_bundle_instr_count;
+#ifndef PRODUCT
+ _unconditional_delays++;
+#endif
+ break;
+ }
+ }
+ }
+ }
+
+ // No delay slot, add a nop to the usage
+ if (!_unconditional_delay_slot) {
+ // See if adding an instruction in the delay slot will overflow
+ // the bundle.
+ if (!NodeFitsInBundle(_nop)) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(1 instruction for delay slot) ***\n");
+#endif
+ step(1);
+ }
+
+ _bundle_use.add_usage(_nop->pipeline()->resourceUse());
+ _next_node = _nop;
+ ++_bundle_instr_count;
+ }
+
+ // See if the instruction in the delay slot requires a
+ // step of the bundles
+ if (!NodeFitsInBundle(n)) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(branch won't fit) ***\n");
+#endif
+ // Update the state information
+ _bundle_instr_count = 0;
+ _bundle_cycle_number += 1;
+ _bundle_use.step(1);
+ }
+ }
+
+ // Get the number of instructions
+ uint instruction_count = node_pipeline->instructionCount();
+ if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
+ instruction_count = 0;
+
+ // Compute the latency information
+ uint delay = 0;
+
+ if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) {
+ int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number;
+ if (relative_latency < 0)
+ relative_latency = 0;
+
+ delay = _bundle_use.full_latency(relative_latency, node_usage);
+
+ // Does not fit in this bundle, start a new one
+ if (delay > 0) {
+ step(delay);
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(%d) ***\n", delay);
+#endif
+ }
+ }
+
+ // If this was placed in the delay slot, ignore it
+ if (n != _unconditional_delay_slot) {
+
+ if (delay == 0) {
+ if (node_pipeline->hasMultipleBundles()) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(multiple instructions) ***\n");
+#endif
+ step(1);
+ }
+
+ else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(%d >= %d instructions) ***\n",
+ instruction_count + _bundle_instr_count,
+ Pipeline::_max_instrs_per_cycle);
+#endif
+ step(1);
+ }
+ }
+
+ if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
+ _bundle_instr_count++;
+
+ // Set the node's latency
+ _current_latency[n->_idx] = _bundle_cycle_number;
+
+ // Now merge the functional unit information
+ if (instruction_count > 0 || !node_pipeline->mayHaveNoCode())
+ _bundle_use.add_usage(node_usage);
+
+ // Increment the number of instructions in this bundle
+ _bundle_instr_count += instruction_count;
+
+ // Remember this node for later
+ if (n->is_Mach())
+ _next_node = n;
+ }
+
+ // It's possible to have a BoxLock in the graph and in the _bbs mapping but
+ // not in the bb->_nodes array. This happens for debug-info-only BoxLocks.
+ // 'Schedule' them (basically ignore in the schedule) but do not insert them
+ // into the block. All other scheduled nodes get put in the schedule here.
+ int op = n->Opcode();
+ if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR
+ (op != Op_Node && // Not an unused antidependence node and
+ // not an unallocated boxlock
+ (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {
+
+ // Push any trailing projections
+ if( bb->_nodes[bb->_nodes.size()-1] != n ) {
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *foi = n->fast_out(i);
+ if( foi->is_Proj() )
+ _scheduled.push(foi);
+ }
+ }
+
+ // Put the instruction in the schedule list
+ _scheduled.push(n);
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ dump_available();
+#endif
+
+ // Walk all the definitions, decrementing use counts, and
+ // if a definition has a 0 use count, place it in the available list.
+ DecrementUseCounts(n,bb);
+}
+
+//------------------------------ComputeUseCount--------------------------------
+// This method sets the use count within a basic block. We will ignore all
+// uses outside the current basic block. As we are doing a backwards walk,
+// any node we reach that has a use count of 0 may be scheduled. This also
+// avoids the problem of cyclic references from phi nodes, as long as phi
+// nodes are at the front of the basic block. This method also initializes
+// the available list to the set of instructions that have no uses within this
+// basic block.
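+// For illustration (a hypothetical block): given the block-local chain
+//   load -> add (uses the load) -> store (uses the add),
+// the backwards walk leaves the store with a use count of 0, so it seeds the
+// available list; the add and the load become available only after their
+// block-local users are scheduled and DecrementUseCounts drops them to zero.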
+void Scheduling::ComputeUseCount(const Block *bb) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# -> ComputeUseCount\n");
+#endif
+
+ // Clear the list of available and scheduled instructions, just in case
+ _available.clear();
+ _scheduled.clear();
+
+ // No delay slot specified
+ _unconditional_delay_slot = NULL;
+
+#ifdef ASSERT
+ for( uint i=0; i < bb->_nodes.size(); i++ )
+ assert( _uses[bb->_nodes[i]->_idx] == 0, "_use array not clean" );
+#endif
+
+ // Force the _uses count to never go to zero for unschedulable pieces
+ // of the block
+ for( uint k = 0; k < _bb_start; k++ )
+ _uses[bb->_nodes[k]->_idx] = 1;
+ for( uint l = _bb_end; l < bb->_nodes.size(); l++ )
+ _uses[bb->_nodes[l]->_idx] = 1;
+
+ // Iterate backwards over the instructions in the block. Don't count the
+ // branch projections at end or the block header instructions.
+ for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
+ Node *n = bb->_nodes[j];
+ if( n->is_Proj() ) continue; // Projections handled another way
+
+ // Account for all uses
+ for ( uint k = 0; k < n->len(); k++ ) {
+ Node *inp = n->in(k);
+ if (!inp) continue;
+ assert(inp != n, "no cycles allowed" );
+ if( _bbs[inp->_idx] == bb ) { // Block-local use?
+ if( inp->is_Proj() ) // Skip through Proj's
+ inp = inp->in(0);
+ ++_uses[inp->_idx]; // Count 1 block-local use
+ }
+ }
+
+ // If this instruction has a 0 use count, then it is available
+ if (!_uses[n->_idx]) {
+ _current_latency[n->_idx] = _bundle_cycle_number;
+ AddNodeToAvailableList(n);
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# uses: %3d: ", _uses[n->_idx]);
+ n->dump();
+ }
+#endif
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# <- ComputeUseCount\n");
+#endif
+}
+
+// This routine performs scheduling on each basic block in reverse order,
+// using instruction latencies and taking into account function unit
+// availability.
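+// Per block the flow is: compute register antidependencies, compute forward
+// latencies, compute block-local use counts (which seeds the available list),
+// then repeatedly pick a node via ChooseNodeToBundle and place it with
+// AddNodeToBundle until the available list is empty, and finally copy the
+// schedule (built in reverse) back into the block.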
+void Scheduling::DoScheduling() {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# -> DoScheduling\n");
+#endif
+
+ Block *succ_bb = NULL;
+ Block *bb;
+
+ // Walk over all the basic blocks in reverse order
+ for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) {
+ bb = _cfg->_blocks[i];
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# Schedule BB#%03d (initial)\n", i);
+ for (uint j = 0; j < bb->_nodes.size(); j++)
+ bb->_nodes[j]->dump();
+ }
+#endif
+
+ // On the head node, skip processing
+ if( bb == _cfg->_broot )
+ continue;
+
+ // Skip empty, connector blocks
+ if (bb->is_connector())
+ continue;
+
+ // If the following block is not the sole successor of
+ // this one, then reset the pipeline information
+ if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("*** bundle start of next BB, node %d, for %d instructions\n",
+ _next_node->_idx, _bundle_instr_count);
+ }
+#endif
+ step_and_clear();
+ }
+
+ // Leave untouched the starting instruction, any Phis, a CreateEx node
+ // or Top. bb->_nodes[_bb_start] is the first schedulable instruction.
+ _bb_end = bb->_nodes.size()-1;
+ for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) {
+ Node *n = bb->_nodes[_bb_start];
+ // Things not matched, like PhiNodes and ProjNodes, don't get scheduled.
+ // Also, MachIdealNodes do not get scheduled
+ if( !n->is_Mach() ) continue; // Skip non-machine nodes
+ MachNode *mach = n->as_Mach();
+ int iop = mach->ideal_Opcode();
+ if( iop == Op_CreateEx ) continue; // CreateEx is pinned
+ if( iop == Op_Con ) continue; // Do not schedule Top
+ if( iop == Op_Node && // Do not schedule PhiNodes, ProjNodes
+ mach->pipeline() == MachNode::pipeline_class() &&
+ !n->is_SpillCopy() ) // Breakpoints, Prolog, etc
+ continue;
+ break; // Funny loop structure to be sure...
+ }
+ // Compute last "interesting" instruction in block - last instruction we
+ // might schedule. _bb_end points just after last schedulable inst. We
+ // normally schedule conditional branches (despite them being forced last
+ // in the block), because they have delay slots we can fill. Calls all
+ // have their delay slots filled in the template expansions, so we don't
+ // bother scheduling them.
+ Node *last = bb->_nodes[_bb_end];
+ if( last->is_Catch() ||
+ (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
+ // There must be a prior call. Skip it.
+ while( !bb->_nodes[--_bb_end]->is_Call() ) {
+ assert( bb->_nodes[_bb_end]->is_Proj(), "skipping projections after expected call" );
+ }
+ } else if( last->is_MachNullCheck() ) {
+ // Backup so the last null-checked memory instruction is
+ // outside the schedulable range. Skip over the nullcheck,
+ // projection, and the memory nodes.
+ Node *mem = last->in(1);
+ do {
+ _bb_end--;
+ } while (mem != bb->_nodes[_bb_end]);
+ } else {
+ // Set _bb_end to point after last schedulable inst.
+ _bb_end++;
+ }
+
+ assert( _bb_start <= _bb_end, "inverted block ends" );
+
+ // Compute the register antidependencies for the basic block
+ ComputeRegisterAntidependencies(bb);
+ if (_cfg->C->failing()) return; // too many D-U pinch points
+
+ // Compute intra-bb latencies for the nodes
+ ComputeLocalLatenciesForward(bb);
+
+ // Compute the usage within the block, and set the list of all nodes
+ // in the block that have no uses within the block.
+ ComputeUseCount(bb);
+
+ // Schedule the remaining instructions in the block
+ while ( _available.size() > 0 ) {
+ Node *n = ChooseNodeToBundle();
+ AddNodeToBundle(n,bb);
+ }
+
+ assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" );
+#ifdef ASSERT
+ for( uint l = _bb_start; l < _bb_end; l++ ) {
+ Node *n = bb->_nodes[l];
+ uint m;
+ for( m = 0; m < _bb_end-_bb_start; m++ )
+ if( _scheduled[m] == n )
+ break;
+ assert( m < _bb_end-_bb_start, "instruction missing in schedule" );
+ }
+#endif
+
+ // Now copy the instructions (in reverse order) back to the block
+ for ( uint k = _bb_start; k < _bb_end; k++ )
+ bb->_nodes.map(k, _scheduled[_bb_end-k-1]);
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# Schedule BB#%03d (final)\n", i);
+ uint current = 0;
+ for (uint j = 0; j < bb->_nodes.size(); j++) {
+ Node *n = bb->_nodes[j];
+ if( valid_bundle_info(n) ) {
+ Bundle *bundle = node_bundling(n);
+ if (bundle->instr_count() > 0 || bundle->flags() > 0) {
+ tty->print("*** Bundle: ");
+ bundle->dump();
+ }
+ n->dump();
+ }
+ }
+ }
+#endif
+#ifdef ASSERT
+ verify_good_schedule(bb,"after block local scheduling");
+#endif
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# <- DoScheduling\n");
+#endif
+
+ // Record final node-bundling array location
+ _regalloc->C->set_node_bundling_base(_node_bundling_base);
+
+} // end DoScheduling
+
+//------------------------------verify_good_schedule---------------------------
+// Verify that no live-range used in the block is killed in the block by a
+// wrong DEF. This doesn't verify live-ranges that span blocks.
+
+// Check for edge existence. Used to avoid adding redundant precedence edges.
+static bool edge_from_to( Node *from, Node *to ) {
+ for( uint i=0; i<from->len(); i++ )
+ if( from->in(i) == to )
+ return true;
+ return false;
+}
+
+#ifdef ASSERT
+//------------------------------verify_do_def----------------------------------
+void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
+ // Check for bad kills
+ if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
+ Node *prior_use = _reg_node[def];
+ if( prior_use && !edge_from_to(prior_use,n) ) {
+ tty->print("%s = ",OptoReg::as_VMReg(def)->name());
+ n->dump();
+ tty->print_cr("...");
+ prior_use->dump();
+ assert_msg(edge_from_to(prior_use,n),msg);
+ }
+ _reg_node.map(def,NULL); // Kill live USEs
+ }
+}
+
+//------------------------------verify_good_schedule---------------------------
+void Scheduling::verify_good_schedule( Block *b, const char *msg ) {
+
+ // Zap to something reasonable for the verify code
+ _reg_node.clear();
+
+ // Walk over the block backwards. Check to make sure each DEF doesn't
+ // kill a live value (other than the one it's supposed to). Add each
+ // USE to the live set.
+ for( uint i = b->_nodes.size()-1; i >= _bb_start; i-- ) {
+ Node *n = b->_nodes[i];
+ int n_op = n->Opcode();
+ if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
+ // Fat-proj kills a slew of registers
+ RegMask rm = n->out_RegMask();// Make local copy
+ while( rm.is_NotEmpty() ) {
+ OptoReg::Name kill = rm.find_first_elem();
+ rm.Remove(kill);
+ verify_do_def( n, kill, msg );
+ }
+ } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes
+ // Get DEF'd registers the normal way
+ verify_do_def( n, _regalloc->get_reg_first(n), msg );
+ verify_do_def( n, _regalloc->get_reg_second(n), msg );
+ }
+
+ // Now make all USEs live
+ for( uint i=1; i<n->req(); i++ ) {
+ Node *def = n->in(i);
+ assert(def != 0, "input edge required");
+ OptoReg::Name reg_lo = _regalloc->get_reg_first(def);
+ OptoReg::Name reg_hi = _regalloc->get_reg_second(def);
+ if( OptoReg::is_valid(reg_lo) ) {
+ assert_msg(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg );
+ _reg_node.map(reg_lo,n);
+ }
+ if( OptoReg::is_valid(reg_hi) ) {
+ assert_msg(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg );
+ _reg_node.map(reg_hi,n);
+ }
+ }
+
+ }
+
+ // Zap to something reasonable for the Antidependence code
+ _reg_node.clear();
+}
+#endif
+
+// Conditionally add precedence edges. Avoid putting edges on Projs.
+static void add_prec_edge_from_to( Node *from, Node *to ) {
+ if( from->is_Proj() ) { // Put precedence edge on Proj's input
+ assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
+ from = from->in(0);
+ }
+ if( from != to && // No cycles (for things like LD L0,[L0+4] )
+ !edge_from_to( from, to ) ) // Avoid duplicate edge
+ from->add_prec(to);
+}
+
+//------------------------------anti_do_def------------------------------------
+void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
+ if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
+ return;
+
+ Node *pinch = _reg_node[def_reg]; // Get pinch point
+ if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet?
+ is_def ) { // Check for a true def (not a kill)
+ _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
+ return;
+ }
+
+ Node *kill = def; // Rename 'def' to more descriptive 'kill'
+ debug_only( def = (Node*)0xdeadbeef; )
+
+ // After some number of kills there _may_ be a later def
+ Node *later_def = NULL;
+
+ // Finding a kill requires a real pinch-point.
+ // Check for not already having a pinch-point.
+ // Pinch points are Op_Node's.
+ if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
+ later_def = pinch; // Must be def/kill as optimistic pinch-point
+ if ( _pinch_free_list.size() > 0) {
+ pinch = _pinch_free_list.pop();
+ } else {
+ pinch = new (_cfg->C, 1) Node(1); // Pinch point to-be
+ }
+ if (pinch->_idx >= _regalloc->node_regs_max_index()) {
+ _cfg->C->record_method_not_compilable("too many D-U pinch points");
+ return;
+ }
+ _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init)
+ _reg_node.map(def_reg,pinch); // Record pinch-point
+ //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
+ if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
+ pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call
+ add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
+ later_def = NULL; // and no later def
+ }
+ pinch->set_req(0,later_def); // Hook later def so we can find it
+ } else { // Else have valid pinch point
+ if( pinch->in(0) ) // If there is a later-def
+ later_def = pinch->in(0); // Get it
+ }
+
+ // Add output-dependence edge from later def to kill
+ if( later_def ) // If there is some original def
+ add_prec_edge_from_to(later_def,kill); // Add edge from def to kill
+
+ // See if current kill is also a use, and so is forced to be the pinch-point.
+ if( pinch->Opcode() == Op_Node ) {
+ Node *uses = kill->is_Proj() ? kill->in(0) : kill;
+ for( uint i=1; i<uses->req(); i++ ) {
+ if( _regalloc->get_reg_first(uses->in(i)) == def_reg ||
+ _regalloc->get_reg_second(uses->in(i)) == def_reg ) {
+ // Yes, found a use/kill pinch-point
+ pinch->set_req(0,NULL); //
+ pinch->replace_by(kill); // Move anti-dep edges up
+ pinch = kill;
+ _reg_node.map(def_reg,pinch);
+ return;
+ }
+ }
+ }
+
+ // Add edge from kill to pinch-point
+ add_prec_edge_from_to(kill,pinch);
+}
+
+//------------------------------anti_do_use------------------------------------
+void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
+ if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
+ return;
+ Node *pinch = _reg_node[use_reg]; // Get pinch point
+ // Check for no later def_reg/kill in block
+ if( pinch && _bbs[pinch->_idx] == b &&
+ // Use has to be block-local as well
+ _bbs[use->_idx] == b ) {
+ if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
+ pinch->req() == 1 ) { // pinch not yet in block?
+ pinch->del_req(0); // yank pointer to later-def, also set flag
+ // Insert the pinch-point in the block just after the last use
+ b->_nodes.insert(b->find_node(use)+1,pinch);
+ _bb_end++; // Increase size scheduled region in block
+ }
+
+ add_prec_edge_from_to(pinch,use);
+ }
+}
+
+//------------------------------ComputeRegisterAntidependences-----------------
+// We insert antidependences between the reads and following write of
+// allocated registers to prevent illegal code motion. Hopefully, the
+// number of added references should be fairly small, especially as we
+// are only adding references within the current basic block.
+void Scheduling::ComputeRegisterAntidependencies(Block *b) {
+
+#ifdef ASSERT
+ verify_good_schedule(b,"before block local scheduling");
+#endif
+
+ // A valid schedule, for each register independently, is an endless cycle
+ // of: a def, then some uses (connected to the def by true dependencies),
+ // then some kills (defs with no uses), finally the cycle repeats with a new
+ // def. The uses are allowed to float relative to each other, as are the
+ // kills. No use is allowed to slide past a kill (or def). This requires
+ // antidependencies between all uses of a single def and all kills that
+ // follow, up to the next def. More edges are redundant, because later defs
+ // & kills are already serialized with true or antidependencies. To keep
+ // the edge count down, we add a 'pinch point' node if there's more than
+ // one use or more than one kill/def.
+
+ // We add dependencies in one bottom-up pass.
+
+ // For each instruction we handle its DEFs/KILLs, then its USEs.
+
+ // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this
+ // register. If not, we record the DEF/KILL in _reg_node, the
+ // register-to-def mapping. If there is a prior DEF/KILL, we insert a
+ // "pinch point", a new Node that's in the graph but not in the block.
+ // We put edges from the prior and current DEF/KILLs to the pinch point.
+ // We put the pinch point in _reg_node. If there's already a pinch point
+ // we merely add an edge from the current DEF/KILL to the pinch point.
+
+ // After doing the DEF/KILLs, we handle USEs. For each used register, we
+ // put an edge from the pinch point to the USE.
+
+ // To be expedient, the _reg_node array is pre-allocated for the whole
+ // compilation. _reg_node is lazily initialized; it either contains a NULL,
+ // or a valid def/kill/pinch-point, or a leftover node from some prior
+ // block. Leftover node from some prior block is treated like a NULL (no
+ // prior def, so no anti-dependence needed). Valid def is distinguished by
+ // it being in the current block.
+ bool fat_proj_seen = false;
+ uint last_safept = _bb_end-1;
+ Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL;
+ Node* last_safept_node = end_node;
+ for( uint i = _bb_end-1; i >= _bb_start; i-- ) {
+ Node *n = b->_nodes[i];
+ int is_def = n->outcnt(); // def if some uses prior to adding precedence edges
+ if( n->Opcode() == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
+ // Fat-proj kills a slew of registers
+ // This can add edges to 'n' and obscure whether or not it was a def,
+ // hence the is_def flag.
+ fat_proj_seen = true;
+ RegMask rm = n->out_RegMask();// Make local copy
+ while( rm.is_NotEmpty() ) {
+ OptoReg::Name kill = rm.find_first_elem();
+ rm.Remove(kill);
+ anti_do_def( b, n, kill, is_def );
+ }
+ } else {
+ // Get DEF'd registers the normal way
+ anti_do_def( b, n, _regalloc->get_reg_first(n), is_def );
+ anti_do_def( b, n, _regalloc->get_reg_second(n), is_def );
+ }
+
+ // Check each register used by this instruction for a following DEF/KILL
+ // that must occur afterward and requires an anti-dependence edge.
+ for( uint j=0; j<n->req(); j++ ) {
+ Node *def = n->in(j);
+ if( def ) {
+ assert( def->Opcode() != Op_MachProj || def->ideal_reg() != MachProjNode::fat_proj, "" );
+ anti_do_use( b, n, _regalloc->get_reg_first(def) );
+ anti_do_use( b, n, _regalloc->get_reg_second(def) );
+ }
+ }
+ // Do not allow defs of new derived values to float above GC
+ // points unless the base is definitely available at the GC point.
+
+ Node *m = b->_nodes[i];
+
+ // Add precedence edge from following safepoint to use of derived pointer
+ if( last_safept_node != end_node &&
+ m != last_safept_node) {
+ for (uint k = 1; k < m->req(); k++) {
+ const Type *t = m->in(k)->bottom_type();
+ if( t->isa_oop_ptr() &&
+ t->is_ptr()->offset() != 0 ) {
+ last_safept_node->add_prec( m );
+ break;
+ }
+ }
+ }
+
+ if( n->jvms() ) { // Precedence edge from derived to safept
+ // Check if last_safept_node was moved by pinch-point insertion in anti_do_use()
+ if( b->_nodes[last_safept] != last_safept_node ) {
+ last_safept = b->find_node(last_safept_node);
+ }
+ for( uint j=last_safept; j > i; j-- ) {
+ Node *mach = b->_nodes[j];
+ if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP )
+ mach->add_prec( n );
+ }
+ last_safept = i;
+ last_safept_node = m;
+ }
+ }
+
+ if (fat_proj_seen) {
+ // Garbage collect pinch nodes that were not consumed.
+ // They are usually created by a fat kill MachProj for a call.
+ garbage_collect_pinch_nodes();
+ }
+}
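
The comment blocks above justify pinch points purely by edge count: wiring every use of a def directly to every following kill of the same register needs a quadratic number of anti-dependence edges, while routing both sets through one pinch node needs only a linear number. A minimal standalone sketch of that arithmetic (plain C++ for illustration; not HotSpot code, and the function names are made up):

#include <cstddef>
#include <iostream>

// For one register in one block: `uses` uses of the current def are followed
// by `kills` later defs/kills that must not be scheduled above them.
static std::size_t edges_direct(std::size_t uses, std::size_t kills) {
  return uses * kills;            // one anti-dependence edge per (use, kill) pair
}

static std::size_t edges_with_pinch(std::size_t uses, std::size_t kills) {
  return uses + kills;            // every use and every kill attaches to one pinch node
}

int main() {
  const std::size_t sizes[] = {2, 5, 20};
  for (std::size_t n : sizes) {
    std::cout << n << " uses, " << n << " kills: "
              << edges_direct(n, n) << " direct edges vs "
              << edges_with_pinch(n, n) << " through a pinch node\n";
  }
  return 0;
}
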
+
+//------------------------------garbage_collect_pinch_nodes-------------------------------
+
+// Garbage collect pinch nodes for reuse by other blocks.
+//
+// The block scheduler's insertion of anti-dependence
+// edges creates many pinch nodes when the block contains
+// 2 or more Calls. A pinch node is used to prevent a
+// combinatorial explosion of edges. If a set of kills for a
+// register is anti-dependent on a set of uses (or defs), rather
+// than adding an edge in the graph between each pair of kill
+// and use (or def), a pinch is inserted between them:
+//
+// use1 use2 use3
+// \ | /
+// \ | /
+// pinch
+// / | \
+// / | \
+// kill1 kill2 kill3
+//
+// One pinch node is created per register killed when
+// the second call is encountered during a backwards pass
+// over the block. Most of these pinch nodes are never
+// wired into the graph because the register is never
+// used or def'ed in the block.
+//
+void Scheduling::garbage_collect_pinch_nodes() {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
+#endif
+ int trace_cnt = 0;
+ for (uint k = 0; k < _reg_node.Size(); k++) {
+ Node* pinch = _reg_node[k];
+ if (pinch != NULL && pinch->Opcode() == Op_Node &&
+ // no precedence input edges
+ (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
+ cleanup_pinch(pinch);
+ _pinch_free_list.push(pinch);
+ _reg_node.map(k, NULL);
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ trace_cnt++;
+ if (trace_cnt > 40) {
+ tty->print("\n");
+ trace_cnt = 0;
+ }
+ tty->print(" %d", pinch->_idx);
+ }
+#endif
+ }
+ }
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) tty->print("\n");
+#endif
+}
+
+// Clean up a pinch node for reuse.
+void Scheduling::cleanup_pinch( Node *pinch ) {
+ assert (pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking");
+
+ for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) {
+ Node* use = pinch->last_out(i);
+ uint uses_found = 0;
+ for (uint j = use->req(); j < use->len(); j++) {
+ if (use->in(j) == pinch) {
+ use->rm_prec(j);
+ uses_found++;
+ }
+ }
+ assert(uses_found > 0, "must be a precedence edge");
+ i -= uses_found; // we deleted 1 or more copies of this edge
+ }
+ // May have a later_def entry
+ pinch->set_req(0, NULL);
+}
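
The reverse walk in cleanup_pinch() has one subtle point: removing a precedence edge also shrinks the pinch node's out list, so the cursor must step back by the number of copies just deleted. A standalone analog of that pattern (plain C++; the vector of integer ids is a stand-in for the DU iterator and Node machinery, which are not reproduced here):

#include <iostream>
#include <vector>

int main() {
  // `outs` models the pinch node's out-edge list; a user id appears once per
  // precedence edge it has to the pinch node.
  std::vector<int> outs = {7, 3, 7, 9, 3, 3};
  for (int i = static_cast<int>(outs.size()) - 1; i >= 0; ) {
    int user = outs[i];
    int erased = 0;
    // Delete every copy of this user's edge, scanning from the back.
    for (int j = static_cast<int>(outs.size()) - 1; j >= 0; --j) {
      if (outs[j] == user) { outs.erase(outs.begin() + j); ++erased; }
    }
    i -= erased;   // mirrors "i -= uses_found": skip nothing, revisit nothing
  }
  std::cout << "edges left: " << outs.size() << "\n";   // prints 0
  return 0;
}
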
+
+//------------------------------print_statistics-------------------------------
+#ifndef PRODUCT
+
+void Scheduling::dump_available() const {
+ tty->print("#Availist ");
+ for (uint i = 0; i < _available.size(); i++)
+ tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]);
+ tty->cr();
+}
+
+// Print Scheduling Statistics
+void Scheduling::print_statistics() {
+ // Print the size added by nops for bundling
+ tty->print("Nops added %d bytes to total of %d bytes",
+ _total_nop_size, _total_method_size);
+ if (_total_method_size > 0)
+ tty->print(", for %.2f%%",
+ ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
+ tty->print("\n");
+
+ // Print the number of branch shadows filled
+ if (Pipeline::_branch_has_delay_slot) {
+ tty->print("Of %d branches, %d had unconditional delay slots filled",
+ _total_branches, _total_unconditional_delays);
+ if (_total_branches > 0)
+ tty->print(", for %.2f%%",
+ ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
+ tty->print("\n");
+ }
+
+ uint total_instructions = 0, total_bundles = 0;
+
+ for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) {
+ uint bundle_count = _total_instructions_per_bundle[i];
+ total_instructions += bundle_count * i;
+ total_bundles += bundle_count;
+ }
+
+ if (total_bundles > 0)
+ tty->print("Average ILP (excluding nops) is %.2f\n",
+ ((double)total_instructions) / ((double)total_bundles));
+}
+#endif
diff --git a/src/share/vm/opto/output.hpp b/src/share/vm/opto/output.hpp
new file mode 100644
index 000000000..386e2be16
--- /dev/null
+++ b/src/share/vm/opto/output.hpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2000-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Arena;
+class Bundle;
+class Block;
+class Block_Array;
+class Node;
+class Node_Array;
+class Node_List;
+class PhaseCFG;
+class PhaseChaitin;
+class Pipeline_Use_Element;
+class Pipeline_Use;
+
+#ifndef PRODUCT
+#define DEBUG_ARG(x) , x
+#else
+#define DEBUG_ARG(x)
+#endif
+
+// Define the initial sizes for allocation of the resizable code buffer
+enum {
+ initial_code_capacity = 16 * 1024,
+ initial_stub_capacity = 4 * 1024,
+ initial_const_capacity = 4 * 1024,
+ initial_locs_capacity = 3 * 1024
+};
+
+//------------------------------Scheduling----------------------------------
+// This class contains all the information necessary to implement instruction
+// scheduling and bundling.
+class Scheduling {
+
+private:
+ // Arena to use
+ Arena *_arena;
+
+ // Control-Flow Graph info
+ PhaseCFG *_cfg;
+
+ // Register Allocation info
+ PhaseRegAlloc *_regalloc;
+
+ // Number of nodes in the method
+ uint _node_bundling_limit;
+
+ // List of scheduled nodes. Generated in reverse order
+ Node_List _scheduled;
+
+ // List of nodes currently available for choosing for scheduling
+ Node_List _available;
+
+ // Mapping from node (index) to basic block
+ Block_Array& _bbs;
+
+ // For each instruction beginning a bundle, the number of following
+ // nodes to be bundled with it.
+ Bundle *_node_bundling_base;
+
+ // Mapping from register to Node
+ Node_List _reg_node;
+
+ // Free list for pinch nodes.
+ Node_List _pinch_free_list;
+
+ // Latency from the beginning of the containing basic block (base 1)
+ // for each node.
+ unsigned short *_node_latency;
+
+ // Number of uses of this node within the containing basic block.
+ short *_uses;
+
+ // Schedulable portion of current block. Skips Region/Phi/CreateEx up
+ // front, branch+proj at end. Also skips Catch/CProj (same as
+ // branch-at-end), plus just-prior exception-throwing call.
+ uint _bb_start, _bb_end;
+
+ // Latency from the end of the basic block as scheduled
+ unsigned short *_current_latency;
+
+ // Remember the next node
+ Node *_next_node;
+
+ // Use this for an unconditional branch delay slot
+ Node *_unconditional_delay_slot;
+
+ // Pointer to a Nop
+ MachNopNode *_nop;
+
+ // Length of the current bundle, in instructions
+ uint _bundle_instr_count;
+
+ // Current Cycle number, for computing latencies and bundling
+ uint _bundle_cycle_number;
+
+ // Bundle information
+ Pipeline_Use_Element _bundle_use_elements[resource_count];
+ Pipeline_Use _bundle_use;
+
+ // Dump the available list
+ void dump_available() const;
+
+public:
+ Scheduling(Arena *arena, Compile &compile);
+
+ // Destructor
+ NOT_PRODUCT( ~Scheduling(); )
+
+ // Step ahead "i" cycles
+ void step(uint i);
+
+ // Step ahead 1 cycle, and clear the bundle state (for example,
+ // at a branch target)
+ void step_and_clear();
+
+ Bundle* node_bundling(const Node *n) {
+ assert(valid_bundle_info(n), "oob");
+ return (&_node_bundling_base[n->_idx]);
+ }
+
+ bool valid_bundle_info(const Node *n) const {
+ return (_node_bundling_limit > n->_idx);
+ }
+
+ bool starts_bundle(const Node *n) const {
+ return (_node_bundling_limit > n->_idx && _node_bundling_base[n->_idx].starts_bundle());
+ }
+
+ // Do the scheduling
+ void DoScheduling();
+
+ // Compute the local latencies walking forward over the list of
+ // nodes for a basic block
+ void ComputeLocalLatenciesForward(const Block *bb);
+
+ // Compute the register antidependencies within a basic block
+ void ComputeRegisterAntidependencies(Block *bb);
+ void verify_do_def( Node *n, OptoReg::Name def, const char *msg );
+ void verify_good_schedule( Block *b, const char *msg );
+ void anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def );
+ void anti_do_use( Block *b, Node *use, OptoReg::Name use_reg );
+
+ // Add a node to the current bundle
+ void AddNodeToBundle(Node *n, const Block *bb);
+
+ // Add a node to the list of available nodes
+ void AddNodeToAvailableList(Node *n);
+
+ // Compute the local use count for the nodes in a block, and compute
+ // the list of instructions with no uses in the block as available
+ void ComputeUseCount(const Block *bb);
+
+ // Choose an instruction from the available list to add to the bundle
+ Node * ChooseNodeToBundle();
+
+ // See if this Node fits into the currently accumulating bundle
+ bool NodeFitsInBundle(Node *n);
+
+ // Decrement the use count for a node
+ void DecrementUseCounts(Node *n, const Block *bb);
+
+ // Garbage collect pinch nodes for reuse by other blocks.
+ void garbage_collect_pinch_nodes();
+ // Clean up a pinch node for reuse (helper for above).
+ void cleanup_pinch( Node *pinch );
+
+ // Information for statistics gathering
+#ifndef PRODUCT
+private:
+ // Gather information on size of nops relative to total
+ uint _branches, _unconditional_delays;
+
+ static uint _total_nop_size, _total_method_size;
+ static uint _total_branches, _total_unconditional_delays;
+ static uint _total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
+
+public:
+ static void print_statistics();
+
+ static void increment_instructions_per_bundle(uint i) {
+ _total_instructions_per_bundle[i]++;
+ }
+
+ static void increment_nop_size(uint s) {
+ _total_nop_size += s;
+ }
+
+ static void increment_method_size(uint s) {
+ _total_method_size += s;
+ }
+#endif
+
+};
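
For reference, the three bundling queries declared above are what the rest of the output phase is expected to call per node. A hypothetical usage fragment (the names `sched` and `n` are assumptions, standing for a Scheduling object and a Node* available in the surrounding compile; this is a sketch, not standalone code):

// Ask whether `n` has bundling info and whether it opens a new issue bundle.
// valid_bundle_info() guards node_bundling(), which asserts when the node's
// index is outside the recorded limit.
if (sched.valid_bundle_info(n)) {
  Bundle* bundle = sched.node_bundling(n);
  if (sched.starts_bundle(n)) {
    // `n` begins a new bundle; `bundle` describes how the group was packed.
  }
}
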
diff --git a/src/share/vm/opto/parse.hpp b/src/share/vm/opto/parse.hpp
new file mode 100644
index 000000000..60ffdf17d
--- /dev/null
+++ b/src/share/vm/opto/parse.hpp
@@ -0,0 +1,555 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class BytecodeParseHistogram;
+class InlineTree;
+class Parse;
+class SwitchRange;
+
+
+//------------------------------InlineTree-------------------------------------
+class InlineTree : public ResourceObj {
+ Compile* C; // cache
+ JVMState* _caller_jvms; // state of caller
+ ciMethod* _method; // method being called by the caller_jvms
+ InlineTree* _caller_tree;
+ uint _count_inline_bcs; // Accumulated count of inlined bytecodes
+ // Call-site count / interpreter invocation count, scaled recursively.
+ // Always between 0.0 and 1.0. Represents the fraction of the method's
+ // total execution time used at this call site.
+ const float _site_invoke_ratio;
+ float compute_callee_frequency( int caller_bci ) const;
+
+ GrowableArray<InlineTree*> _subtrees;
+ friend class Compile;
+
+protected:
+ InlineTree(Compile* C,
+ const InlineTree* caller_tree,
+ ciMethod* callee_method,
+ JVMState* caller_jvms,
+ int caller_bci,
+ float site_invoke_ratio);
+ InlineTree *build_inline_tree_for_callee(ciMethod* callee_method,
+ JVMState* caller_jvms,
+ int caller_bci);
+ const char* try_to_inline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result);
+ const char* shouldInline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const;
+ const char* shouldNotInline(ciMethod* callee_method, WarmCallInfo* wci_result) const;
+ void print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const PRODUCT_RETURN;
+
+ InlineTree *caller_tree() const { return _caller_tree; }
+ InlineTree* callee_at(int bci, ciMethod* m) const;
+ int inline_depth() const { return _caller_jvms ? _caller_jvms->depth() : 0; }
+
+public:
+ static InlineTree* build_inline_tree_root();
+ static InlineTree* find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee, bool create_if_not_found = false);
+
+ // For temporary (stack-allocated, stateless) ilts:
+ InlineTree(Compile* c, ciMethod* callee_method, JVMState* caller_jvms, float site_invoke_ratio);
+
+ // InlineTree enum
+ enum InlineStyle {
+ Inline_do_not_inline = 0, //
+ Inline_cha_is_monomorphic = 1, //
+ Inline_type_profile_monomorphic = 2 //
+ };
+
+ // See if it is OK to inline.
+ // The receiver is the inline tree for the caller.
+ //
+ // The result is a temperature indication. If it is hot or cold,
+ // inlining is immediate or undesirable. Otherwise, the info block
+ // returned is newly allocated and may be enqueued.
+ //
+ // If the method is inlinable, a new inline subtree is created on the fly,
+ // and may be accessed by find_subtree_from_root.
+ // The call_method is the dest_method for a special or static invocation.
+ // The call_method is an optimized virtual method candidate otherwise.
+ WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci);
+
+ // Information about inlined method
+ JVMState* caller_jvms() const { return _caller_jvms; }
+ ciMethod *method() const { return _method; }
+ int caller_bci() const { return _caller_jvms ? _caller_jvms->bci() : InvocationEntryBci; }
+ uint count_inline_bcs() const { return _count_inline_bcs; }
+ float site_invoke_ratio() const { return _site_invoke_ratio; };
+
+#ifndef PRODUCT
+private:
+ uint _count_inlines; // Count of inlined methods
+public:
+ // Debug information collected during parse
+ uint count_inlines() const { return _count_inlines; };
+#endif
+ GrowableArray<InlineTree*> subtrees() { return _subtrees; }
+};
+
+
+//-----------------------------------------------------------------------------
+//------------------------------Parse------------------------------------------
+// Parse bytecodes, build a Graph
+class Parse : public GraphKit {
+ public:
+ // Per-block information needed by the parser:
+ class Block {
+ private:
+ ciTypeFlow::Block* _flow;
+ int _pred_count; // how many predecessors in CFG?
+ int _preds_parsed; // how many of these have been parsed?
+ uint _count; // how many times executed? Currently only set by _goto's
+ bool _is_parsed; // has this block been parsed yet?
+ bool _is_handler; // is this block an exception handler?
+ SafePointNode* _start_map; // all values flowing into this block
+ MethodLivenessResult _live_locals; // lazily initialized liveness bitmap
+
+ int _num_successors; // Includes only normal control flow.
+ int _all_successors; // Include exception paths also.
+ Block** _successors;
+
+ // Use init_node/init_graph to initialize Blocks.
+ // Block() : _live_locals((uintptr_t*)NULL,0) { ShouldNotReachHere(); }
+ Block() : _live_locals(NULL,0) { ShouldNotReachHere(); }
+
+ public:
+
+ // Set up the block data structure itself.
+ void init_node(Parse* outer, int po);
+ // Set up the block's relations to other blocks.
+ void init_graph(Parse* outer);
+
+ ciTypeFlow::Block* flow() const { return _flow; }
+ int pred_count() const { return _pred_count; }
+ int preds_parsed() const { return _preds_parsed; }
+ bool is_parsed() const { return _is_parsed; }
+ bool is_handler() const { return _is_handler; }
+ void set_count( uint x ) { _count = x; }
+ uint count() const { return _count; }
+
+ SafePointNode* start_map() const { assert(is_merged(),""); return _start_map; }
+ void set_start_map(SafePointNode* m) { assert(!is_merged(), ""); _start_map = m; }
+
+ // True after any predecessor flows control into this block
+ bool is_merged() const { return _start_map != NULL; }
+
+ // True when all non-exception predecessors have been parsed.
+ bool is_ready() const { return preds_parsed() == pred_count(); }
+
+ int num_successors() const { return _num_successors; }
+ int all_successors() const { return _all_successors; }
+ Block* successor_at(int i) const {
+ assert((uint)i < (uint)all_successors(), "");
+ return _successors[i];
+ }
+ Block* successor_for_bci(int bci);
+
+ int start() const { return flow()->start(); }
+ int limit() const { return flow()->limit(); }
+ int pre_order() const { return flow()->pre_order(); }
+ int start_sp() const { return flow()->stack_size(); }
+
+ const Type* peek(int off=0) const { return stack_type_at(start_sp() - (off+1)); }
+
+ const Type* stack_type_at(int i) const;
+ const Type* local_type_at(int i) const;
+ static const Type* get_type(ciType* t) { return Type::get_typeflow_type(t); }
+
+ bool has_trap_at(int bci) const { return flow()->has_trap() && flow()->trap_bci() == bci; }
+
+ // Call this just before parsing a block.
+ void mark_parsed() {
+ assert(!_is_parsed, "must parse each block exactly once");
+ _is_parsed = true;
+ }
+
+ // Return the phi/region input index for the "current" pred,
+ // and bump the pred number. For historical reasons these index
+ // numbers are handed out in descending order; for example, a block
+ // with three predecessors hands out path numbers 3, 2, 1 as its
+ // preds are parsed. The last index is always PhiNode::Input
+ // (i.e., 1). The value returned is known as a "path number"
+ // because it distinguishes by which path we are entering the block.
+ int next_path_num() {
+ assert(preds_parsed() < pred_count(), "too many preds?");
+ return pred_count() - _preds_parsed++;
+ }
+
+ // Add a previously unaccounted predecessor to this block.
+ // This operates by increasing the size of the block's region
+ // and all its phi nodes (if any). The value returned is a
+ // path number ("pnum").
+ int add_new_path();
+
+ // Initialize me by recording the parser's map. My own map must be NULL.
+ void record_state(Parse* outer);
+ };
+
+#ifndef PRODUCT
+ // BytecodeParseHistogram collects the number of bytecodes parsed, nodes constructed, and transformations.
+ class BytecodeParseHistogram : public ResourceObj {
+ private:
+ enum BPHType {
+ BPH_transforms,
+ BPH_values
+ };
+ static bool _initialized;
+ static uint _bytecodes_parsed [Bytecodes::number_of_codes];
+ static uint _nodes_constructed[Bytecodes::number_of_codes];
+ static uint _nodes_transformed[Bytecodes::number_of_codes];
+ static uint _new_values [Bytecodes::number_of_codes];
+
+ Bytecodes::Code _initial_bytecode;
+ int _initial_node_count;
+ int _initial_transforms;
+ int _initial_values;
+
+ Parse *_parser;
+ Compile *_compiler;
+
+ // Initialization
+ static void reset();
+
+ // Return info being collected, select with global flag 'BytecodeParseInfo'
+ int current_count(BPHType info_selector);
+
+ public:
+ BytecodeParseHistogram(Parse *p, Compile *c);
+ static bool initialized();
+
+ // Record info when starting to parse one bytecode
+ void set_initial_state( Bytecodes::Code bc );
+ // Record results of parsing one bytecode
+ void record_change();
+
+ // Profile printing
+ static void print(float cutoff = 0.01F); // cutoff in percent
+ };
+
+ public:
+ // Record work done during parsing
+ BytecodeParseHistogram* _parse_histogram;
+ void set_parse_histogram(BytecodeParseHistogram *bph) { _parse_histogram = bph; }
+ BytecodeParseHistogram* parse_histogram() { return _parse_histogram; }
+#endif
+
+ private:
+ friend class Block;
+
+ // Variables which characterize this compilation as a whole:
+
+ JVMState* _caller; // JVMS which carries incoming args & state.
+ float _expected_uses; // expected number of calls to this code
+ float _prof_factor; // discount applied to my profile counts
+ int _depth; // Inline tree depth, for debug printouts
+ const TypeFunc*_tf; // My kind of function type
+ int _entry_bci; // the osr bci or InvocationEntryBci
+
+ ciTypeFlow* _flow; // Results of previous flow pass.
+ Block* _blocks; // Array of basic-block structs.
+ int _block_count; // Number of elements in _blocks.
+
+ GraphKit _exits; // Record all normal returns and throws here.
+ bool _wrote_final; // Did we write a final field?
+ bool _count_invocations; // update and test invocation counter
+ bool _method_data_update; // update method data oop
+
+ // Variables which track Java semantics during bytecode parsing:
+
+ Block* _block; // block currently getting parsed
+ ciBytecodeStream _iter; // stream of this method's bytecodes
+
+ int _blocks_merged; // Progress meter: state merges from BB preds
+ int _blocks_parsed; // Progress meter: BBs actually parsed
+
+ const FastLockNode* _synch_lock; // FastLockNode for synchronized method
+
+#ifndef PRODUCT
+ int _max_switch_depth; // Debugging SwitchRanges.
+ int _est_switch_depth; // Debugging SwitchRanges.
+#endif
+
+ public:
+ // Constructor
+ Parse(JVMState* caller, ciMethod* parse_method, float expected_uses);
+
+ virtual Parse* is_Parse() const { return (Parse*)this; }
+
+ public:
+ // Accessors.
+ JVMState* caller() const { return _caller; }
+ float expected_uses() const { return _expected_uses; }
+ float prof_factor() const { return _prof_factor; }
+ int depth() const { return _depth; }
+ const TypeFunc* tf() const { return _tf; }
+ // entry_bci() -- see osr_bci, etc.
+
+ ciTypeFlow* flow() const { return _flow; }
+ // blocks() -- see pre_order_at, start_block, etc.
+ int block_count() const { return _block_count; }
+
+ GraphKit& exits() { return _exits; }
+ bool wrote_final() const { return _wrote_final; }
+ void set_wrote_final(bool z) { _wrote_final = z; }
+ bool count_invocations() const { return _count_invocations; }
+ bool method_data_update() const { return _method_data_update; }
+
+ Block* block() const { return _block; }
+ ciBytecodeStream& iter() { return _iter; }
+ Bytecodes::Code bc() const { return _iter.cur_bc(); }
+
+ void set_block(Block* b) { _block = b; }
+
+ // Derived accessors:
+ bool is_normal_parse() const { return _entry_bci == InvocationEntryBci; }
+ bool is_osr_parse() const { return _entry_bci != InvocationEntryBci; }
+ int osr_bci() const { assert(is_osr_parse(),""); return _entry_bci; }
+
+ void set_parse_bci(int bci);
+
+ // Must this parse be aborted?
+ bool failing() { return C->failing(); }
+
+ Block* pre_order_at(int po) {
+ assert(0 <= po && po < _block_count, "oob");
+ return &_blocks[po];
+ }
+ Block* start_block() {
+ return pre_order_at(flow()->start_block()->pre_order());
+ }
+ // Can return NULL if the flow pass did not complete a block.
+ Block* successor_for_bci(int bci) {
+ return block()->successor_for_bci(bci);
+ }
+
+ private:
+ // Create a JVMS & map for the initial state of this method.
+ SafePointNode* create_entry_map();
+
+ // OSR helpers
+ Node *fetch_interpreter_state(int index, BasicType bt, Node *local_addrs, Node *local_addrs_base);
+ Node* check_interpreter_type(Node* l, const Type* type, SafePointNode* &bad_type_exit);
+ void load_interpreter_state(Node* osr_buf);
+
+ // Functions for managing basic blocks:
+ void init_blocks();
+ void load_state_from(Block* b);
+ void store_state_to(Block* b) { b->record_state(this); }
+
+ // Parse all the basic blocks.
+ void do_all_blocks();
+
+ // Helper for do_all_blocks; makes one pass in pre-order.
+ void visit_blocks();
+
+ // Parse the current basic block
+ void do_one_block();
+
+ // Raise an error if we get a bad ciTypeFlow CFG.
+ void handle_missing_successor(int bci);
+
+ // first actions (before BCI 0)
+ void do_method_entry();
+
+ // implementation of monitorenter/monitorexit
+ void do_monitor_enter();
+ void do_monitor_exit();
+
+ // Eagerly create phis throughout the state, to cope with back edges.
+ void ensure_phis_everywhere();
+
+ // Merge the current mapping into the basic block starting at bci
+ void merge( int target_bci);
+ // Same as plain merge, except that it allocates a new path number.
+ void merge_new_path( int target_bci);
+ // Merge the current mapping into an exception handler.
+ void merge_exception(int target_bci);
+ // Helper: Merge the current mapping into the given basic block
+ void merge_common(Block* target, int pnum);
+ // Helper functions for merging individual cells.
+ PhiNode *ensure_phi( int idx, bool nocreate = false);
+ PhiNode *ensure_memory_phi(int idx, bool nocreate = false);
+ // Helper to merge the current memory state into the given basic block
+ void merge_memory_edges(MergeMemNode* n, int pnum, bool nophi);
+
+ // Parse this bytecode, and alter the Parser's JVM->Node mapping
+ void do_one_bytecode();
+
+ // helper function to generate array store check
+ void array_store_check();
+ // Helper function to generate array load
+ void array_load(BasicType etype);
+ // Helper function to generate array store
+ void array_store(BasicType etype);
+ // Helper function to compute array addressing
+ Node* array_addressing(BasicType type, int vals, const Type* *result2=NULL);
+
+ // Pass current map to exits
+ void return_current(Node* value);
+
+ // Register finalizers on return from Object.<init>
+ void call_register_finalizer();
+
+ // Insert a compiler safepoint into the graph
+ void add_safepoint();
+
+ // Insert a compiler safepoint into the graph, if there is a back-branch.
+ void maybe_add_safepoint(int target_bci) {
+ if (UseLoopSafepoints && target_bci <= bci()) {
+ add_safepoint();
+ }
+ }
+
+ // Note: Intrinsic generation routines may be found in library_call.cpp.
+
+ // Helper function to setup Ideal Call nodes
+ void do_call();
+
+ // Helper function to uncommon-trap or bailout for non-compilable call-sites
+ bool can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass *klass);
+
+ // Helper function to identify inlining potential at call-site
+ ciMethod* optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
+ ciMethod *dest_method, const TypeOopPtr* receiver_type);
+
+ // Helper function to setup for type-profile based inlining
+ bool prepare_type_profile_inline(ciInstanceKlass* prof_klass, ciMethod* prof_method);
+
+ // Helper functions for type checking bytecodes:
+ void do_checkcast();
+ void do_instanceof();
+
+ // Helper functions for shifting & arithmetic
+ void modf();
+ void modd();
+ void l2f();
+
+ void do_irem();
+
+ // implementation of _get* and _put* bytecodes
+ void do_getstatic() { do_field_access(true, false); }
+ void do_getfield () { do_field_access(true, true); }
+ void do_putstatic() { do_field_access(false, false); }
+ void do_putfield () { do_field_access(false, true); }
+
+ // common code for making initial checks and forming addresses
+ void do_field_access(bool is_get, bool is_field);
+ bool static_field_ok_in_clinit(ciField *field, ciMethod *method);
+
+ // common code for actually performing the load or store
+ void do_get_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool is_field);
+ void do_put_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool is_field);
+
+ // loading from a constant field or the constant pool
+ // returns false if push failed (non-perm field constants only, not ldcs)
+ bool push_constant(ciConstant con);
+
+ // implementation of object creation bytecodes
+ void do_new();
+ void do_newarray(BasicType elemtype);
+ void do_anewarray();
+ void do_multianewarray();
+ Node* expand_multianewarray(ciArrayKlass* array_klass, Node* *lengths, int ndimensions);
+
+ // implementation of jsr/ret
+ void do_jsr();
+ void do_ret();
+
+ float dynamic_branch_prediction(float &cnt);
+ float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci);
+ bool seems_never_taken(float prob);
+
+ void do_ifnull(BoolTest::mask btest);
+ void do_if(BoolTest::mask btest, Node* c);
+ void repush_if_args();
+ void adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
+ Block* path, Block* other_path);
+ IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask);
+ Node* jump_if_join(Node* iffalse, Node* iftrue);
+ void jump_if_true_fork(IfNode *ifNode, int dest_bci_if_true, int prof_table_index);
+ void jump_if_false_fork(IfNode *ifNode, int dest_bci_if_false, int prof_table_index);
+ void jump_if_always_fork(int dest_bci_if_true, int prof_table_index);
+
+ friend class SwitchRange;
+ void do_tableswitch();
+ void do_lookupswitch();
+ void jump_switch_ranges(Node* a, SwitchRange* lo, SwitchRange* hi, int depth = 0);
+ bool create_jump_tables(Node* a, SwitchRange* lo, SwitchRange* hi);
+
+ // helper functions for methodData style profiling
+ void test_counter_against_threshold(Node* cnt, int limit);
+ void increment_and_test_invocation_counter(int limit);
+ void test_for_osr_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize offset, int limit);
+ Node* method_data_addressing(ciMethodData* md, ciProfileData* data, ByteSize offset, Node* idx = NULL, uint stride = 0);
+ void increment_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize offset, Node* idx = NULL, uint stride = 0);
+ void set_md_flag_at(ciMethodData* md, ciProfileData* data, int flag_constant);
+
+ void profile_method_entry();
+ void profile_taken_branch(int target_bci, bool force_update = false);
+ void profile_not_taken_branch(bool force_update = false);
+ void profile_call(Node* receiver);
+ void profile_generic_call();
+ void profile_receiver_type(Node* receiver);
+ void profile_ret(int target_bci);
+ void profile_null_checkcast();
+ void profile_switch_case(int table_index);
+
+ // helper function for call statistics
+ void count_compiled_calls(bool at_method_entry, bool is_inline) PRODUCT_RETURN;
+
+ Node_Notes* make_node_notes(Node_Notes* caller_nn);
+
+ // Helper functions for handling normal and abnormal exits.
+ void build_exits();
+
+ // Fix up all exceptional control flow exiting a single bytecode.
+ void do_exceptions();
+
+ // Fix up all exiting control flow at the end of the parse.
+ void do_exits();
+
+ // Add Catch/CatchProjs
+ // The call is either a Java call or the VM's rethrow stub
+ void catch_call_exceptions(ciExceptionHandlerStream&);
+
+ // Handle all exceptions thrown by the inlined method.
+ // Also handles exceptions for individual bytecodes.
+ void catch_inline_exceptions(SafePointNode* ex_map);
+
+ // Bytecode classifier, helps decide to use uncommon_trap vs. rethrow_C.
+ bool can_rerun_bytecode();
+
+ // Merge the given map into correct exceptional exit state.
+ // Assumes that there is no applicable local handler.
+ void throw_to_exit(SafePointNode* ex_map);
+
+ public:
+#ifndef PRODUCT
+ // Handle PrintOpto, etc.
+ void show_parse_info();
+ void dump_map_adr_mem() const;
+ static void print_statistics(); // Print some performance counters
+ void dump();
+ void dump_bci(int bci);
+#endif
+};
diff --git a/src/share/vm/opto/parse1.cpp b/src/share/vm/opto/parse1.cpp
new file mode 100644
index 000000000..4f8e93162
--- /dev/null
+++ b/src/share/vm/opto/parse1.cpp
@@ -0,0 +1,2166 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_parse1.cpp.incl"
+
+// Static array so we can figure out which bytecodes stop us from compiling
+// the most. Some of the non-static variables are needed in bytecodeInfo.cpp
+// and eventually should be encapsulated in a proper class (gri 8/18/98).
+
+int nodes_created = 0; int nodes_created_old = 0;
+int methods_parsed = 0; int methods_parsed_old = 0;
+int methods_seen = 0; int methods_seen_old = 0;
+
+int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0;
+int explicit_null_checks_elided = 0, explicit_null_checks_elided_old = 0;
+int all_null_checks_found = 0, implicit_null_checks = 0;
+int implicit_null_throws = 0;
+
+int parse_idx = 0;
+size_t parse_arena = 0;
+int reclaim_idx = 0;
+int reclaim_in = 0;
+int reclaim_node = 0;
+
+#ifndef PRODUCT
+bool Parse::BytecodeParseHistogram::_initialized = false;
+uint Parse::BytecodeParseHistogram::_bytecodes_parsed [Bytecodes::number_of_codes];
+uint Parse::BytecodeParseHistogram::_nodes_constructed[Bytecodes::number_of_codes];
+uint Parse::BytecodeParseHistogram::_nodes_transformed[Bytecodes::number_of_codes];
+uint Parse::BytecodeParseHistogram::_new_values [Bytecodes::number_of_codes];
+#endif
+
+//------------------------------print_statistics-------------------------------
+#ifndef PRODUCT
+void Parse::print_statistics() {
+ tty->print_cr("--- Compiler Statistics ---");
+ tty->print("Methods seen: %d Methods parsed: %d", methods_seen, methods_parsed);
+ tty->print(" Nodes created: %d", nodes_created);
+ tty->cr();
+ if (methods_seen != methods_parsed)
+ tty->print_cr("Reasons for parse failures (NOT cumulative):");
+
+ if( explicit_null_checks_inserted )
+ tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
+ if( all_null_checks_found )
+ tty->print_cr("%d made implicit (%2d%%)", implicit_null_checks,
+ (100*implicit_null_checks)/all_null_checks_found);
+ if( implicit_null_throws )
+ tty->print_cr("%d implicit null exceptions at runtime",
+ implicit_null_throws);
+
+ if( PrintParseStatistics && BytecodeParseHistogram::initialized() ) {
+ BytecodeParseHistogram::print();
+ }
+}
+#endif
+
+//------------------------------ON STACK REPLACEMENT---------------------------
+
+// Construct a node which can be used to get incoming state for
+// on stack replacement.
+Node *Parse::fetch_interpreter_state(int index,
+ BasicType bt,
+ Node *local_addrs,
+ Node *local_addrs_base) {
+ Node *mem = memory(Compile::AliasIdxRaw);
+ Node *adr = basic_plus_adr( local_addrs_base, local_addrs, -index*wordSize );
+
+ // Very similar to LoadNode::make, except we handle un-aligned longs and
+ // doubles on Sparc. Intel can handle them just fine directly.
+ Node *l;
+ switch( bt ) { // Signature is flattened
+ case T_INT: l = new (C, 3) LoadINode( 0, mem, adr, TypeRawPtr::BOTTOM ); break;
+ case T_FLOAT: l = new (C, 3) LoadFNode( 0, mem, adr, TypeRawPtr::BOTTOM ); break;
+ case T_ADDRESS:
+ case T_OBJECT: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM ); break;
+ case T_LONG:
+ case T_DOUBLE: {
+ // Since arguments are in reverse order, the argument address 'adr'
+ // refers to the back half of the long/double. Recompute adr.
+ adr = basic_plus_adr( local_addrs_base, local_addrs, -(index+1)*wordSize );
+ if( Matcher::misaligned_doubles_ok ) {
+ l = (bt == T_DOUBLE)
+ ? (Node*)new (C, 3) LoadDNode( 0, mem, adr, TypeRawPtr::BOTTOM )
+ : (Node*)new (C, 3) LoadLNode( 0, mem, adr, TypeRawPtr::BOTTOM );
+ } else {
+ l = (bt == T_DOUBLE)
+ ? (Node*)new (C, 3) LoadD_unalignedNode( 0, mem, adr, TypeRawPtr::BOTTOM )
+ : (Node*)new (C, 3) LoadL_unalignedNode( 0, mem, adr, TypeRawPtr::BOTTOM );
+ }
+ break;
+ }
+ default: ShouldNotReachHere();
+ }
+ return _gvn.transform(l);
+}
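
The offset arithmetic above is easy to get backwards: locals sit in the OSR buffer in reverse order, so slot `index` lives at byte offset (max_locals - 1 - index) * wordSize from the buffer base, and a two-slot long/double at slots (index, index + 1) is loaded from slot index + 1's offset. A small sketch of those offsets (the word size and local count are illustrative assumptions, not values taken from the VM):

#include <cstdio>

int main() {
  const int wordSize   = 8;   // assumed 64-bit word, for illustration only
  const int max_locals = 4;
  // locals_addr, as set up in load_interpreter_state() below, points at slot 0,
  // i.e. at buffer + (max_locals - 1) * wordSize; fetch_interpreter_state()
  // then subtracts index * wordSize (or (index + 1) * wordSize for long/double).
  for (int index = 0; index < max_locals; index++) {
    std::printf("local %d -> buffer offset +%d\n",
                index, (max_locals - 1 - index) * wordSize);
  }
  // A long or double occupying slots (1, 2) is loaded from slot 2's offset:
  std::printf("two-slot value at (1,2) -> buffer offset +%d\n",
              (max_locals - 1 - 2) * wordSize);
  return 0;
}
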
+
+// Helper routine to prevent the interpreter from handing
+// unexpected typestate to an OSR method.
+// The Node l is a value newly dug out of the interpreter frame.
+// The type is the type predicted by ciTypeFlow. Note that it is
+// not a general type, but can only come from Type::get_typeflow_type.
+// The safepoint is a map which will feed an uncommon trap.
+Node* Parse::check_interpreter_type(Node* l, const Type* type,
+ SafePointNode* &bad_type_exit) {
+
+ const TypeOopPtr* tp = type->isa_oopptr();
+
+ // TypeFlow may assert null-ness if a type appears unloaded.
+ if (type == TypePtr::NULL_PTR ||
+ (tp != NULL && !tp->klass()->is_loaded())) {
+ // Value must be null, not a real oop.
+ Node* chk = _gvn.transform( new (C, 3) CmpPNode(l, null()) );
+ Node* tst = _gvn.transform( new (C, 2) BoolNode(chk, BoolTest::eq) );
+ IfNode* iff = create_and_map_if(control(), tst, PROB_MAX, COUNT_UNKNOWN);
+ set_control(_gvn.transform( new (C, 1) IfTrueNode(iff) ));
+ Node* bad_type = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ bad_type_exit->control()->add_req(bad_type);
+ l = null();
+ }
+
+ // Typeflow can also cut off paths from the CFG, based on
+ // types which appear unloaded, or call sites which appear unlinked.
+ // When paths are cut off, values at later merge points can rise
+ // toward more specific classes. Make sure these specific classes
+ // are still in effect.
+ if (tp != NULL && tp->klass() != C->env()->Object_klass()) {
+ // TypeFlow asserted a specific object type. Value must have that type.
+ Node* bad_type_ctrl = NULL;
+ l = gen_checkcast(l, makecon(TypeKlassPtr::make(tp->klass())), &bad_type_ctrl);
+ bad_type_exit->control()->add_req(bad_type_ctrl);
+ }
+
+ BasicType bt_l = _gvn.type(l)->basic_type();
+ BasicType bt_t = type->basic_type();
+ assert(_gvn.type(l)->higher_equal(type), "must constrain OSR typestate");
+ return l;
+}
+
+// Helper routine which sets up elements of the initial parser map when
+// performing a parse for on stack replacement. Add values into map.
+ // The only parameter contains the address of the interpreter arguments (the OSR buffer).
+void Parse::load_interpreter_state(Node* osr_buf) {
+ int index;
+ int max_locals = jvms()->loc_size();
+ int max_stack = jvms()->stk_size();
+
+
+ // Mismatch between method and jvms can occur since map briefly held
+ // an OSR entry state (which takes up one RawPtr word).
+ assert(max_locals == method()->max_locals(), "sanity");
+ assert(max_stack >= method()->max_stack(), "sanity");
+ assert((int)jvms()->endoff() == TypeFunc::Parms + max_locals + max_stack, "sanity");
+ assert((int)jvms()->endoff() == (int)map()->req(), "sanity");
+
+ // Find the start block.
+ Block* osr_block = start_block();
+ assert(osr_block->start() == osr_bci(), "sanity");
+
+ // Set initial BCI.
+ set_parse_bci(osr_block->start());
+
+ // Set initial stack depth.
+ set_sp(osr_block->start_sp());
+
+ // Check bailouts. We currently do not perform on stack replacement
+ // of loops in catch blocks or loops which branch with a non-empty stack.
+ if (sp() != 0) {
+ C->record_method_not_compilable("OSR starts with non-empty stack");
+ return;
+ }
+ // Do not OSR inside finally clauses:
+ if (osr_block->has_trap_at(osr_block->start())) {
+ C->record_method_not_compilable("OSR starts with an immediate trap");
+ return;
+ }
+
+ // Commute monitors from interpreter frame to compiler frame.
+ assert(jvms()->monitor_depth() == 0, "should be no active locks at beginning of osr");
+ int mcnt = osr_block->flow()->monitor_count();
+ Node *monitors_addr = basic_plus_adr(osr_buf, osr_buf, (max_locals+mcnt*2-1)*wordSize);
+ for (index = 0; index < mcnt; index++) {
+ // Make a BoxLockNode for the monitor.
+ Node *box = _gvn.transform(new (C, 1) BoxLockNode(next_monitor()));
+
+
+ // Displaced headers and locked objects are interleaved in the
+ // temp OSR buffer. We only copy the locked objects out here.
+ // Fetch the locked object from the OSR temp buffer and copy to our fastlock node.
+ Node *lock_object = fetch_interpreter_state(index*2, T_OBJECT, monitors_addr, osr_buf);
+ // Try and copy the displaced header to the BoxNode
+ Node *displaced_hdr = fetch_interpreter_state((index*2) + 1, T_ADDRESS, monitors_addr, osr_buf);
+
+
+ store_to_memory(control(), box, displaced_hdr, T_ADDRESS, Compile::AliasIdxRaw);
+
+ // Build a bogus FastLockNode (no code will be generated) and push the
+ // monitor into our debug info.
+ const FastLockNode *flock = _gvn.transform(new (C, 3) FastLockNode( 0, lock_object, box ))->as_FastLock();
+ map()->push_monitor(flock);
+
+ // If the lock is our method synchronization lock, tuck it away in
+ // _sync_lock for return and rethrow exit paths.
+ if (index == 0 && method()->is_synchronized()) {
+ _synch_lock = flock;
+ }
+ }
+
+ MethodLivenessResult live_locals = method()->liveness_at_bci(osr_bci());
+ if (!live_locals.is_valid()) {
+ // Degenerate or breakpointed method.
+ C->record_method_not_compilable("OSR in empty or breakpointed method");
+ return;
+ }
+
+ // Extract the needed locals from the interpreter frame.
+ Node *locals_addr = basic_plus_adr(osr_buf, osr_buf, (max_locals-1)*wordSize);
+
+ // find all the locals that the interpreter thinks contain live oops
+ const BitMap live_oops = method()->live_local_oops_at_bci(osr_bci());
+ for (index = 0; index < max_locals; index++) {
+
+ if (!live_locals.at(index)) {
+ continue;
+ }
+
+ const Type *type = osr_block->local_type_at(index);
+
+ if (type->isa_oopptr() != NULL) {
+
+ // 6403625: Verify that the interpreter oopMap thinks that the oop is live
+ // else we might load a stale oop if the MethodLiveness disagrees with the
+ // result of the interpreter. If the interpreter says it is dead we agree
+ // by making the value go to null.
+ //
+
+ if (!live_oops.at(index)) {
+ if (C->log() != NULL) {
+ C->log()->elem("OSR_mismatch local_index='%d'",index);
+ }
+ set_local(index, null());
+ // and ignore it for the loads
+ continue;
+ }
+ }
+
+ // Filter out TOP, HALF, and BOTTOM. (Cf. ensure_phi.)
+ if (type == Type::TOP || type == Type::HALF) {
+ continue;
+ }
+ // If the type falls to bottom, then this must be a local that
+ // is mixing ints and oops or some such. Forcing it to top
+ // makes it go dead.
+ if (type == Type::BOTTOM) {
+ continue;
+ }
+ // Construct code to access the appropriate local.
+ Node *value = fetch_interpreter_state(index, type->basic_type(), locals_addr, osr_buf);
+ set_local(index, value);
+ }
+
+ // Extract the needed stack entries from the interpreter frame.
+ for (index = 0; index < sp(); index++) {
+ const Type *type = osr_block->stack_type_at(index);
+ if (type != Type::TOP) {
+ // Currently the compiler bails out when attempting to on stack replace
+ // at a bci with a non-empty stack. We should not reach here.
+ ShouldNotReachHere();
+ }
+ }
+
+ // End the OSR migration
+ make_runtime_call(RC_LEAF, OptoRuntime::osr_end_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_end),
+ "OSR_migration_end", TypeRawPtr::BOTTOM,
+ osr_buf);
+
+ // Now that the interpreter state is loaded, make sure it will match
+ // at execution time what the compiler is expecting now:
+ SafePointNode* bad_type_exit = clone_map();
+ bad_type_exit->set_control(new (C, 1) RegionNode(1));
+
+ for (index = 0; index < max_locals; index++) {
+ if (stopped()) break;
+ Node* l = local(index);
+ if (l->is_top()) continue; // nothing here
+ const Type *type = osr_block->local_type_at(index);
+ if (type->isa_oopptr() != NULL) {
+ if (!live_oops.at(index)) {
+ // skip type check for dead oops
+ continue;
+ }
+ }
+ set_local(index, check_interpreter_type(l, type, bad_type_exit));
+ }
+
+ for (index = 0; index < sp(); index++) {
+ if (stopped()) break;
+ Node* l = stack(index);
+ if (l->is_top()) continue; // nothing here
+ const Type *type = osr_block->stack_type_at(index);
+ set_stack(index, check_interpreter_type(l, type, bad_type_exit));
+ }
+
+ if (bad_type_exit->control()->req() > 1) {
+ // Build an uncommon trap here, if any inputs can be unexpected.
+ bad_type_exit->set_control(_gvn.transform( bad_type_exit->control() ));
+ record_for_igvn(bad_type_exit->control());
+ SafePointNode* types_are_good = map();
+ set_map(bad_type_exit);
+ // The unexpected type happens because a new edge is active
+ // in the CFG, which typeflow had previously ignored.
+ // E.g., Object x = coldAtFirst() && notReached()? "str": new Integer(123).
+ // This x will be typed as Integer if notReached is not yet linked.
+ uncommon_trap(Deoptimization::Reason_unreached,
+ Deoptimization::Action_reinterpret);
+ set_map(types_are_good);
+ }
+}
+
+//------------------------------Parse------------------------------------------
+// Main parser constructor.
+Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
+ : _exits(caller)
+{
+ // Init some variables
+ _caller = caller;
+ _method = parse_method;
+ _expected_uses = expected_uses;
+ _depth = 1 + (caller->has_method() ? caller->depth() : 0);
+ _wrote_final = false;
+ _entry_bci = InvocationEntryBci;
+ _tf = NULL;
+ _block = NULL;
+ debug_only(_block_count = -1);
+ debug_only(_blocks = (Block*)-1);
+#ifndef PRODUCT
+ if (PrintCompilation || PrintOpto) {
+ // Make sure I have an inline tree, so I can print messages about it.
+ JVMState* ilt_caller = is_osr_parse() ? caller->caller() : caller;
+ InlineTree::find_subtree_from_root(C->ilt(), ilt_caller, parse_method, true);
+ }
+ _max_switch_depth = 0;
+ _est_switch_depth = 0;
+#endif
+
+ _tf = TypeFunc::make(method());
+ _iter.reset_to_method(method());
+ _flow = method()->get_flow_analysis();
+ if (_flow->failing()) {
+ C->record_method_not_compilable_all_tiers(_flow->failure_reason());
+ }
+
+ if (_expected_uses <= 0) {
+ _prof_factor = 1;
+ } else {
+ float prof_total = parse_method->interpreter_invocation_count();
+ if (prof_total <= _expected_uses) {
+ _prof_factor = 1;
+ } else {
+ _prof_factor = _expected_uses / prof_total;
+ }
+ }
+
+ CompileLog* log = C->log();
+ if (log != NULL) {
+ log->begin_head("parse method='%d' uses='%g'",
+ log->identify(parse_method), expected_uses);
+ if (depth() == 1 && C->is_osr_compilation()) {
+ log->print(" osr_bci='%d'", C->entry_bci());
+ }
+ log->stamp();
+ log->end_head();
+ }
+
+ // Accumulate deoptimization counts.
+ // (The range_check and store_check counts are checked elsewhere.)
+ ciMethodData* md = method()->method_data();
+ for (uint reason = 0; reason < md->trap_reason_limit(); reason++) {
+ uint md_count = md->trap_count(reason);
+ if (md_count != 0) {
+ if (md_count == md->trap_count_limit())
+ md_count += md->overflow_trap_count();
+ uint total_count = C->trap_count(reason);
+ uint old_count = total_count;
+ total_count += md_count;
+ // Saturate the add if it overflows.
+ if (total_count < old_count || total_count < md_count)
+ total_count = (uint)-1;
+ C->set_trap_count(reason, total_count);
+ if (log != NULL)
+ log->elem("observe trap='%s' count='%d' total='%d'",
+ Deoptimization::trap_reason_name(reason),
+ md_count, total_count);
+ }
+ }
+ // Accumulate total sum of decompilations, also.
+ C->set_decompile_count(C->decompile_count() + md->decompile_count());
+
+ _count_invocations = C->do_count_invocations();
+ _method_data_update = C->do_method_data_update();
+
+ if (log != NULL && method()->has_exception_handlers()) {
+ log->elem("observe that='has_exception_handlers'");
+ }
+
+ assert(method()->can_be_compiled(), "Can not parse this method, cutout earlier");
+ assert(method()->has_balanced_monitors(), "Can not parse unbalanced monitors, cutout earlier");
+
+ // Always register dependence if JVMTI is enabled, because
+ // either breakpoint setting or hotswapping of methods may
+ // cause deoptimization.
+ if (JvmtiExport::can_hotswap_or_post_breakpoint()) {
+ C->dependencies()->assert_evol_method(method());
+ }
+
+ methods_seen++;
+
+ // Do some special top-level things.
+ if (depth() == 1 && C->is_osr_compilation()) {
+ _entry_bci = C->entry_bci();
+ _flow = method()->get_osr_flow_analysis(osr_bci());
+ if (_flow->failing()) {
+ C->record_method_not_compilable(_flow->failure_reason());
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ tty->print_cr("OSR @%d type flow bailout: %s", _entry_bci, _flow->failure_reason());
+ if (Verbose) {
+ method()->print_oop();
+ method()->print_codes();
+ _flow->print();
+ }
+ }
+#endif
+ }
+ _tf = C->tf(); // the OSR entry type is different
+ }
+
+#ifdef ASSERT
+ if (depth() == 1) {
+ assert(C->is_osr_compilation() == this->is_osr_parse(), "OSR in sync");
+ if (C->tf() != tf()) {
+ MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
+ assert(C->env()->system_dictionary_modification_counter_changed(),
+ "Must invalidate if TypeFuncs differ");
+ }
+ } else {
+ assert(!this->is_osr_parse(), "no recursive OSR");
+ }
+#endif
+
+ methods_parsed++;
+#ifndef PRODUCT
+ // add method size here to guarantee that inlined methods are added too
+ if (TimeCompiler)
+ _total_bytes_compiled += method()->code_size();
+
+ show_parse_info();
+#endif
+
+ if (failing()) {
+ if (log) log->done("parse");
+ return;
+ }
+
+ gvn().set_type(root(), root()->bottom_type());
+ gvn().transform(top());
+
+ // Import the results of the ciTypeFlow.
+ init_blocks();
+
+ // Merge point for all normal exits
+ build_exits();
+
+ // Setup the initial JVM state map.
+ SafePointNode* entry_map = create_entry_map();
+
+ // Check for bailouts during map initialization
+ if (failing() || entry_map == NULL) {
+ if (log) log->done("parse");
+ return;
+ }
+
+ Node_Notes* caller_nn = C->default_node_notes();
+ // Collect debug info for inlined calls unless -XX:-DebugInlinedCalls.
+ if (DebugInlinedCalls || depth() == 1) {
+ C->set_default_node_notes(make_node_notes(caller_nn));
+ }
+
+ if (is_osr_parse()) {
+ Node* osr_buf = entry_map->in(TypeFunc::Parms+0);
+ entry_map->set_req(TypeFunc::Parms+0, top());
+ set_map(entry_map);
+ load_interpreter_state(osr_buf);
+ } else {
+ set_map(entry_map);
+ do_method_entry();
+ }
+
+ // Check for bailouts during method entry.
+ if (failing()) {
+ if (log) log->done("parse");
+ C->set_default_node_notes(caller_nn);
+ return;
+ }
+
+ entry_map = map(); // capture any changes performed by method setup code
+ assert(jvms()->endoff() == map()->req(), "map matches JVMS layout");
+
+ // We begin parsing as if we have just encountered a jump to the
+ // method entry.
+ Block* entry_block = start_block();
+ assert(entry_block->start() == (is_osr_parse() ? osr_bci() : 0), "");
+ set_map_clone(entry_map);
+ merge_common(entry_block, entry_block->next_path_num());
+
+#ifndef PRODUCT
+ BytecodeParseHistogram *parse_histogram_obj = new (C->env()->arena()) BytecodeParseHistogram(this, C);
+ set_parse_histogram( parse_histogram_obj );
+#endif
+
+ // Parse all the basic blocks.
+ do_all_blocks();
+
+ C->set_default_node_notes(caller_nn);
+
+ // Check for bailouts during conversion to graph
+ if (failing()) {
+ if (log) log->done("parse");
+ return;
+ }
+
+ // Fix up all exiting control flow.
+ set_map(entry_map);
+ do_exits();
+
+ // Collect a few more statistics.
+ parse_idx += C->unique();
+ parse_arena += C->node_arena()->used();
+
+ if (log) log->done("parse nodes='%d' memory='%d'",
+ C->unique(), C->node_arena()->used());
+}
+
+//---------------------------do_all_blocks-------------------------------------
+void Parse::do_all_blocks() {
+ _blocks_merged = 0;
+ _blocks_parsed = 0;
+
+ int old_blocks_merged = -1;
+ int old_blocks_parsed = -1;
+
+ for (int tries = 0; ; tries++) {
+ visit_blocks();
+ if (failing()) return; // Check for bailout
+
+ // No need for a work list. The outer loop is hardly ever repeated.
+ // The following loop traverses the blocks in a reasonable pre-order,
+ // as produced by the ciTypeFlow pass.
+
+ // This loop can be taken more than once if there are two entries to
+ // a loop (irreducible CFG), and the edge which ciTypeFlow chose
+ // as the first predecessor to the loop goes dead in the parser,
+ // due to parse-time optimization. (Could happen with obfuscated code.)
+
+ // Look for progress, or the lack of it:
+ if (_blocks_parsed == block_count()) {
+ // That's all, folks.
+ if (TraceOptoParse) {
+ tty->print_cr("All blocks parsed.");
+ }
+ break;
+ }
+
+ // How much work was done this time around?
+ int new_blocks_merged = _blocks_merged - old_blocks_merged;
+ int new_blocks_parsed = _blocks_parsed - old_blocks_parsed;
+ if (new_blocks_merged == 0) {
+ if (TraceOptoParse) {
+ tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed);
+ }
+ // No new blocks have become parseable. Some blocks are just dead.
+ break;
+ }
+ assert(new_blocks_parsed > 0, "must make progress");
+ assert(tries < block_count(), "the pre-order cannot be this bad!");
+
+ old_blocks_merged = _blocks_merged;
+ old_blocks_parsed = _blocks_parsed;
+ }
+
+#ifndef PRODUCT
+ // Make sure there are no half-processed blocks remaining.
+ // Every remaining unprocessed block is dead and may be ignored now.
+ for (int po = 0; po < block_count(); po++) {
+ Block* block = pre_order_at(po);
+ if (!block->is_parsed()) {
+ if (TraceOptoParse) {
+ tty->print("Skipped dead block %d at bci:%d", po, block->start());
+ assert(!block->is_merged(), "no half-processed blocks");
+ }
+ }
+ }
+#endif
+}
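
do_all_blocks() is a small fixed-point loop: it repeats whole passes over the blocks until either everything is parsed or a pass merges no new state, in which case the remaining blocks are dead. A standalone sketch of that shape (plain C++; the tiny CFG, the merged/parsed flags and the counters are illustrative stand-ins for the parser's real state):

#include <iostream>
#include <vector>

int main() {
  // succ[b] lists the successors of block b; block 0 is the entry.
  // Blocks 4 and 5 form an unreachable cycle, i.e. dead code.
  std::vector<std::vector<int>> succ = {{1, 2}, {3}, {3}, {}, {5}, {4}};
  std::vector<bool> merged(succ.size(), false), parsed(succ.size(), false);
  merged[0] = true;

  int blocks_parsed = 0, blocks_merged = 1;
  for (;;) {
    int old_blocks_merged = blocks_merged;
    for (int b = 0; b < static_cast<int>(succ.size()); ++b) {  // one pre-order pass
      if (parsed[b] || !merged[b]) continue;          // done, or not reached yet
      parsed[b] = true; ++blocks_parsed;
      for (int s : succ[b])
        if (!merged[s]) { merged[s] = true; ++blocks_merged; }
    }
    if (blocks_parsed == static_cast<int>(succ.size())) break;  // all parsed
    if (blocks_merged == old_blocks_merged) break;    // no progress: rest is dead
  }
  std::cout << "parsed " << blocks_parsed << " of " << succ.size()
            << " blocks\n";                           // prints "parsed 4 of 6 blocks"
  return 0;
}
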
+
+//---------------------------visit_blocks--------------------------------------
+void Parse::visit_blocks() {
+ // Walk over all blocks, parsing every one that has been reached (merged).
+ for (int po = 0; po < block_count(); po++) {
+ Block* block = pre_order_at(po);
+
+ if (block->is_parsed()) {
+ // Do not parse twice.
+ continue;
+ }
+
+ if (!block->is_merged()) {
+ // No state on this block. It had not yet been reached.
+ // Delay reaching it until later.
+ continue;
+ }
+
+ // Prepare to parse this block.
+ load_state_from(block);
+
+ if (stopped()) {
+ // Block is dead.
+ continue;
+ }
+
+ if (!block->is_ready() || block->is_handler()) {
+ // Not all preds have been parsed. We must build phis everywhere.
+ // (Note that dead locals do not get phis built, ever.)
+ ensure_phis_everywhere();
+
+ // Leave behind an undisturbed copy of the map, for future merges.
+ set_map(clone_map());
+ }
+
+ // Ready or not, parse the block.
+ do_one_block();
+
+ // Check for bailouts.
+ if (failing()) return;
+ }
+}
+
+//-------------------------------build_exits----------------------------------
+// Build normal and exceptional exit merge points.
+void Parse::build_exits() {
+ // make a clone of caller to prevent sharing of side-effects
+ _exits.set_map(_exits.clone_map());
+ _exits.clean_stack(_exits.sp());
+ _exits.sync_jvms();
+
+ RegionNode* region = new (C, 1) RegionNode(1);
+ record_for_igvn(region);
+ gvn().set_type_bottom(region);
+ _exits.set_control(region);
+
+ // Note: iophi and memphi are not transformed until do_exits.
+ Node* iophi = new (C, region->req()) PhiNode(region, Type::ABIO);
+ Node* memphi = new (C, region->req()) PhiNode(region, Type::MEMORY, TypePtr::BOTTOM);
+ _exits.set_i_o(iophi);
+ _exits.set_all_memory(memphi);
+
+ // Add a return value to the exit state. (Do not push it yet.)
+ if (tf()->range()->cnt() > TypeFunc::Parms) {
+ const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
+ // Don't "bind" an unloaded return klass to the ret_phi. If the klass
+ // becomes loaded during the subsequent parsing, the loaded and unloaded
+ // types will not join when we transform and push in do_exits().
+ const TypeOopPtr* ret_oop_type = ret_type->isa_oopptr();
+ if (ret_oop_type && !ret_oop_type->klass()->is_loaded()) {
+ ret_type = TypeOopPtr::BOTTOM;
+ }
+ int ret_size = type2size[ret_type->basic_type()];
+ Node* ret_phi = new (C, region->req()) PhiNode(region, ret_type);
+ _exits.ensure_stack(ret_size);
+ assert((int)(tf()->range()->cnt() - TypeFunc::Parms) == ret_size, "good tf range");
+ assert(method()->return_type()->size() == ret_size, "tf agrees w/ method");
+ _exits.set_argument(0, ret_phi); // here is where the parser finds it
+ // Note: ret_phi is not yet pushed, until do_exits.
+ }
+}
+
+
+//----------------------------build_start_state-------------------------------
+// Construct a state which contains only the incoming arguments from an
+// unknown caller. The method & bci will be NULL & InvocationEntryBci.
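+// For example, for a two-argument static method (int,int)->int compiled
+// normally, the start map gets ParmNode projections of the StartNode for the
+// TypeFunc::Parms machine inputs (control, I/O, memory, frame pointer,
+// return address), followed by one ParmNode per int argument; any remaining
+// slots up to max_size are filled with top().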
+JVMState* Compile::build_start_state(StartNode* start, const TypeFunc* tf) {
+ int arg_size = tf->domain()->cnt();
+ int max_size = MAX2(arg_size, (int)tf->range()->cnt());
+ JVMState* jvms = new (this) JVMState(max_size - TypeFunc::Parms);
+ SafePointNode* map = new (this, max_size) SafePointNode(max_size, NULL);
+ record_for_igvn(map);
+ assert(arg_size == TypeFunc::Parms + (is_osr_compilation() ? 1 : method()->arg_size()), "correct arg_size");
+ Node_Notes* old_nn = default_node_notes();
+ if (old_nn != NULL && has_method()) {
+ Node_Notes* entry_nn = old_nn->clone(this);
+ JVMState* entry_jvms = new(this) JVMState(method(), old_nn->jvms());
+ entry_jvms->set_offsets(0);
+ entry_jvms->set_bci(entry_bci());
+ entry_nn->set_jvms(entry_jvms);
+ set_default_node_notes(entry_nn);
+ }
+ uint i;
+ for (i = 0; i < (uint)arg_size; i++) {
+ Node* parm = initial_gvn()->transform(new (this, 1) ParmNode(start, i));
+ map->init_req(i, parm);
+ // Record all these guys for later GVN.
+ record_for_igvn(parm);
+ }
+ for (; i < map->req(); i++) {
+ map->init_req(i, top());
+ }
+ assert(jvms->argoff() == TypeFunc::Parms, "parser gets arguments here");
+ set_default_node_notes(old_nn);
+ map->set_jvms(jvms);
+ jvms->set_map(map);
+ return jvms;
+}
+
+//-----------------------------make_node_notes---------------------------------
+Node_Notes* Parse::make_node_notes(Node_Notes* caller_nn) {
+ if (caller_nn == NULL) return NULL;
+ Node_Notes* nn = caller_nn->clone(C);
+ JVMState* caller_jvms = nn->jvms();
+ JVMState* jvms = new (C) JVMState(method(), caller_jvms);
+ jvms->set_offsets(0);
+ jvms->set_bci(_entry_bci);
+ nn->set_jvms(jvms);
+ return nn;
+}
+
+
+//--------------------------return_values--------------------------------------
+void Compile::return_values(JVMState* jvms) {
+ GraphKit kit(jvms);
+ Node* ret = new (this, TypeFunc::Parms) ReturnNode(TypeFunc::Parms,
+ kit.control(),
+ kit.i_o(),
+ kit.reset_memory(),
+ kit.frameptr(),
+ kit.returnadr());
+ // Add zero or 1 return values
+ int ret_size = tf()->range()->cnt() - TypeFunc::Parms;
+ if (ret_size > 0) {
+ kit.inc_sp(-ret_size); // pop the return value(s)
+ kit.sync_jvms();
+ ret->add_req(kit.argument(0));
+ // Note: The second dummy edge is not needed by a ReturnNode.
+ }
+ // bind it to root
+ root()->add_req(ret);
+ record_for_igvn(ret);
+ initial_gvn()->transform_no_reclaim(ret);
+}
+
+//------------------------rethrow_exceptions-----------------------------------
+// Bind all exception states in the list into a single RethrowNode.
+void Compile::rethrow_exceptions(JVMState* jvms) {
+ GraphKit kit(jvms);
+ if (!kit.has_exceptions()) return; // nothing to generate
+ // Load my combined exception state into the kit, with all phis transformed:
+ SafePointNode* ex_map = kit.combine_and_pop_all_exception_states();
+ Node* ex_oop = kit.use_exception_state(ex_map);
+ RethrowNode* exit = new (this, TypeFunc::Parms + 1) RethrowNode(kit.control(),
+ kit.i_o(), kit.reset_memory(),
+ kit.frameptr(), kit.returnadr(),
+ // like a return but with exception input
+ ex_oop);
+ // bind to root
+ root()->add_req(exit);
+ record_for_igvn(exit);
+ initial_gvn()->transform_no_reclaim(exit);
+}
+
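+//--------------------------can_rerun_bytecode---------------------------------
+// Debug-only classification (used via debug_only in do_exceptions): roughly,
+// a bytecode can be "re-run" if its exception checks come before any of its
+// side effects; the invoke bytecodes cannot, and any other bytecode is not
+// expected to produce an exception here at all.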
+bool Parse::can_rerun_bytecode() {
+ switch (bc()) {
+ case Bytecodes::_ldc:
+ case Bytecodes::_ldc_w:
+ case Bytecodes::_ldc2_w:
+ case Bytecodes::_getfield:
+ case Bytecodes::_putfield:
+ case Bytecodes::_getstatic:
+ case Bytecodes::_putstatic:
+ case Bytecodes::_arraylength:
+ case Bytecodes::_baload:
+ case Bytecodes::_caload:
+ case Bytecodes::_iaload:
+ case Bytecodes::_saload:
+ case Bytecodes::_faload:
+ case Bytecodes::_aaload:
+ case Bytecodes::_laload:
+ case Bytecodes::_daload:
+ case Bytecodes::_bastore:
+ case Bytecodes::_castore:
+ case Bytecodes::_iastore:
+ case Bytecodes::_sastore:
+ case Bytecodes::_fastore:
+ case Bytecodes::_aastore:
+ case Bytecodes::_lastore:
+ case Bytecodes::_dastore:
+ case Bytecodes::_irem:
+ case Bytecodes::_idiv:
+ case Bytecodes::_lrem:
+ case Bytecodes::_ldiv:
+ case Bytecodes::_frem:
+ case Bytecodes::_fdiv:
+ case Bytecodes::_drem:
+ case Bytecodes::_ddiv:
+ case Bytecodes::_checkcast:
+ case Bytecodes::_instanceof:
+ case Bytecodes::_athrow:
+ case Bytecodes::_anewarray:
+ case Bytecodes::_newarray:
+ case Bytecodes::_multianewarray:
+ case Bytecodes::_new:
+ case Bytecodes::_monitorenter: // can re-run initial null check, only
+ case Bytecodes::_return:
+    return true;
+
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial:
+ case Bytecodes::_invokevirtual:
+ case Bytecodes::_invokeinterface:
+    return false;
+
+ default:
+ assert(false, "unexpected bytecode produced an exception");
+ return true;
+ }
+}
+
+//---------------------------do_exceptions-------------------------------------
+// Process exceptions arising from the current bytecode.
+// Send caught exceptions to the proper handler within this method.
+// Unhandled exceptions feed into _exit.
+void Parse::do_exceptions() {
+ if (!has_exceptions()) return;
+
+ if (failing()) {
+ // Pop them all off and throw them away.
+ while (pop_exception_state() != NULL) ;
+ return;
+ }
+
+ // Make sure we can classify this bytecode if we need to.
+ debug_only(can_rerun_bytecode());
+
+ PreserveJVMState pjvms(this, false);
+
+ SafePointNode* ex_map;
+ while ((ex_map = pop_exception_state()) != NULL) {
+ if (!method()->has_exception_handlers()) {
+ // Common case: Transfer control outward.
+ // Doing it this early allows the exceptions to common up
+ // even between adjacent method calls.
+ throw_to_exit(ex_map);
+ } else {
+ // Have to look at the exception first.
+ assert(stopped(), "catch_inline_exceptions trashes the map");
+ catch_inline_exceptions(ex_map);
+ stop_and_kill_map(); // we used up this exception state; kill it
+ }
+ }
+
+ // We now return to our regularly scheduled program:
+}
+
+//---------------------------throw_to_exit-------------------------------------
+// Merge the given map into an exception exit from this method.
+// The exception exit will handle any unlocking of receiver.
+// The ex_oop must be saved within the ex_map, unlike merge_exception.
+void Parse::throw_to_exit(SafePointNode* ex_map) {
+ // Pop the JVMS to (a copy of) the caller.
+ GraphKit caller;
+ caller.set_map_clone(_caller->map());
+ caller.set_bci(_caller->bci());
+ caller.set_sp(_caller->sp());
+ // Copy out the standard machine state:
+ for (uint i = 0; i < TypeFunc::Parms; i++) {
+ caller.map()->set_req(i, ex_map->in(i));
+ }
+ // ...and the exception:
+ Node* ex_oop = saved_ex_oop(ex_map);
+ SafePointNode* caller_ex_map = caller.make_exception_state(ex_oop);
+ // Finally, collect the new exception state in my exits:
+ _exits.add_exception_state(caller_ex_map);
+}
+
+//------------------------------do_exits---------------------------------------
+void Parse::do_exits() {
+ set_parse_bci(InvocationEntryBci);
+
+ // Now peephole on the return bits
+ Node* region = _exits.control();
+ _exits.set_control(gvn().transform(region));
+
+ Node* iophi = _exits.i_o();
+ _exits.set_i_o(gvn().transform(iophi));
+
+ if (wrote_final()) {
+ // This method (which must be a constructor by the rules of Java)
+ // wrote a final. The effects of all initializations must be
+ // committed to memory before any code after the constructor
+  // publishes the reference to the newly constructed object.
+ // Rather than wait for the publication, we simply block the
+ // writes here. Rather than put a barrier on only those writes
+ // which are required to complete, we force all writes to complete.
+ //
+ // "All bets are off" unless the first publication occurs after a
+ // normal return from the constructor. We do not attempt to detect
+ // such unusual early publications. But no barrier is needed on
+ // exceptional returns, since they cannot publish normally.
+ //
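+    // E.g., for  class C { final int x;  C(int v) { x = v; } }  the release
+    // barrier inserted below ensures the store to C.x is complete before the
+    // constructor returns and the new C instance can be published.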
+ _exits.insert_mem_bar(Op_MemBarRelease);
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ method()->print_name();
+ tty->print_cr(" writes finals and needs a memory barrier");
+ }
+#endif
+ }
+
+ for (MergeMemStream mms(_exits.merged_memory()); mms.next_non_empty(); ) {
+ // transform each slice of the original memphi:
+ mms.set_memory(_gvn.transform(mms.memory()));
+ }
+
+ if (tf()->range()->cnt() > TypeFunc::Parms) {
+ const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
+ Node* ret_phi = _gvn.transform( _exits.argument(0) );
+ assert(_exits.control()->is_top() || !_gvn.type(ret_phi)->empty(), "return value must be well defined");
+ _exits.push_node(ret_type->basic_type(), ret_phi);
+ }
+
+ // Note: Logic for creating and optimizing the ReturnNode is in Compile.
+
+ // Unlock along the exceptional paths.
+ // This is done late so that we can common up equivalent exceptions
+ // (e.g., null checks) arising from multiple points within this method.
+ // See GraphKit::add_exception_state, which performs the commoning.
+ bool do_synch = method()->is_synchronized() && GenerateSynchronizationCode;
+
+ // record exit from a method if compiled while Dtrace is turned on.
+ if (do_synch || DTraceMethodProbes) {
+ // First move the exception list out of _exits:
+ GraphKit kit(_exits.transfer_exceptions_into_jvms());
+ SafePointNode* normal_map = kit.map(); // keep this guy safe
+ // Now re-collect the exceptions into _exits:
+ SafePointNode* ex_map;
+ while ((ex_map = kit.pop_exception_state()) != NULL) {
+ Node* ex_oop = kit.use_exception_state(ex_map);
+ // Force the exiting JVM state to have this method at InvocationEntryBci.
+ // The exiting JVM state is otherwise a copy of the calling JVMS.
+ JVMState* caller = kit.jvms();
+ JVMState* ex_jvms = caller->clone_shallow(C);
+ ex_jvms->set_map(kit.clone_map());
+ ex_jvms->map()->set_jvms(ex_jvms);
+ ex_jvms->set_bci( InvocationEntryBci);
+ kit.set_jvms(ex_jvms);
+ if (do_synch) {
+ // Add on the synchronized-method box/object combo
+ kit.map()->push_monitor(_synch_lock);
+ // Unlock!
+ kit.shared_unlock(_synch_lock->box_node(), _synch_lock->obj_node());
+ }
+ if (DTraceMethodProbes) {
+ kit.make_dtrace_method_exit(method());
+ }
+ // Done with exception-path processing.
+ ex_map = kit.make_exception_state(ex_oop);
+ assert(ex_jvms->same_calls_as(ex_map->jvms()), "sanity");
+ // Pop the last vestige of this method:
+ ex_map->set_jvms(caller->clone_shallow(C));
+ ex_map->jvms()->set_map(ex_map);
+ _exits.push_exception_state(ex_map);
+ }
+ assert(_exits.map() == normal_map, "keep the same return state");
+ }
+
+ {
+ // Capture very early exceptions (receiver null checks) from caller JVMS
+ GraphKit caller(_caller);
+ SafePointNode* ex_map;
+ while ((ex_map = caller.pop_exception_state()) != NULL) {
+ _exits.add_exception_state(ex_map);
+ }
+ }
+}
+
+//-----------------------------create_entry_map-------------------------------
+// Initialize our parser map to contain the types at method entry.
+// For OSR, the map contains a single RawPtr parameter.
+// Initial monitor locking for sync. methods is performed by do_method_entry.
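+// For a normal (non-OSR) parse the resulting map has
+//   len = TypeFunc::Parms + max_locals + max_stack
+// inputs: machine state in [0, Parms), locals in [Parms, Parms + max_locals)
+// with the leading ones bound to the incoming arguments, and the expression
+// stack above that; the remaining locals and the stack start out as top().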
+SafePointNode* Parse::create_entry_map() {
+ // Check for really stupid bail-out cases.
+ uint len = TypeFunc::Parms + method()->max_locals() + method()->max_stack();
+ if (len >= 32760) {
+ C->record_method_not_compilable_all_tiers("too many local variables");
+ return NULL;
+ }
+
+ // If this is an inlined method, we may have to do a receiver null check.
+ if (_caller->has_method() && is_normal_parse() && !method()->is_static()) {
+ GraphKit kit(_caller);
+ kit.null_check_receiver(method());
+ _caller = kit.transfer_exceptions_into_jvms();
+ if (kit.stopped()) {
+ _exits.add_exception_states_from(_caller);
+ _exits.set_jvms(_caller);
+ return NULL;
+ }
+ }
+
+ assert(method() != NULL, "parser must have a method");
+
+ // Create an initial safepoint to hold JVM state during parsing
+ JVMState* jvms = new (C) JVMState(method(), _caller->has_method() ? _caller : NULL);
+ set_map(new (C, len) SafePointNode(len, jvms));
+ jvms->set_map(map());
+ record_for_igvn(map());
+ assert(jvms->endoff() == len, "correct jvms sizing");
+
+ SafePointNode* inmap = _caller->map();
+ assert(inmap != NULL, "must have inmap");
+
+ uint i;
+
+ // Pass thru the predefined input parameters.
+ for (i = 0; i < TypeFunc::Parms; i++) {
+ map()->init_req(i, inmap->in(i));
+ }
+
+ if (depth() == 1) {
+ assert(map()->memory()->Opcode() == Op_Parm, "");
+ // Insert the memory aliasing node
+ set_all_memory(reset_memory());
+ }
+ assert(merged_memory(), "");
+
+ // Now add the locals which are initially bound to arguments:
+ uint arg_size = tf()->domain()->cnt();
+ ensure_stack(arg_size - TypeFunc::Parms); // OSR methods have funny args
+ for (i = TypeFunc::Parms; i < arg_size; i++) {
+ map()->init_req(i, inmap->argument(_caller, i - TypeFunc::Parms));
+ }
+
+ // Clear out the rest of the map (locals and stack)
+ for (i = arg_size; i < len; i++) {
+ map()->init_req(i, top());
+ }
+
+ SafePointNode* entry_map = stop();
+ return entry_map;
+}
+
+//-----------------------------do_method_entry--------------------------------
+// Emit any code needed in the pseudo-block before BCI zero.
+// The main thing to do is lock the receiver of a synchronized method.
+void Parse::do_method_entry() {
+ set_parse_bci(InvocationEntryBci); // Pseudo-BCP
+ set_sp(0); // Java Stack Pointer
+
+ NOT_PRODUCT( count_compiled_calls(true/*at_method_entry*/, false/*is_inline*/); )
+
+ if (DTraceMethodProbes) {
+ make_dtrace_method_entry(method());
+ }
+
+ // If the method is synchronized, we need to construct a lock node, attach
+ // it to the Start node, and pin it there.
+ if (method()->is_synchronized()) {
+ // Insert a FastLockNode right after the Start which takes as arguments
+ // the current thread pointer, the "this" pointer & the address of the
+ // stack slot pair used for the lock. The "this" pointer is a projection
+ // off the start node, but the locking spot has to be constructed by
+ // creating a ConLNode of 0, and boxing it with a BoxLockNode. The BoxLockNode
+ // becomes the second argument to the FastLockNode call. The
+ // FastLockNode becomes the new control parent to pin it to the start.
+
+ // Setup Object Pointer
+ Node *lock_obj = NULL;
+ if(method()->is_static()) {
+ ciInstance* mirror = _method->holder()->java_mirror();
+ const TypeInstPtr *t_lock = TypeInstPtr::make(mirror);
+ lock_obj = makecon(t_lock);
+ } else { // Else pass the "this" pointer,
+ lock_obj = local(0); // which is Parm0 from StartNode
+ }
+ // Clear out dead values from the debug info.
+ kill_dead_locals();
+ // Build the FastLockNode
+ _synch_lock = shared_lock(lock_obj);
+ }
+
+ if (depth() == 1) {
+ increment_and_test_invocation_counter(Tier2CompileThreshold);
+ }
+}
+
+//------------------------------init_blocks------------------------------------
+// Initialize our parser map to contain the types/monitors at method entry.
+void Parse::init_blocks() {
+ // Create the blocks.
+ _block_count = flow()->block_count();
+ _blocks = NEW_RESOURCE_ARRAY(Block, _block_count);
+ Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count);
+
+ int po;
+
+ // Initialize the structs.
+ for (po = 0; po < block_count(); po++) {
+ Block* block = pre_order_at(po);
+ block->init_node(this, po);
+ }
+
+ // Collect predecessor and successor information.
+ for (po = 0; po < block_count(); po++) {
+ Block* block = pre_order_at(po);
+ block->init_graph(this);
+ }
+}
+
+//-------------------------------init_node-------------------------------------
+void Parse::Block::init_node(Parse* outer, int po) {
+ _flow = outer->flow()->pre_order_at(po);
+ _pred_count = 0;
+ _preds_parsed = 0;
+ _count = 0;
+ assert(pred_count() == 0 && preds_parsed() == 0, "sanity");
+ assert(!(is_merged() || is_parsed() || is_handler()), "sanity");
+ assert(_live_locals.size() == 0, "sanity");
+
+ // entry point has additional predecessor
+ if (flow()->is_start()) _pred_count++;
+ assert(flow()->is_start() == (this == outer->start_block()), "");
+}
+
+//-------------------------------init_graph------------------------------------
+void Parse::Block::init_graph(Parse* outer) {
+ // Create the successor list for this parser block.
+ GrowableArray<ciTypeFlow::Block*>* tfs = flow()->successors();
+ GrowableArray<ciTypeFlow::Block*>* tfe = flow()->exceptions();
+ int ns = tfs->length();
+ int ne = tfe->length();
+ _num_successors = ns;
+ _all_successors = ns+ne;
+ _successors = (ns+ne == 0) ? NULL : NEW_RESOURCE_ARRAY(Block*, ns+ne);
+ int p = 0;
+ for (int i = 0; i < ns+ne; i++) {
+ ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns);
+ Block* block2 = outer->pre_order_at(tf2->pre_order());
+ _successors[i] = block2;
+
+ // Accumulate pred info for the other block, too.
+ if (i < ns) {
+ block2->_pred_count++;
+ } else {
+ block2->_is_handler = true;
+ }
+
+ #ifdef ASSERT
+ // A block's successors must be distinguishable by BCI.
+ // That is, no bytecode is allowed to branch to two different
+ // clones of the same code location.
+ for (int j = 0; j < i; j++) {
+ Block* block1 = _successors[j];
+ if (block1 == block2) continue; // duplicates are OK
+ assert(block1->start() != block2->start(), "successors have unique bcis");
+ }
+ #endif
+ }
+
+  // Note: We never call next_path_num along exception paths, so they
+  // never get processed as "ready".  Also, the input phis of exception
+  // handlers get specially processed, so that phis are built for them
+  // eagerly (see ensure_phis_everywhere) rather than lazily at merges.
+}
+
+//---------------------------successor_for_bci---------------------------------
+Parse::Block* Parse::Block::successor_for_bci(int bci) {
+ for (int i = 0; i < all_successors(); i++) {
+ Block* block2 = successor_at(i);
+ if (block2->start() == bci) return block2;
+ }
+ // We can actually reach here if ciTypeFlow traps out a block
+ // due to an unloaded class, and concurrently with compilation the
+ // class is then loaded, so that a later phase of the parser is
+ // able to see more of the bytecode CFG. Or, the flow pass and
+ // the parser can have a minor difference of opinion about executability
+ // of bytecodes. For example, "obj.field = null" is executable even
+ // if the field's type is an unloaded class; the flow pass used to
+ // make a trap for such code.
+ return NULL;
+}
+
+
+//-----------------------------stack_type_at-----------------------------------
+const Type* Parse::Block::stack_type_at(int i) const {
+ return get_type(flow()->stack_type_at(i));
+}
+
+
+//-----------------------------local_type_at-----------------------------------
+const Type* Parse::Block::local_type_at(int i) const {
+ // Make dead locals fall to bottom.
+ if (_live_locals.size() == 0) {
+ MethodLivenessResult live_locals = flow()->outer()->method()->liveness_at_bci(start());
+ // This bitmap can be zero length if we saw a breakpoint.
+ // In such cases, pretend they are all live.
+ ((Block*)this)->_live_locals = live_locals;
+ }
+ if (_live_locals.size() > 0 && !_live_locals.at(i))
+ return Type::BOTTOM;
+
+ return get_type(flow()->local_type_at(i));
+}
+
+
+#ifndef PRODUCT
+
+//----------------------------name_for_bc--------------------------------------
+// helper method for BytecodeParseHistogram
+static const char* name_for_bc(int i) {
+ return Bytecodes::is_defined(i) ? Bytecodes::name(Bytecodes::cast(i)) : "xxxunusedxxx";
+}
+
+//----------------------------BytecodeParseHistogram------------------------------------
+Parse::BytecodeParseHistogram::BytecodeParseHistogram(Parse *p, Compile *c) {
+ _parser = p;
+ _compiler = c;
+ if( ! _initialized ) { _initialized = true; reset(); }
+}
+
+//----------------------------current_count------------------------------------
+int Parse::BytecodeParseHistogram::current_count(BPHType bph_type) {
+ switch( bph_type ) {
+ case BPH_transforms: { return _parser->gvn().made_progress(); }
+ case BPH_values: { return _parser->gvn().made_new_values(); }
+ default: { ShouldNotReachHere(); return 0; }
+ }
+}
+
+//----------------------------initialized--------------------------------------
+bool Parse::BytecodeParseHistogram::initialized() { return _initialized; }
+
+//----------------------------reset--------------------------------------------
+void Parse::BytecodeParseHistogram::reset() {
+ int i = Bytecodes::number_of_codes;
+ while (i-- > 0) { _bytecodes_parsed[i] = 0; _nodes_constructed[i] = 0; _nodes_transformed[i] = 0; _new_values[i] = 0; }
+}
+
+//----------------------------set_initial_state--------------------------------
+// Record info when starting to parse one bytecode
+void Parse::BytecodeParseHistogram::set_initial_state( Bytecodes::Code bc ) {
+ if( PrintParseStatistics && !_parser->is_osr_parse() ) {
+ _initial_bytecode = bc;
+ _initial_node_count = _compiler->unique();
+ _initial_transforms = current_count(BPH_transforms);
+ _initial_values = current_count(BPH_values);
+ }
+}
+
+//----------------------------record_change--------------------------------
+// Record results of parsing one bytecode
+void Parse::BytecodeParseHistogram::record_change() {
+ if( PrintParseStatistics && !_parser->is_osr_parse() ) {
+ ++_bytecodes_parsed[_initial_bytecode];
+ _nodes_constructed [_initial_bytecode] += (_compiler->unique() - _initial_node_count);
+ _nodes_transformed [_initial_bytecode] += (current_count(BPH_transforms) - _initial_transforms);
+ _new_values [_initial_bytecode] += (current_count(BPH_values) - _initial_values);
+ }
+}
+
+
+//----------------------------print--------------------------------------------
+void Parse::BytecodeParseHistogram::print(float cutoff) {
+ ResourceMark rm;
+ // print profile
+ int total = 0;
+ int i = 0;
+ for( i = 0; i < Bytecodes::number_of_codes; ++i ) { total += _bytecodes_parsed[i]; }
+ int abs_sum = 0;
+ tty->cr(); //0123456789012345678901234567890123456789012345678901234567890123456789
+ tty->print_cr("Histogram of %d parsed bytecodes:", total);
+ if( total == 0 ) { return; }
+ tty->cr();
+ tty->print_cr("absolute: count of compiled bytecodes of this type");
+ tty->print_cr("relative: percentage contribution to compiled nodes");
+ tty->print_cr("nodes : Average number of nodes constructed per bytecode");
+ tty->print_cr("rnodes : Significance towards total nodes constructed, (nodes*relative)");
+ tty->print_cr("transforms: Average amount of tranform progress per bytecode compiled");
+ tty->print_cr("values : Average number of node values improved per bytecode");
+ tty->print_cr("name : Bytecode name");
+ tty->cr();
+ tty->print_cr(" absolute relative nodes rnodes transforms values name");
+ tty->print_cr("----------------------------------------------------------------------");
+ while (--i > 0) {
+ int abs = _bytecodes_parsed[i];
+ float rel = abs * 100.0F / total;
+ float nodes = _bytecodes_parsed[i] == 0 ? 0 : (1.0F * _nodes_constructed[i])/_bytecodes_parsed[i];
+ float rnodes = _bytecodes_parsed[i] == 0 ? 0 : rel * nodes;
+ float xforms = _bytecodes_parsed[i] == 0 ? 0 : (1.0F * _nodes_transformed[i])/_bytecodes_parsed[i];
+ float values = _bytecodes_parsed[i] == 0 ? 0 : (1.0F * _new_values [i])/_bytecodes_parsed[i];
+ if (cutoff <= rel) {
+ tty->print_cr("%10d %7.2f%% %6.1f %6.2f %6.1f %6.1f %s", abs, rel, nodes, rnodes, xforms, values, name_for_bc(i));
+ abs_sum += abs;
+ }
+ }
+ tty->print_cr("----------------------------------------------------------------------");
+ float rel_sum = abs_sum * 100.0F / total;
+ tty->print_cr("%10d %7.2f%% (cutoff = %.2f%%)", abs_sum, rel_sum, cutoff);
+ tty->print_cr("----------------------------------------------------------------------");
+ tty->cr();
+}
+#endif
+
+//----------------------------load_state_from----------------------------------
+// Load block/map/sp.  But do not touch iter/bci.
+void Parse::load_state_from(Block* block) {
+ set_block(block);
+ // load the block's JVM state:
+ set_map(block->start_map());
+ set_sp( block->start_sp());
+}
+
+
+//-----------------------------record_state------------------------------------
+void Parse::Block::record_state(Parse* p) {
+ assert(!is_merged(), "can only record state once, on 1st inflow");
+ assert(start_sp() == p->sp(), "stack pointer must agree with ciTypeFlow");
+ set_start_map(p->stop());
+}
+
+
+//------------------------------do_one_block-----------------------------------
+void Parse::do_one_block() {
+ if (TraceOptoParse) {
+ Block *b = block();
+ int ns = b->num_successors();
+ int nt = b->all_successors();
+
+ tty->print("Parsing block #%d at bci [%d,%d), successors: ",
+ block()->pre_order(), block()->start(), block()->limit());
+ for (int i = 0; i < nt; i++) {
+ tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order());
+ }
+ tty->print_cr("");
+ }
+
+ assert(block()->is_merged(), "must be merged before being parsed");
+ block()->mark_parsed();
+ ++_blocks_parsed;
+
+ // Set iterator to start of block.
+ iter().reset_to_bci(block()->start());
+
+ CompileLog* log = C->log();
+
+ // Parse bytecodes
+ while (!stopped() && !failing()) {
+ iter().next();
+
+ // Learn the current bci from the iterator:
+ set_parse_bci(iter().cur_bci());
+
+ if (bci() == block()->limit()) {
+ // Do not walk into the next block until directed by do_all_blocks.
+ merge(bci());
+ break;
+ }
+ assert(bci() < block()->limit(), "bci still in block");
+
+ if (log != NULL) {
+ // Output an optional context marker, to help place actions
+ // that occur during parsing of this BC. If there is no log
+ // output until the next context string, this context string
+ // will be silently ignored.
+ log->context()->reset();
+ log->context()->print_cr("<bc code='%d' bci='%d'/>", (int)bc(), bci());
+ }
+
+ if (block()->has_trap_at(bci())) {
+ // We must respect the flow pass's traps, because it will refuse
+ // to produce successors for trapping blocks.
+ int trap_index = block()->flow()->trap_index();
+ assert(trap_index != 0, "trap index must be valid");
+ uncommon_trap(trap_index);
+ break;
+ }
+
+ NOT_PRODUCT( parse_histogram()->set_initial_state(bc()); );
+
+#ifdef ASSERT
+ int pre_bc_sp = sp();
+ int inputs, depth;
+ bool have_se = !stopped() && compute_stack_effects(inputs, depth);
+ assert(!have_se || pre_bc_sp >= inputs, "have enough stack to execute this BC");
+#endif //ASSERT
+
+ do_one_bytecode();
+
+ assert(!have_se || stopped() || failing() || (sp() - pre_bc_sp) == depth, "correct depth prediction");
+
+ do_exceptions();
+
+ NOT_PRODUCT( parse_histogram()->record_change(); );
+
+ if (log != NULL) log->context()->reset(); // done w/ this one
+
+ // Fall into next bytecode. Each bytecode normally has 1 sequential
+ // successor which is typically made ready by visiting this bytecode.
+ // If the successor has several predecessors, then it is a merge
+ // point, starts a new basic block, and is handled like other basic blocks.
+ }
+}
+
+
+//---------------------------set_parse_bci-------------------------------------
+void Parse::set_parse_bci(int bci) {
+ set_bci(bci);
+ Node_Notes* nn = C->default_node_notes();
+ if (nn == NULL) return;
+
+ // Collect debug info for inlined calls unless -XX:-DebugInlinedCalls.
+ if (!DebugInlinedCalls && depth() > 1) {
+ return;
+ }
+
+ // Update the JVMS annotation, if present.
+ JVMState* jvms = nn->jvms();
+ if (jvms != NULL && jvms->bci() != bci) {
+ // Update the JVMS.
+ jvms = jvms->clone_shallow(C);
+ jvms->set_bci(bci);
+ nn->set_jvms(jvms);
+ }
+}
+
+//------------------------------merge------------------------------------------
+// Merge the current mapping into the basic block starting at bci
+void Parse::merge(int target_bci) {
+ Block* target = successor_for_bci(target_bci);
+ if (target == NULL) { handle_missing_successor(target_bci); return; }
+ assert(!target->is_ready(), "our arrival must be expected");
+ int pnum = target->next_path_num();
+ merge_common(target, pnum);
+}
+
+//-------------------------merge_new_path--------------------------------------
+// Merge the current mapping into the basic block, using a new path
+void Parse::merge_new_path(int target_bci) {
+ Block* target = successor_for_bci(target_bci);
+ if (target == NULL) { handle_missing_successor(target_bci); return; }
+ assert(!target->is_ready(), "new path into frozen graph");
+ int pnum = target->add_new_path();
+ merge_common(target, pnum);
+}
+
+//-------------------------merge_exception-------------------------------------
+// Merge the current mapping into the basic block starting at bci
+// The ex_oop must be pushed on the stack, unlike throw_to_exit.
+void Parse::merge_exception(int target_bci) {
+  assert(sp() == 1, "must have only the thrown exception on the stack");
+ Block* target = successor_for_bci(target_bci);
+ if (target == NULL) { handle_missing_successor(target_bci); return; }
+ assert(target->is_handler(), "exceptions are handled by special blocks");
+ int pnum = target->add_new_path();
+ merge_common(target, pnum);
+}
+
+//--------------------handle_missing_successor---------------------------------
+void Parse::handle_missing_successor(int target_bci) {
+#ifndef PRODUCT
+ Block* b = block();
+ int trap_bci = b->flow()->has_trap()? b->flow()->trap_bci(): -1;
+ tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci);
+#endif
+ ShouldNotReachHere();
+}
+
+//--------------------------merge_common---------------------------------------
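+// Example: for  x = p ? a : b;  the two control paths meet at a common bci.
+// The first arrival simply records its map (wrapping control in a fresh
+// RegionNode if needed); the second arrival fills in the region's remaining
+// control input and builds or extends a PhiNode over the stack slot holding
+// a vs. b.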
+void Parse::merge_common(Parse::Block* target, int pnum) {
+ if (TraceOptoParse) {
+ tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start());
+ }
+
+ // Zap extra stack slots to top
+ assert(sp() == target->start_sp(), "");
+ clean_stack(sp());
+
+ if (!target->is_merged()) { // No prior mapping at this bci
+ if (TraceOptoParse) { tty->print(" with empty state"); }
+
+ // If this path is dead, do not bother capturing it as a merge.
+ // It is "as if" we had 1 fewer predecessors from the beginning.
+ if (stopped()) {
+ if (TraceOptoParse) tty->print_cr(", but path is dead and doesn't count");
+ return;
+ }
+
+ // Record that a new block has been merged.
+ ++_blocks_merged;
+
+ // Make a region if we know there are multiple or unpredictable inputs.
+ // (Also, if this is a plain fall-through, we might see another region,
+ // which must not be allowed into this block's map.)
+ if (pnum > PhiNode::Input // Known multiple inputs.
+ || target->is_handler() // These have unpredictable inputs.
+ || control()->is_Region()) { // We must hide this guy.
+ // Add a Region to start the new basic block. Phis will be added
+ // later lazily.
+ int edges = target->pred_count();
+ if (edges < pnum) edges = pnum; // might be a new path!
+ Node *r = new (C, edges+1) RegionNode(edges+1);
+ gvn().set_type(r, Type::CONTROL);
+ record_for_igvn(r);
+ // zap all inputs to NULL for debugging (done in Node(uint) constructor)
+ // for (int j = 1; j < edges+1; j++) { r->init_req(j, NULL); }
+ r->init_req(pnum, control());
+ set_control(r);
+ }
+
+ // Convert the existing Parser mapping into a mapping at this bci.
+ store_state_to(target);
+ assert(target->is_merged(), "do not come here twice");
+
+ } else { // Prior mapping at this bci
+ if (TraceOptoParse) { tty->print(" with previous state"); }
+
+ // We must not manufacture more phis if the target is already parsed.
+ bool nophi = target->is_parsed();
+
+ SafePointNode* newin = map();// Hang on to incoming mapping
+ Block* save_block = block(); // Hang on to incoming block;
+ load_state_from(target); // Get prior mapping
+
+ assert(newin->jvms()->locoff() == jvms()->locoff(), "JVMS layouts agree");
+ assert(newin->jvms()->stkoff() == jvms()->stkoff(), "JVMS layouts agree");
+ assert(newin->jvms()->monoff() == jvms()->monoff(), "JVMS layouts agree");
+ assert(newin->jvms()->endoff() == jvms()->endoff(), "JVMS layouts agree");
+
+ // Iterate over my current mapping and the old mapping.
+ // Where different, insert Phi functions.
+ // Use any existing Phi functions.
+ assert(control()->is_Region(), "must be merging to a region");
+ RegionNode* r = control()->as_Region();
+
+ // Compute where to merge into
+ // Merge incoming control path
+ r->set_req(pnum, newin->control());
+
+ if (pnum == 1) { // Last merge for this Region?
+ _gvn.transform_no_reclaim(r);
+ record_for_igvn(r);
+ }
+
+ // Update all the non-control inputs to map:
+ assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms");
+ for (uint j = 1; j < newin->req(); j++) {
+ Node* m = map()->in(j); // Current state of target.
+ Node* n = newin->in(j); // Incoming change to target state.
+ PhiNode* phi;
+ if (m->is_Phi() && m->as_Phi()->region() == r)
+ phi = m->as_Phi();
+ else
+ phi = NULL;
+ if (m != n) { // Different; must merge
+ switch (j) {
+ // Frame pointer and Return Address never changes
+ case TypeFunc::FramePtr:// Drop m, use the original value
+ case TypeFunc::ReturnAdr:
+ break;
+ case TypeFunc::Memory: // Merge inputs to the MergeMem node
+ assert(phi == NULL, "the merge contains phis, not vice versa");
+ merge_memory_edges(n->as_MergeMem(), pnum, nophi);
+ continue;
+ default: // All normal stuff
+ if (phi == NULL) phi = ensure_phi(j, nophi);
+ break;
+ }
+ }
+ // At this point, n might be top if:
+ // - there is no phi (because TypeFlow detected a conflict), or
+    // - the corresponding control edge is top (a dead incoming path)
+ // It is a bug if we create a phi which sees a garbage value on a live path.
+
+ if (phi != NULL) {
+ assert(n != top() || r->in(pnum) == top(), "live value must not be garbage");
+ assert(phi->region() == r, "");
+ phi->set_req(pnum, n); // Then add 'n' to the merge
+ if (pnum == PhiNode::Input) {
+ // Last merge for this Phi.
+ // So far, Phis have had a reasonable type from ciTypeFlow.
+ // Now _gvn will join that with the meet of current inputs.
+ // BOTTOM is never permissible here, 'cause pessimistically
+ // Phis of pointers cannot lose the basic pointer type.
+ debug_only(const Type* bt1 = phi->bottom_type());
+ assert(bt1 != Type::BOTTOM, "should not be building conflict phis");
+ map()->set_req(j, _gvn.transform_no_reclaim(phi));
+ debug_only(const Type* bt2 = phi->bottom_type());
+ assert(bt2->higher_equal(bt1), "must be consistent with type-flow");
+ record_for_igvn(phi);
+ }
+ }
+ } // End of for all values to be merged
+
+ if (pnum == PhiNode::Input &&
+ !r->in(0)) { // The occasional useless Region
+ assert(control() == r, "");
+ set_control(r->nonnull_req());
+ }
+
+ // newin has been subsumed into the lazy merge, and is now dead.
+ set_block(save_block);
+
+ stop(); // done with this guy, for now
+ }
+
+ if (TraceOptoParse) {
+ tty->print_cr(" on path %d", pnum);
+ }
+
+ // Done with this parser state.
+ assert(stopped(), "");
+}
+
+
+//--------------------------merge_memory_edges---------------------------------
+void Parse::merge_memory_edges(MergeMemNode* n, int pnum, bool nophi) {
+ // (nophi means we must not create phis, because we already parsed here)
+ assert(n != NULL, "");
+ // Merge the inputs to the MergeMems
+ MergeMemNode* m = merged_memory();
+
+ assert(control()->is_Region(), "must be merging to a region");
+ RegionNode* r = control()->as_Region();
+
+ PhiNode* base = NULL;
+ MergeMemNode* remerge = NULL;
+ for (MergeMemStream mms(m, n); mms.next_non_empty2(); ) {
+ Node *p = mms.force_memory();
+ Node *q = mms.memory2();
+ if (mms.is_empty() && nophi) {
+ // Trouble: No new splits allowed after a loop body is parsed.
+ // Instead, wire the new split into a MergeMem on the backedge.
+ // The optimizer will sort it out, slicing the phi.
+ if (remerge == NULL) {
+ assert(base != NULL, "");
+ assert(base->in(0) != NULL, "should not be xformed away");
+ remerge = MergeMemNode::make(C, base->in(pnum));
+ gvn().set_type(remerge, Type::MEMORY);
+ base->set_req(pnum, remerge);
+ }
+ remerge->set_memory_at(mms.alias_idx(), q);
+ continue;
+ }
+ assert(!q->is_MergeMem(), "");
+ PhiNode* phi;
+ if (p != q) {
+ phi = ensure_memory_phi(mms.alias_idx(), nophi);
+ } else {
+ if (p->is_Phi() && p->as_Phi()->region() == r)
+ phi = p->as_Phi();
+ else
+ phi = NULL;
+ }
+ // Insert q into local phi
+ if (phi != NULL) {
+ assert(phi->region() == r, "");
+ p = phi;
+ phi->set_req(pnum, q);
+ if (mms.at_base_memory()) {
+ base = phi; // delay transforming it
+ } else if (pnum == 1) {
+ record_for_igvn(phi);
+ p = _gvn.transform_no_reclaim(phi);
+ }
+ mms.set_memory(p);// store back through the iterator
+ }
+ }
+ // Transform base last, in case we must fiddle with remerging.
+ if (base != NULL && pnum == 1) {
+ record_for_igvn(base);
+ m->set_base_memory( _gvn.transform_no_reclaim(base) );
+ }
+}
+
+
+//------------------------ensure_phis_everywhere-------------------------------
+void Parse::ensure_phis_everywhere() {
+ ensure_phi(TypeFunc::I_O);
+
+ // Ensure a phi on all currently known memories.
+ for (MergeMemStream mms(merged_memory()); mms.next_non_empty(); ) {
+ ensure_memory_phi(mms.alias_idx());
+ debug_only(mms.set_memory()); // keep the iterator happy
+ }
+
+ // Note: This is our only chance to create phis for memory slices.
+ // If we miss a slice that crops up later, it will have to be
+ // merged into the base-memory phi that we are building here.
+ // Later, the optimizer will comb out the knot, and build separate
+ // phi-loops for each memory slice that matters.
+
+ // Monitors must nest nicely and not get confused amongst themselves.
+ // Phi-ify everything up to the monitors, though.
+ uint monoff = map()->jvms()->monoff();
+ uint nof_monitors = map()->jvms()->nof_monitors();
+
+ assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms");
+ for (uint i = TypeFunc::Parms; i < monoff; i++) {
+ ensure_phi(i);
+ }
+ // Even monitors need Phis, though they are well-structured.
+ // This is true for OSR methods, and also for the rare cases where
+ // a monitor object is the subject of a replace_in_map operation.
+ // See bugs 4426707 and 5043395.
+ for (uint m = 0; m < nof_monitors; m++) {
+ ensure_phi(map()->jvms()->monitor_obj_offset(m));
+ }
+}
+
+
+//-----------------------------add_new_path------------------------------------
+// Add a previously unaccounted predecessor to this block.
+int Parse::Block::add_new_path() {
+ // If there is no map, return the lowest unused path number.
+ if (!is_merged()) return pred_count()+1; // there will be a map shortly
+
+ SafePointNode* map = start_map();
+ if (!map->control()->is_Region())
+ return pred_count()+1; // there may be a region some day
+ RegionNode* r = map->control()->as_Region();
+
+ // Add new path to the region.
+ uint pnum = r->req();
+ r->add_req(NULL);
+
+ for (uint i = 1; i < map->req(); i++) {
+ Node* n = map->in(i);
+ if (i == TypeFunc::Memory) {
+ // Ensure a phi on all currently known memories.
+ for (MergeMemStream mms(n->as_MergeMem()); mms.next_non_empty(); ) {
+ Node* phi = mms.memory();
+ if (phi->is_Phi() && phi->as_Phi()->region() == r) {
+ assert(phi->req() == pnum, "must be same size as region");
+ phi->add_req(NULL);
+ }
+ }
+ } else {
+ if (n->is_Phi() && n->as_Phi()->region() == r) {
+ assert(n->req() == pnum, "must be same size as region");
+ n->add_req(NULL);
+ }
+ }
+ }
+
+ return pnum;
+}
+
+//------------------------------ensure_phi-------------------------------------
+// Turn the idx'th entry of the current map into a Phi
+PhiNode *Parse::ensure_phi(int idx, bool nocreate) {
+ SafePointNode* map = this->map();
+ Node* region = map->control();
+ assert(region->is_Region(), "");
+
+ Node* o = map->in(idx);
+ assert(o != NULL, "");
+
+ if (o == top()) return NULL; // TOP always merges into TOP
+
+ if (o->is_Phi() && o->as_Phi()->region() == region) {
+ return o->as_Phi();
+ }
+
+ // Now use a Phi here for merging
+ assert(!nocreate, "Cannot build a phi for a block already parsed.");
+ const JVMState* jvms = map->jvms();
+ const Type* t;
+ if (jvms->is_loc(idx)) {
+ t = block()->local_type_at(idx - jvms->locoff());
+ } else if (jvms->is_stk(idx)) {
+ t = block()->stack_type_at(idx - jvms->stkoff());
+ } else if (jvms->is_mon(idx)) {
+ assert(!jvms->is_monitor_box(idx), "no phis for boxes");
+ t = TypeInstPtr::BOTTOM; // this is sufficient for a lock object
+ } else if ((uint)idx < TypeFunc::Parms) {
+ t = o->bottom_type(); // Type::RETURN_ADDRESS or such-like.
+ } else {
+ assert(false, "no type information for this phi");
+ }
+
+ // If the type falls to bottom, then this must be a local that
+ // is mixing ints and oops or some such. Forcing it to top
+ // makes it go dead.
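+  // (Typically this is a slot that javac has reused for an int in one scope
+  // and an oop in another; rather than build a conflicting phi, the entry is
+  // killed to top.)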
+ if (t == Type::BOTTOM) {
+ map->set_req(idx, top());
+ return NULL;
+ }
+
+ // Do not create phis for top either.
+  // A top on a non-null control path must be an unused value, even after the phi.
+ if (t == Type::TOP || t == Type::HALF) {
+ map->set_req(idx, top());
+ return NULL;
+ }
+
+ PhiNode* phi = PhiNode::make(region, o, t);
+ gvn().set_type(phi, t);
+ if (DoEscapeAnalysis) record_for_igvn(phi);
+ map->set_req(idx, phi);
+ return phi;
+}
+
+//--------------------------ensure_memory_phi----------------------------------
+// Turn the idx'th slice of the current memory into a Phi
+PhiNode *Parse::ensure_memory_phi(int idx, bool nocreate) {
+ MergeMemNode* mem = merged_memory();
+ Node* region = control();
+ assert(region->is_Region(), "");
+
+ Node *o = (idx == Compile::AliasIdxBot)? mem->base_memory(): mem->memory_at(idx);
+ assert(o != NULL && o != top(), "");
+
+ PhiNode* phi;
+ if (o->is_Phi() && o->as_Phi()->region() == region) {
+ phi = o->as_Phi();
+ if (phi == mem->base_memory() && idx >= Compile::AliasIdxRaw) {
+ // clone the shared base memory phi to make a new memory split
+ assert(!nocreate, "Cannot build a phi for a block already parsed.");
+ const Type* t = phi->bottom_type();
+ const TypePtr* adr_type = C->get_adr_type(idx);
+ phi = phi->slice_memory(adr_type);
+ gvn().set_type(phi, t);
+ }
+ return phi;
+ }
+
+ // Now use a Phi here for merging
+ assert(!nocreate, "Cannot build a phi for a block already parsed.");
+ const Type* t = o->bottom_type();
+ const TypePtr* adr_type = C->get_adr_type(idx);
+ phi = PhiNode::make(region, o, t, adr_type);
+ gvn().set_type(phi, t);
+ if (idx == Compile::AliasIdxBot)
+ mem->set_base_memory(phi);
+ else
+ mem->set_memory_at(idx, phi);
+ return phi;
+}
+
+//------------------------------call_register_finalizer-----------------------
+// Check the klass of the receiver and call register_finalizer if the
+// class needs finalization.
+void Parse::call_register_finalizer() {
+ Node* receiver = local(0);
+ assert(receiver != NULL && receiver->bottom_type()->isa_instptr() != NULL,
+ "must have non-null instance type");
+
+ const TypeInstPtr *tinst = receiver->bottom_type()->isa_instptr();
+ if (tinst != NULL && tinst->klass()->is_loaded() && !tinst->klass_is_exact()) {
+ // The type isn't known exactly so see if CHA tells us anything.
+ ciInstanceKlass* ik = tinst->klass()->as_instance_klass();
+ if (!Dependencies::has_finalizable_subclass(ik)) {
+ // No finalizable subclasses so skip the dynamic check.
+ C->dependencies()->assert_has_no_finalizable_subclasses(ik);
+ return;
+ }
+ }
+
+ // Insert a dynamic test for whether the instance needs
+ // finalization. In general this will fold up since the concrete
+ // class is often visible so the access flags are constant.
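+  // Roughly, the code built below is:
+  //   if ((receiver->klass()->access_flags() & JVM_ACC_HAS_FINALIZER) != 0)
+  //     runtime call: register_finalizer(receiver);   // slow path, may throw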
+ Node* klass_addr = basic_plus_adr( receiver, receiver, oopDesc::klass_offset_in_bytes() );
+ Node* klass = _gvn.transform(new (C, 3) LoadKlassNode(NULL, immutable_memory(), klass_addr, TypeInstPtr::KLASS));
+
+ Node* access_flags_addr = basic_plus_adr(klass, klass, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT);
+
+ Node* mask = _gvn.transform(new (C, 3) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER)));
+ Node* check = _gvn.transform(new (C, 3) CmpINode(mask, intcon(0)));
+ Node* test = _gvn.transform(new (C, 2) BoolNode(check, BoolTest::ne));
+
+ IfNode* iff = create_and_map_if(control(), test, PROB_MAX, COUNT_UNKNOWN);
+
+ RegionNode* result_rgn = new (C, 3) RegionNode(3);
+ record_for_igvn(result_rgn);
+
+ Node *skip_register = _gvn.transform(new (C, 1) IfFalseNode(iff));
+ result_rgn->init_req(1, skip_register);
+
+ Node *needs_register = _gvn.transform(new (C, 1) IfTrueNode(iff));
+ set_control(needs_register);
+ if (stopped()) {
+ // There is no slow path.
+ result_rgn->init_req(2, top());
+ } else {
+ Node *call = make_runtime_call(RC_NO_LEAF,
+ OptoRuntime::register_finalizer_Type(),
+ OptoRuntime::register_finalizer_Java(),
+ NULL, TypePtr::BOTTOM,
+ receiver);
+ make_slow_call_ex(call, env()->Throwable_klass(), true);
+
+ Node* fast_io = call->in(TypeFunc::I_O);
+ Node* fast_mem = call->in(TypeFunc::Memory);
+    // These two phis are pre-filled with copies of the fast IO and Memory
+ Node* io_phi = PhiNode::make(result_rgn, fast_io, Type::ABIO);
+ Node* mem_phi = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+
+ result_rgn->init_req(2, control());
+ io_phi ->init_req(2, i_o());
+ mem_phi ->init_req(2, reset_memory());
+
+ set_all_memory( _gvn.transform(mem_phi) );
+ set_i_o( _gvn.transform(io_phi) );
+ }
+
+ set_control( _gvn.transform(result_rgn) );
+}
+
+//------------------------------return_current---------------------------------
+// Append current _map to _exit_return
+void Parse::return_current(Node* value) {
+ if (RegisterFinalizersAtInit &&
+ method()->intrinsic_id() == vmIntrinsics::_Object_init) {
+ call_register_finalizer();
+ }
+
+ // Do not set_parse_bci, so that return goo is credited to the return insn.
+ set_bci(InvocationEntryBci);
+ if (method()->is_synchronized() && GenerateSynchronizationCode) {
+ shared_unlock(_synch_lock->box_node(), _synch_lock->obj_node());
+ }
+ if (DTraceMethodProbes) {
+ make_dtrace_method_exit(method());
+ }
+ SafePointNode* exit_return = _exits.map();
+ exit_return->in( TypeFunc::Control )->add_req( control() );
+ exit_return->in( TypeFunc::I_O )->add_req( i_o () );
+ Node *mem = exit_return->in( TypeFunc::Memory );
+ for (MergeMemStream mms(mem->as_MergeMem(), merged_memory()); mms.next_non_empty2(); ) {
+ if (mms.is_empty()) {
+ // get a copy of the base memory, and patch just this one input
+ const TypePtr* adr_type = mms.adr_type(C);
+ Node* phi = mms.force_memory()->as_Phi()->slice_memory(adr_type);
+ assert(phi->as_Phi()->region() == mms.base_memory()->in(0), "");
+ gvn().set_type_bottom(phi);
+ phi->del_req(phi->req()-1); // prepare to re-patch
+ mms.set_memory(phi);
+ }
+ mms.memory()->add_req(mms.memory2());
+ }
+
+ // frame pointer is always same, already captured
+ if (value != NULL) {
+ // If returning oops to an interface-return, there is a silent free
+ // cast from oop to interface allowed by the Verifier. Make it explicit
+ // here.
+ Node* phi = _exits.argument(0);
+ const TypeInstPtr *tr = phi->bottom_type()->isa_instptr();
+ if( tr && tr->klass()->is_loaded() &&
+ tr->klass()->is_interface() ) {
+ const TypeInstPtr *tp = value->bottom_type()->isa_instptr();
+ if (tp && tp->klass()->is_loaded() &&
+ !tp->klass()->is_interface()) {
+ // sharpen the type eagerly; this eases certain assert checking
+ if (tp->higher_equal(TypeInstPtr::NOTNULL))
+ tr = tr->join(TypeInstPtr::NOTNULL)->is_instptr();
+ value = _gvn.transform(new (C, 2) CheckCastPPNode(0,value,tr));
+ }
+ }
+ phi->add_req(value);
+ }
+
+ stop_and_kill_map(); // This CFG path dies here
+}
+
+
+//------------------------------add_safepoint----------------------------------
+void Parse::add_safepoint() {
+ // See if we can avoid this safepoint. No need for a SafePoint immediately
+ // after a Call (except Leaf Call) or another SafePoint.
+ Node *proj = control();
+ bool add_poll_param = SafePointNode::needs_polling_address_input();
+ uint parms = add_poll_param ? TypeFunc::Parms+1 : TypeFunc::Parms;
+ if( proj->is_Proj() ) {
+ Node *n0 = proj->in(0);
+ if( n0->is_Catch() ) {
+ n0 = n0->in(0)->in(0);
+ assert( n0->is_Call(), "expect a call here" );
+ }
+ if( n0->is_Call() ) {
+ if( n0->as_Call()->guaranteed_safepoint() )
+ return;
+ } else if( n0->is_SafePoint() && n0->req() >= parms ) {
+ return;
+ }
+ }
+
+ // Clear out dead values from the debug info.
+ kill_dead_locals();
+
+ // Clone the JVM State
+ SafePointNode *sfpnt = new (C, parms) SafePointNode(parms, NULL);
+
+ // Capture memory state BEFORE a SafePoint. Since we can block at a
+ // SafePoint we need our GC state to be safe; i.e. we need all our current
+ // write barriers (card marks) to not float down after the SafePoint so we
+ // must read raw memory. Likewise we need all oop stores to match the card
+ // marks. If deopt can happen, we need ALL stores (we need the correct JVM
+ // state on a deopt).
+
+ // We do not need to WRITE the memory state after a SafePoint. The control
+ // edge will keep card-marks and oop-stores from floating up from below a
+ // SafePoint and our true dependency added here will keep them from floating
+ // down below a SafePoint.
+
+ // Clone the current memory state
+ Node* mem = MergeMemNode::make(C, map()->memory());
+
+ mem = _gvn.transform(mem);
+
+ // Pass control through the safepoint
+ sfpnt->init_req(TypeFunc::Control , control());
+ // Fix edges normally used by a call
+ sfpnt->init_req(TypeFunc::I_O , top() );
+ sfpnt->init_req(TypeFunc::Memory , mem );
+ sfpnt->init_req(TypeFunc::ReturnAdr, top() );
+ sfpnt->init_req(TypeFunc::FramePtr , top() );
+
+ // Create a node for the polling address
+ if( add_poll_param ) {
+ Node *polladr = ConPNode::make(C, (address)os::get_polling_page());
+ sfpnt->init_req(TypeFunc::Parms+0, _gvn.transform(polladr));
+ }
+
+ // Fix up the JVM State edges
+ add_safepoint_edges(sfpnt);
+ Node *transformed_sfpnt = _gvn.transform(sfpnt);
+ set_control(transformed_sfpnt);
+
+ // Provide an edge from root to safepoint. This makes the safepoint
+ // appear useful until the parse has completed.
+ if( OptoRemoveUseless && transformed_sfpnt->is_SafePoint() ) {
+ assert(C->root() != NULL, "Expect parse is still valid");
+ C->root()->add_prec(transformed_sfpnt);
+ }
+}
+
+#ifndef PRODUCT
+//------------------------show_parse_info--------------------------------------
+void Parse::show_parse_info() {
+ InlineTree* ilt = NULL;
+ if (C->ilt() != NULL) {
+ JVMState* caller_jvms = is_osr_parse() ? caller()->caller() : caller();
+ ilt = InlineTree::find_subtree_from_root(C->ilt(), caller_jvms, method());
+ }
+ if (PrintCompilation && Verbose) {
+ if (depth() == 1) {
+ if( ilt->count_inlines() ) {
+ tty->print(" __inlined %d (%d bytes)", ilt->count_inlines(),
+ ilt->count_inline_bcs());
+ tty->cr();
+ }
+ } else {
+ if (method()->is_synchronized()) tty->print("s");
+ if (method()->has_exception_handlers()) tty->print("!");
+ // Check this is not the final compiled version
+ if (C->trap_can_recompile()) {
+ tty->print("-");
+ } else {
+ tty->print(" ");
+ }
+ method()->print_short_name();
+ if (is_osr_parse()) {
+ tty->print(" @ %d", osr_bci());
+ }
+ tty->print(" (%d bytes)",method()->code_size());
+ if (ilt->count_inlines()) {
+ tty->print(" __inlined %d (%d bytes)", ilt->count_inlines(),
+ ilt->count_inline_bcs());
+ }
+ tty->cr();
+ }
+ }
+ if (PrintOpto && (depth() == 1 || PrintOptoInlining)) {
+ // Print that we succeeded; suppress this message on the first osr parse.
+
+ if (method()->is_synchronized()) tty->print("s");
+ if (method()->has_exception_handlers()) tty->print("!");
+ // Check this is not the final compiled version
+ if (C->trap_can_recompile() && depth() == 1) {
+ tty->print("-");
+ } else {
+ tty->print(" ");
+ }
+ if( depth() != 1 ) { tty->print(" "); } // missing compile count
+ for (int i = 1; i < depth(); ++i) { tty->print(" "); }
+ method()->print_short_name();
+ if (is_osr_parse()) {
+ tty->print(" @ %d", osr_bci());
+ }
+ if (ilt->caller_bci() != -1) {
+ tty->print(" @ %d", ilt->caller_bci());
+ }
+ tty->print(" (%d bytes)",method()->code_size());
+ if (ilt->count_inlines()) {
+ tty->print(" __inlined %d (%d bytes)", ilt->count_inlines(),
+ ilt->count_inline_bcs());
+ }
+ tty->cr();
+ }
+}
+
+
+//------------------------------dump-------------------------------------------
+// Dump information associated with the bytecodes of current _method
+void Parse::dump() {
+ if( method() != NULL ) {
+ // Iterate over bytecodes
+ ciBytecodeStream iter(method());
+ for( Bytecodes::Code bc = iter.next(); bc != ciBytecodeStream::EOBC() ; bc = iter.next() ) {
+ dump_bci( iter.cur_bci() );
+ tty->cr();
+ }
+ }
+}
+
+// Dump information associated with a byte code index, 'bci'
+void Parse::dump_bci(int bci) {
+ // Output info on merge-points, cloning, and within _jsr..._ret
+ // NYI
+ tty->print(" bci:%d", bci);
+}
+
+#endif
diff --git a/src/share/vm/opto/parse2.cpp b/src/share/vm/opto/parse2.cpp
new file mode 100644
index 000000000..2a5b0da07
--- /dev/null
+++ b/src/share/vm/opto/parse2.cpp
@@ -0,0 +1,2171 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_parse2.cpp.incl"
+
+extern int explicit_null_checks_inserted,
+ explicit_null_checks_elided;
+
+//---------------------------------array_load----------------------------------
+void Parse::array_load(BasicType elem_type) {
+ const Type* elem = Type::TOP;
+ Node* adr = array_addressing(elem_type, 0, &elem);
+  if (stopped()) return; // guaranteed null or range check
+ _sp -= 2; // Pop array and index
+ const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
+ Node* ld = make_load(control(), adr, elem, elem_type, adr_type);
+ push(ld);
+}
+
+
+//--------------------------------array_store----------------------------------
+void Parse::array_store(BasicType elem_type) {
+ Node* adr = array_addressing(elem_type, 1);
+  if (stopped()) return; // guaranteed null or range check
+ Node* val = pop();
+ _sp -= 2; // Pop array and index
+ const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
+ store_to_memory(control(), adr, val, elem_type, adr_type);
+}
+
+
+//------------------------------array_addressing-------------------------------
+// Pull array and index from the stack. Compute pointer-to-element.
+Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) {
+ Node *idx = peek(0+vals); // Get from stack without popping
+ Node *ary = peek(1+vals); // in case of exception
+
+ // Null check the array base, with correct stack contents
+ ary = do_null_check(ary, T_ARRAY);
+ // Compile-time detect of null-exception?
+ if (stopped()) return top();
+
+ const TypeAryPtr* arytype = _gvn.type(ary)->is_aryptr();
+ const TypeInt* sizetype = arytype->size();
+ const Type* elemtype = arytype->elem();
+
+ if (UseUniqueSubclasses && result2 != NULL) {
+ const TypeInstPtr* toop = elemtype->isa_instptr();
+ if (toop) {
+ if (toop->klass()->as_instance_klass()->unique_concrete_subklass()) {
+ // If we load from "AbstractClass[]" we must see "ConcreteSubClass".
+ const Type* subklass = Type::get_const_type(toop->klass());
+ elemtype = subklass->join(elemtype);
+ }
+ }
+ }
+
+ // Check for big class initializers with all constant offsets
+ // feeding into a known-size array.
+ const TypeInt* idxtype = _gvn.type(idx)->is_int();
+ // See if the highest idx value is less than the lowest array bound,
+ // and if the idx value cannot be negative:
+ bool need_range_check = true;
+ if (idxtype->_hi < sizetype->_lo && idxtype->_lo >= 0) {
+ need_range_check = false;
+ if (C->log() != NULL) C->log()->elem("observe that='!need_range_check'");
+ }
+
+ if (!arytype->klass()->is_loaded()) {
+ // Only fails for some -Xcomp runs
+ // The class is unloaded. We have to run this bytecode in the interpreter.
+ uncommon_trap(Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ arytype->klass(), "!loaded array");
+ return top();
+ }
+
+ // Do the range check
+ if (GenerateRangeChecks && need_range_check) {
+ // Range is constant in array-oop, so we can use the original state of mem
+ Node* len = load_array_length(ary);
+ // Test length vs index (standard trick using unsigned compare)
+ Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) );
+ BoolTest::mask btest = BoolTest::lt;
+ Node* tst = _gvn.transform( new (C, 2) BoolNode(chk, btest) );
+ // Branch to failure if out of bounds
+ { BuildCutout unless(this, tst, PROB_MAX);
+ if (C->allow_range_check_smearing()) {
+ // Do not use builtin_throw, since range checks are sometimes
+ // made more stringent by an optimistic transformation.
+ // This creates "tentative" range checks at this point,
+ // which are not guaranteed to throw exceptions.
+ // See IfNode::Ideal, is_range_check, adjust_check.
+ uncommon_trap(Deoptimization::Reason_range_check,
+ Deoptimization::Action_make_not_entrant,
+ NULL, "range_check");
+ } else {
+ // If we have already recompiled with the range-check-widening
+ // heroic optimization turned off, then we must really be throwing
+ // range check exceptions.
+ builtin_throw(Deoptimization::Reason_range_check, idx);
+ }
+ }
+ }
+ // Check for always knowing you are throwing a range-check exception
+ if (stopped()) return top();
+
+ Node* ptr = array_element_address( ary, idx, type, sizetype);
+
+ if (result2 != NULL) *result2 = elemtype;
+ return ptr;
+}
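// Editorial sketch (not part of the patch, assumes two's-complement int): the
// "standard trick using unsigned compare" from the range check above. Casting
// a signed index to unsigned makes negative values wrap to huge numbers, so a
// single CmpU/BoolTest::lt test covers both idx < 0 and idx >= length.
#include <cassert>

static bool index_in_bounds(int idx, int len) {
  return (unsigned int)idx < (unsigned int)len;   // mirrors CmpU(idx, len), lt
}

int main() {
  assert( index_in_bounds(3, 10));
  assert(!index_in_bounds(-1, 10));   // -1 wraps to 0xFFFFFFFF, fails the test
  assert(!index_in_bounds(10, 10));   // == length also fails
  return 0;
}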
+
+
+// returns IfNode
+IfNode* Parse::jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask) {
+ Node *cmp = _gvn.transform( new (C, 3) CmpINode( a, b)); // two cases: shiftcount > 32 and shiftcount <= 32
+ Node *tst = _gvn.transform( new (C, 2) BoolNode( cmp, mask));
+ IfNode *iff = create_and_map_if( control(), tst, ((mask == BoolTest::eq) ? PROB_STATIC_INFREQUENT : PROB_FAIR), COUNT_UNKNOWN );
+ return iff;
+}
+
+// return Region node
+Node* Parse::jump_if_join(Node* iffalse, Node* iftrue) {
+ Node *region = new (C, 3) RegionNode(3); // 2 results
+ record_for_igvn(region);
+ region->init_req(1, iffalse);
+ region->init_req(2, iftrue );
+ _gvn.set_type(region, Type::CONTROL);
+ region = _gvn.transform(region);
+ set_control (region);
+ return region;
+}
+
+
+//------------------------------helper for tableswitch-------------------------
+void Parse::jump_if_true_fork(IfNode *iff, int dest_bci_if_true, int prof_table_index) {
+ // True branch, use existing map info
+ { PreserveJVMState pjvms(this);
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode (iff) );
+ set_control( iftrue );
+ profile_switch_case(prof_table_index);
+ merge_new_path(dest_bci_if_true);
+ }
+
+ // False branch
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ set_control( iffalse );
+}
+
+void Parse::jump_if_false_fork(IfNode *iff, int dest_bci_if_true, int prof_table_index) {
+ // True branch, use existing map info
+ { PreserveJVMState pjvms(this);
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode (iff) );
+ set_control( iffalse );
+ profile_switch_case(prof_table_index);
+ merge_new_path(dest_bci_if_true);
+ }
+
+ // False branch
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(iff) );
+ set_control( iftrue );
+}
+
+void Parse::jump_if_always_fork(int dest_bci, int prof_table_index) {
+ // False branch, use existing map and control()
+ profile_switch_case(prof_table_index);
+ merge_new_path(dest_bci);
+}
+
+
+extern "C" {
+ static int jint_cmp(const void *i, const void *j) {
+ int a = *(jint *)i;
+ int b = *(jint *)j;
+ return a > b ? 1 : a < b ? -1 : 0;
+ }
+}
+
+
+// Default value for methodData switch indexing. Must be a negative value to avoid
+// conflict with any legal switch index.
+#define NullTableIndex -1
+
+class SwitchRange : public StackObj {
+ // a range of integers coupled with a bci destination
+ jint _lo; // inclusive lower limit
+ jint _hi; // inclusive upper limit
+ int _dest;
+ int _table_index; // index into method data table
+
+public:
+ jint lo() const { return _lo; }
+ jint hi() const { return _hi; }
+ int dest() const { return _dest; }
+ int table_index() const { return _table_index; }
+ bool is_singleton() const { return _lo == _hi; }
+
+ void setRange(jint lo, jint hi, int dest, int table_index) {
+ assert(lo <= hi, "must be a non-empty range");
+ _lo = lo, _hi = hi; _dest = dest; _table_index = table_index;
+ }
+ bool adjoinRange(jint lo, jint hi, int dest, int table_index) {
+ assert(lo <= hi, "must be a non-empty range");
+ if (lo == _hi+1 && dest == _dest && table_index == _table_index) {
+ _hi = hi;
+ return true;
+ }
+ return false;
+ }
+
+ void set (jint value, int dest, int table_index) {
+ setRange(value, value, dest, table_index);
+ }
+ bool adjoin(jint value, int dest, int table_index) {
+ return adjoinRange(value, value, dest, table_index);
+ }
+
+ void print(ciEnv* env) {
+ if (is_singleton())
+ tty->print(" {%d}=>%d", lo(), dest());
+ else if (lo() == min_jint)
+ tty->print(" {..%d}=>%d", hi(), dest());
+ else if (hi() == max_jint)
+ tty->print(" {%d..}=>%d", lo(), dest());
+ else
+ tty->print(" {%d..%d}=>%d", lo(), hi(), dest());
+ }
+};
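// Editorial sketch (not part of the patch): how adjoin()/adjoinRange() let the
// switch lowering coalesce consecutive case values that share a destination,
// so "case 1: case 2: case 3:" jumping to one bci becomes a single {1..3} range.
#include <cassert>
#include <vector>

struct Rng { int lo, hi, dest; };   // inclusive range -> destination bci

// Append (value -> dest), merging with the previous range when the value is
// contiguous and goes to the same destination.
static void add_case(std::vector<Rng>& rs, int value, int dest) {
  if (!rs.empty() && rs.back().hi + 1 == value && rs.back().dest == dest) {
    rs.back().hi = value;           // adjoin succeeded
  } else {
    rs.push_back(Rng{value, value, dest});
  }
}

int main() {
  std::vector<Rng> rs;
  add_case(rs, 1, 100); add_case(rs, 2, 100); add_case(rs, 3, 100);
  add_case(rs, 4, 200);
  assert(rs.size() == 2 && rs[0].lo == 1 && rs[0].hi == 3 && rs[1].dest == 200);
  return 0;
}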
+
+
+//-------------------------------do_tableswitch--------------------------------
+void Parse::do_tableswitch() {
+ Node* lookup = pop();
+
+ // Get information about tableswitch
+ int default_dest = iter().get_dest_table(0);
+ int lo_index = iter().get_int_table(1);
+ int hi_index = iter().get_int_table(2);
+ int len = hi_index - lo_index + 1;
+
+ if (len < 1) {
+ // If this is a backward branch, add safepoint
+ maybe_add_safepoint(default_dest);
+ merge(default_dest);
+ return;
+ }
+
+ // generate decision tree, using trichotomy when possible
+ int rnum = len+2;
+ bool makes_backward_branch = false;
+ SwitchRange* ranges = NEW_RESOURCE_ARRAY(SwitchRange, rnum);
+ int rp = -1;
+ if (lo_index != min_jint) {
+ ranges[++rp].setRange(min_jint, lo_index-1, default_dest, NullTableIndex);
+ }
+ for (int j = 0; j < len; j++) {
+ jint match_int = lo_index+j;
+ int dest = iter().get_dest_table(j+3);
+ makes_backward_branch |= (dest <= bci());
+ int table_index = method_data_update() ? j : NullTableIndex;
+ if (rp < 0 || !ranges[rp].adjoin(match_int, dest, table_index)) {
+ ranges[++rp].set(match_int, dest, table_index);
+ }
+ }
+ jint highest = lo_index+(len-1);
+ assert(ranges[rp].hi() == highest, "");
+ if (highest != max_jint
+ && !ranges[rp].adjoinRange(highest+1, max_jint, default_dest, NullTableIndex)) {
+ ranges[++rp].setRange(highest+1, max_jint, default_dest, NullTableIndex);
+ }
+ assert(rp < len+2, "not too many ranges");
+
+ // Safepoint in case backward branch observed
+ if( makes_backward_branch && UseLoopSafepoints )
+ add_safepoint();
+
+ jump_switch_ranges(lookup, &ranges[0], &ranges[rp]);
+}
+
+
+//------------------------------do_lookupswitch--------------------------------
+void Parse::do_lookupswitch() {
+ Node *lookup = pop(); // lookup value
+ // Get information about lookupswitch
+ int default_dest = iter().get_dest_table(0);
+ int len = iter().get_int_table(1);
+
+ if (len < 1) { // If this is a backward branch, add safepoint
+ maybe_add_safepoint(default_dest);
+ merge(default_dest);
+ return;
+ }
+
+ // generate decision tree, using trichotomy when possible
+ jint* table = NEW_RESOURCE_ARRAY(jint, len*2);
+ {
+ for( int j = 0; j < len; j++ ) {
+ table[j+j+0] = iter().get_int_table(2+j+j);
+ table[j+j+1] = iter().get_dest_table(2+j+j+1);
+ }
+ qsort( table, len, 2*sizeof(table[0]), jint_cmp );
+ }
+
+ int rnum = len*2+1;
+ bool makes_backward_branch = false;
+ SwitchRange* ranges = NEW_RESOURCE_ARRAY(SwitchRange, rnum);
+ int rp = -1;
+ for( int j = 0; j < len; j++ ) {
+ jint match_int = table[j+j+0];
+ int dest = table[j+j+1];
+ int next_lo = rp < 0 ? min_jint : ranges[rp].hi()+1;
+ int table_index = method_data_update() ? j : NullTableIndex;
+ makes_backward_branch |= (dest <= bci());
+ if( match_int != next_lo ) {
+ ranges[++rp].setRange(next_lo, match_int-1, default_dest, NullTableIndex);
+ }
+ if( rp < 0 || !ranges[rp].adjoin(match_int, dest, table_index) ) {
+ ranges[++rp].set(match_int, dest, table_index);
+ }
+ }
+ jint highest = table[2*(len-1)];
+ assert(ranges[rp].hi() == highest, "");
+ if( highest != max_jint
+ && !ranges[rp].adjoinRange(highest+1, max_jint, default_dest, NullTableIndex) ) {
+ ranges[++rp].setRange(highest+1, max_jint, default_dest, NullTableIndex);
+ }
+ assert(rp < rnum, "not too many ranges");
+
+ // Safepoint in case backward branch observed
+ if( makes_backward_branch && UseLoopSafepoints )
+ add_safepoint();
+
+ jump_switch_ranges(lookup, &ranges[0], &ranges[rp]);
+}
+
+//----------------------------create_jump_tables-------------------------------
+bool Parse::create_jump_tables(Node* key_val, SwitchRange* lo, SwitchRange* hi) {
+ // Are jumptables enabled
+ if (!UseJumpTables) return false;
+
+ // Are jumptables supported
+ if (!Matcher::has_match_rule(Op_Jump)) return false;
+
+ // Don't make jump table if profiling
+ if (method_data_update()) return false;
+
+ // Decide if a guard is needed to lop off big ranges at either (or
+ // both) end(s) of the input set. We'll call this the default target
+ // even though we can't be sure that it is the true "default".
+
+ bool needs_guard = false;
+ int default_dest;
+ int64 total_outlier_size = 0;
+ int64 hi_size = ((int64)hi->hi()) - ((int64)hi->lo()) + 1;
+ int64 lo_size = ((int64)lo->hi()) - ((int64)lo->lo()) + 1;
+
+ if (lo->dest() == hi->dest()) {
+ total_outlier_size = hi_size + lo_size;
+ default_dest = lo->dest();
+ } else if (lo_size > hi_size) {
+ total_outlier_size = lo_size;
+ default_dest = lo->dest();
+ } else {
+ total_outlier_size = hi_size;
+ default_dest = hi->dest();
+ }
+
+ // If a guard test will eliminate very sparse end ranges, then
+ // it is worth the cost of an extra jump.
+ if (total_outlier_size > (MaxJumpTableSparseness * 4)) {
+ needs_guard = true;
+ if (default_dest == lo->dest()) lo++;
+ if (default_dest == hi->dest()) hi--;
+ }
+
+ // Find the total number of cases and ranges
+ int64 num_cases = ((int64)hi->hi()) - ((int64)lo->lo()) + 1;
+ int num_range = hi - lo + 1;
+
+ // Don't create table if: too large, too small, or too sparse.
+ if (num_cases < MinJumpTableSize || num_cases > MaxJumpTableSize)
+ return false;
+ if (num_cases > (MaxJumpTableSparseness * num_range))
+ return false;
+
+ // Normalize table lookups to zero
+ int lowval = lo->lo();
+ key_val = _gvn.transform( new (C, 3) SubINode(key_val, _gvn.intcon(lowval)) );
+
+ // Generate a guard to protect against input keyvals that aren't
+ // in the switch domain.
+ if (needs_guard) {
+ Node* size = _gvn.intcon(num_cases);
+ Node* cmp = _gvn.transform( new (C, 3) CmpUNode(key_val, size) );
+ Node* tst = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ge) );
+ IfNode* iff = create_and_map_if( control(), tst, PROB_FAIR, COUNT_UNKNOWN);
+ jump_if_true_fork(iff, default_dest, NullTableIndex);
+ }
+
+ // Create an ideal node JumpTable that has projections
+ // of all possible ranges for a switch statement
+ // The key_val input must be converted to a pointer offset and scaled.
+ // Compare Parse::array_addressing above.
+#ifdef _LP64
+ // Clean the 32-bit int into a real 64-bit offset.
+ // Otherwise, the jint value 0 might turn into an offset of 0x0800000000.
+ const TypeLong* lkeytype = TypeLong::make(CONST64(0), num_cases-1, Type::WidenMin);
+ key_val = _gvn.transform( new (C, 2) ConvI2LNode(key_val, lkeytype) );
+#endif
+ // Shift the value by wordsize so we have an index into the table, rather
+ // than a switch value
+ Node *shiftWord = _gvn.MakeConX(wordSize);
+ key_val = _gvn.transform( new (C, 3) MulXNode( key_val, shiftWord));
+
+ // Create the JumpNode
+ Node* jtn = _gvn.transform( new (C, 2) JumpNode(control(), key_val, num_cases) );
+
+ // These are the switch destinations hanging off the jumpnode
+ int i = 0;
+ for (SwitchRange* r = lo; r <= hi; r++) {
+ for (int j = r->lo(); j <= r->hi(); j++, i++) {
+ Node* input = _gvn.transform(new (C, 1) JumpProjNode(jtn, i, r->dest(), j - lowval));
+ {
+ PreserveJVMState pjvms(this);
+ set_control(input);
+ jump_if_always_fork(r->dest(), r->table_index());
+ }
+ }
+ }
+ assert(i == num_cases, "miscount of cases");
+ stop_and_kill_map(); // no more uses for this JVMS
+ return true;
+}
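// Editorial sketch (not part of the patch): the size/density test above, with
// hypothetical stand-ins for the MinJumpTableSize / MaxJumpTableSize /
// MaxJumpTableSparseness flags (the real defaults may differ).
#include <cstdint>
#include <cstdio>

static const int64_t kMinSize    = 18;      // stand-in for MinJumpTableSize
static const int64_t kMaxSize    = 65000;   // stand-in for MaxJumpTableSize
static const int64_t kSparseness = 5;       // stand-in for MaxJumpTableSparseness

// lo_key..hi_key is the span of case values left after the optional guard has
// lopped off sparse end ranges; num_range counts the SwitchRange entries in it.
static bool worth_a_jump_table(int64_t lo_key, int64_t hi_key, int64_t num_range) {
  int64_t num_cases = hi_key - lo_key + 1;
  if (num_cases < kMinSize || num_cases > kMaxSize) return false;  // too small/large
  if (num_cases > kSparseness * num_range)          return false;  // too sparse
  return true;
}

int main() {
  printf("%d\n", (int)worth_a_jump_table(0, 99, 50));  // dense enough: 1
  printf("%d\n", (int)worth_a_jump_table(0, 99, 10));  // too sparse:   0
  return 0;
}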
+
+//----------------------------jump_switch_ranges-------------------------------
+void Parse::jump_switch_ranges(Node* key_val, SwitchRange *lo, SwitchRange *hi, int switch_depth) {
+ Block* switch_block = block();
+
+ if (switch_depth == 0) {
+ // Do special processing for the top-level call.
+ assert(lo->lo() == min_jint, "initial range must exhaust Type::INT");
+ assert(hi->hi() == max_jint, "initial range must exhaust Type::INT");
+
+ // Decrement pred-numbers for the unique set of nodes.
+#ifdef ASSERT
+ // Ensure that the block's successors are a (duplicate-free) set.
+ int successors_counted = 0; // block occurrences in [lo..hi]
+ int unique_successors = switch_block->num_successors();
+ for (int i = 0; i < unique_successors; i++) {
+ Block* target = switch_block->successor_at(i);
+
+ // Check that the set of successors is the same in both places.
+ int successors_found = 0;
+ for (SwitchRange* p = lo; p <= hi; p++) {
+ if (p->dest() == target->start()) successors_found++;
+ }
+ assert(successors_found > 0, "successor must be known");
+ successors_counted += successors_found;
+ }
+ assert(successors_counted == (hi-lo)+1, "no unexpected successors");
+#endif
+
+ // Maybe prune the inputs, based on the type of key_val.
+ jint min_val = min_jint;
+ jint max_val = max_jint;
+ const TypeInt* ti = key_val->bottom_type()->isa_int();
+ if (ti != NULL) {
+ min_val = ti->_lo;
+ max_val = ti->_hi;
+ assert(min_val <= max_val, "invalid int type");
+ }
+ while (lo->hi() < min_val) lo++;
+ if (lo->lo() < min_val) lo->setRange(min_val, lo->hi(), lo->dest(), lo->table_index());
+ while (hi->lo() > max_val) hi--;
+ if (hi->hi() > max_val) hi->setRange(hi->lo(), max_val, hi->dest(), hi->table_index());
+ }
+
+#ifndef PRODUCT
+ if (switch_depth == 0) {
+ _max_switch_depth = 0;
+ _est_switch_depth = log2_intptr((hi-lo+1)-1)+1;
+ }
+#endif
+
+ assert(lo <= hi, "must be a non-empty set of ranges");
+ if (lo == hi) {
+ jump_if_always_fork(lo->dest(), lo->table_index());
+ } else {
+ assert(lo->hi() == (lo+1)->lo()-1, "contiguous ranges");
+ assert(hi->lo() == (hi-1)->hi()+1, "contiguous ranges");
+
+ if (create_jump_tables(key_val, lo, hi)) return;
+
+ int nr = hi - lo + 1;
+
+ SwitchRange* mid = lo + nr/2;
+ // if there is an easy choice, pivot at a singleton:
+ if (nr > 3 && !mid->is_singleton() && (mid-1)->is_singleton()) mid--;
+
+ assert(lo < mid && mid <= hi, "good pivot choice");
+ assert(nr != 2 || mid == hi, "should pick higher of 2");
+ assert(nr != 3 || mid == hi-1, "should pick middle of 3");
+
+ Node *test_val = _gvn.intcon(mid->lo());
+
+ if (mid->is_singleton()) {
+ IfNode *iff_ne = jump_if_fork_int(key_val, test_val, BoolTest::ne);
+ jump_if_false_fork(iff_ne, mid->dest(), mid->table_index());
+
+ // Special Case: If there are exactly three ranges, and the high
+ // and low range each go to the same place, omit the "gt" test,
+ // since it will not discriminate anything.
+ bool eq_test_only = (hi == lo+2 && hi->dest() == lo->dest());
+ if (eq_test_only) {
+ assert(mid == hi-1, "");
+ }
+
+ // if there is a higher range, test for it and process it:
+ if (mid < hi && !eq_test_only) {
+ // two comparisons of same values--should enable 1 test for 2 branches
+ // Use BoolTest::le instead of BoolTest::gt
+ IfNode *iff_le = jump_if_fork_int(key_val, test_val, BoolTest::le);
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(iff_le) );
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(iff_le) );
+ { PreserveJVMState pjvms(this);
+ set_control(iffalse);
+ jump_switch_ranges(key_val, mid+1, hi, switch_depth+1);
+ }
+ set_control(iftrue);
+ }
+
+ } else {
+ // mid is a range, not a singleton, so treat mid..hi as a unit
+ IfNode *iff_ge = jump_if_fork_int(key_val, test_val, BoolTest::ge);
+
+ // if there is a higher range, test for it and process it:
+ if (mid == hi) {
+ jump_if_true_fork(iff_ge, mid->dest(), mid->table_index());
+ } else {
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(iff_ge) );
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(iff_ge) );
+ { PreserveJVMState pjvms(this);
+ set_control(iftrue);
+ jump_switch_ranges(key_val, mid, hi, switch_depth+1);
+ }
+ set_control(iffalse);
+ }
+ }
+
+ // in any case, process the lower range
+ jump_switch_ranges(key_val, lo, mid-1, switch_depth+1);
+ }
+
+ // Decrease pred_count for each successor after all is done.
+ if (switch_depth == 0) {
+ int unique_successors = switch_block->num_successors();
+ for (int i = 0; i < unique_successors; i++) {
+ Block* target = switch_block->successor_at(i);
+ // Throw away the pre-allocated path for each unique successor.
+ target->next_path_num();
+ }
+ }
+
+#ifndef PRODUCT
+ _max_switch_depth = MAX2(switch_depth, _max_switch_depth);
+ if (TraceOptoParse && Verbose && WizardMode && switch_depth == 0) {
+ SwitchRange* r;
+ int nsing = 0;
+ for( r = lo; r <= hi; r++ ) {
+ if( r->is_singleton() ) nsing++;
+ }
+ tty->print(">>> ");
+ _method->print_short_name();
+ tty->print_cr(" switch decision tree");
+ tty->print_cr(" %d ranges (%d singletons), max_depth=%d, est_depth=%d",
+ hi-lo+1, nsing, _max_switch_depth, _est_switch_depth);
+ if (_max_switch_depth > _est_switch_depth) {
+ tty->print_cr("******** BAD SWITCH DEPTH ********");
+ }
+ tty->print(" ");
+ for( r = lo; r <= hi; r++ ) {
+ r->print(env());
+ }
+ tty->print_cr("");
+ }
+#endif
+}
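// Editorial sketch (not part of the patch): the shape of the decision tree that
// jump_switch_ranges() emits, written as a plain recursive lookup over sorted,
// disjoint, contiguous ranges. The parser emits If nodes instead of recursing,
// but the pivot choice (a singleton near the middle when available) and the
// O(log n) depth are the same idea.
#include <cassert>
#include <climits>

struct Range { int lo, hi, dest; };   // inclusive, sorted, contiguous

static int lookup(const Range* lo, const Range* hi, int key) {
  if (lo == hi) return lo->dest;                      // one range left
  const Range* mid = lo + (hi - lo + 1) / 2;          // same pivot as nr/2
  if (mid->lo == mid->hi) {                           // singleton: test ==, then </>
    if (key == mid->lo) return mid->dest;
    return (key < mid->lo) ? lookup(lo, mid - 1, key) : lookup(mid + 1, hi, key);
  }
  return (key >= mid->lo) ? lookup(mid, hi, key) : lookup(lo, mid - 1, key);
}

int main() {
  Range r[] = { {INT_MIN, 0, 9}, {1, 1, 10}, {2, 2, 11}, {3, INT_MAX, 9} };
  assert(lookup(r, r + 3, 1)  == 10);
  assert(lookup(r, r + 3, 2)  == 11);
  assert(lookup(r, r + 3, 42) ==  9);
  return 0;
}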
+
+void Parse::modf() {
+ Node *f2 = pop();
+ Node *f1 = pop();
+ Node* c = make_runtime_call(RC_LEAF, OptoRuntime::modf_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::frem),
+ "frem", NULL, //no memory effects
+ f1, f2);
+ Node* res = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms + 0));
+
+ push(res);
+}
+
+void Parse::modd() {
+ Node *d2 = pop_pair();
+ Node *d1 = pop_pair();
+ Node* c = make_runtime_call(RC_LEAF, OptoRuntime::Math_DD_D_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::drem),
+ "drem", NULL, //no memory effects
+ d1, top(), d2, top());
+ Node* res_d = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms + 0));
+
+#ifdef ASSERT
+ Node* res_top = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms + 1));
+ assert(res_top == top(), "second value must be top");
+#endif
+
+ push_pair(res_d);
+}
+
+void Parse::l2f() {
+ Node* f2 = pop();
+ Node* f1 = pop();
+ Node* c = make_runtime_call(RC_LEAF, OptoRuntime::l2f_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::l2f),
+ "l2f", NULL, //no memory effects
+ f1, f2);
+ Node* res = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms + 0));
+
+ push(res);
+}
+
+void Parse::do_irem() {
+ // Must keep both values on the expression-stack during null-check
+ do_null_check(peek(), T_INT);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+
+ Node* b = pop();
+ Node* a = pop();
+
+ const Type *t = _gvn.type(b);
+ if (t != Type::TOP) {
+ const TypeInt *ti = t->is_int();
+ if (ti->is_con()) {
+ int divisor = ti->get_con();
+ // check for positive power of 2
+ if (divisor > 0 &&
+ (divisor & ~(divisor-1)) == divisor) {
+ // yes !
+ Node *mask = _gvn.intcon((divisor - 1));
+ // Sigh, must handle negative dividends
+ Node *zero = _gvn.intcon(0);
+ IfNode *ifff = jump_if_fork_int(a, zero, BoolTest::lt);
+ Node *iff = _gvn.transform( new (C, 1) IfFalseNode(ifff) );
+ Node *ift = _gvn.transform( new (C, 1) IfTrueNode (ifff) );
+ Node *reg = jump_if_join(ift, iff);
+ Node *phi = PhiNode::make(reg, NULL, TypeInt::INT);
+ // Negative path; negate/and/negate
+ Node *neg = _gvn.transform( new (C, 3) SubINode(zero, a) );
+ Node *andn= _gvn.transform( new (C, 3) AndINode(neg, mask) );
+ Node *negn= _gvn.transform( new (C, 3) SubINode(zero, andn) );
+ phi->init_req(1, negn);
+ // Fast positive case
+ Node *andx = _gvn.transform( new (C, 3) AndINode(a, mask) );
+ phi->init_req(2, andx);
+ // Push the merge
+ push( _gvn.transform(phi) );
+ return;
+ }
+ }
+ }
+ // Default case
+ push( _gvn.transform( new (C, 3) ModINode(control(),a,b) ) );
+}
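// Editorial sketch (not part of the patch): the mask-based remainder that
// do_irem() builds when the divisor is a known positive power of two. Java's %
// keeps the sign of the dividend, hence the negate/and/negate on the negative
// path; the same (divisor & ~(divisor-1)) == divisor test detects powers of two.
// (The ideal-graph SubI wraps on INT_MIN; this C++ sketch ignores that edge.)
#include <cassert>

static bool is_pow2(int d)         { return d > 0 && (d & ~(d - 1)) == d; }
static int  rem_pow2(int a, int d) {                 // requires is_pow2(d)
  int mask = d - 1;
  return (a < 0) ? -((-a) & mask)                    // negative dividend
                 : (a & mask);                       // fast positive path
}

int main() {
  assert(is_pow2(8) && !is_pow2(12));
  assert(rem_pow2( 13, 8) ==  13 % 8);               //  5
  assert(rem_pow2(-13, 8) == -13 % 8);               // -5, not 3
  return 0;
}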
+
+// Handle jsr and jsr_w bytecode
+void Parse::do_jsr() {
+ assert(bc() == Bytecodes::_jsr || bc() == Bytecodes::_jsr_w, "wrong bytecode");
+
+ // Store information about current state, tagged with new _jsr_bci
+ int return_bci = iter().next_bci();
+ int jsr_bci = (bc() == Bytecodes::_jsr) ? iter().get_dest() : iter().get_far_dest();
+
+ // Update method data
+ profile_taken_branch(jsr_bci);
+
+ // The way we do things now, there is only one successor block
+ // for the jsr, because the target code is cloned by ciTypeFlow.
+ Block* target = successor_for_bci(jsr_bci);
+
+ // What got pushed?
+ const Type* ret_addr = target->peek();
+ assert(ret_addr->singleton(), "must be a constant (cloned jsr body)");
+
+ // Effect of jsr on stack
+ push(_gvn.makecon(ret_addr));
+
+ // Flow to the jsr.
+ merge(jsr_bci);
+}
+
+// Handle ret bytecode
+void Parse::do_ret() {
+ // Find to whom we return.
+#if 0 // %%%% MAKE THIS WORK
+ Node* con = local();
+ const TypePtr* tp = con->bottom_type()->isa_ptr();
+ assert(tp && tp->singleton(), "");
+ int return_bci = (int) tp->get_con();
+ merge(return_bci);
+#else
+ assert(block()->num_successors() == 1, "a ret can only go one place now");
+ Block* target = block()->successor_at(0);
+ assert(!target->is_ready(), "our arrival must be expected");
+ profile_ret(target->flow()->start());
+ int pnum = target->next_path_num();
+ merge_common(target, pnum);
+#endif
+}
+
+//--------------------------dynamic_branch_prediction--------------------------
+// Try to gather dynamic branch prediction behavior. Return a probability
+// of the branch being taken and set the "cnt" field. Return -1.0
+// if we need to use static prediction for some reason.
+float Parse::dynamic_branch_prediction(float &cnt) {
+ ResourceMark rm;
+
+ cnt = COUNT_UNKNOWN;
+
+ // Use MethodData information if it is available
+ // FIXME: free the ProfileData structure
+ ciMethodData* methodData = method()->method_data();
+ if (!methodData->is_mature()) return PROB_UNKNOWN;
+ ciProfileData* data = methodData->bci_to_data(bci());
+ if (!data->is_JumpData()) return PROB_UNKNOWN;
+
+ // get taken and not taken values
+ int taken = data->as_JumpData()->taken();
+ int not_taken = 0;
+ if (data->is_BranchData()) {
+ not_taken = data->as_BranchData()->not_taken();
+ }
+
+ // scale the counts to be commensurate with invocation counts:
+ taken = method()->scale_count(taken);
+ not_taken = method()->scale_count(not_taken);
+
+ // Give up if too few counts to be meaningful
+ if (taken + not_taken < 40) {
+ if (C->log() != NULL) {
+ C->log()->elem("branch target_bci='%d' taken='%d' not_taken='%d'", iter().get_dest(), taken, not_taken);
+ }
+ return PROB_UNKNOWN;
+ }
+
+ // Compute frequency that we arrive here
+ int sum = taken + not_taken;
+ // Adjust if this block is a cloned private block but the
+ // jump counts are shared. Take the private counts for
+ // just this path instead of the shared counts.
+ if( block()->count() > 0 )
+ sum = block()->count();
+ cnt = (float)sum / (float)FreqCountInvocations;
+
+ // Pin probability to sane limits
+ float prob;
+ if( !taken )
+ prob = (0+PROB_MIN) / 2;
+ else if( !not_taken )
+ prob = (1+PROB_MAX) / 2;
+ else { // Compute probability of true path
+ prob = (float)taken / (float)(taken + not_taken);
+ if (prob > PROB_MAX) prob = PROB_MAX;
+ if (prob < PROB_MIN) prob = PROB_MIN;
+ }
+
+ assert((cnt > 0.0f) && (prob > 0.0f),
+ "Bad frequency assignment in if");
+
+ if (C->log() != NULL) {
+ const char* prob_str = NULL;
+ if (prob >= PROB_MAX) prob_str = (prob == PROB_MAX) ? "max" : "always";
+ if (prob <= PROB_MIN) prob_str = (prob == PROB_MIN) ? "min" : "never";
+ char prob_str_buf[30];
+ if (prob_str == NULL) {
+ sprintf(prob_str_buf, "%g", prob);
+ prob_str = prob_str_buf;
+ }
+ C->log()->elem("branch target_bci='%d' taken='%d' not_taken='%d' cnt='%g' prob='%s'",
+ iter().get_dest(), taken, not_taken, cnt, prob_str);
+ }
+ return prob;
+}
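// Editorial sketch (not part of the patch): how the profiled taken/not-taken
// counters become a pinned branch probability. The limit values below are
// placeholders for PROB_MIN / PROB_MAX, not the real constants. Note that a
// zero taken count maps to (0+PROB_MIN)/2, deliberately below PROB_MIN, so
// that seems_never_taken() can recognize it later.
#include <cstdio>

static const float kProbMin = 1e-6f;          // placeholder for PROB_MIN
static const float kProbMax = 1.0f - 1e-6f;   // placeholder for PROB_MAX

static float branch_prob(int taken, int not_taken) {
  if (taken + not_taken < 40) return -1.0f;             // too few counts (PROB_UNKNOWN)
  if (taken == 0)     return (0.0f + kProbMin) / 2.0f;  // "never taken"
  if (not_taken == 0) return (1.0f + kProbMax) / 2.0f;  // "always taken"
  float prob = (float)taken / (float)(taken + not_taken);
  if (prob > kProbMax) prob = kProbMax;                 // pin to sane limits
  if (prob < kProbMin) prob = kProbMin;
  return prob;
}

int main() {
  printf("%g %g %g\n", branch_prob(1000, 10), branch_prob(0, 500), branch_prob(5, 5));
  return 0;
}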
+
+//-----------------------------branch_prediction-------------------------------
+float Parse::branch_prediction(float& cnt,
+ BoolTest::mask btest,
+ int target_bci) {
+ float prob = dynamic_branch_prediction(cnt);
+ // If prob is unknown, switch to static prediction
+ if (prob != PROB_UNKNOWN) return prob;
+
+ prob = PROB_FAIR; // Set default value
+ if (btest == BoolTest::eq) // Exactly equal test?
+ prob = PROB_STATIC_INFREQUENT; // Assume it's relatively infrequent
+ else if (btest == BoolTest::ne)
+ prob = PROB_STATIC_FREQUENT; // Assume it's relatively frequent
+
+ // If this is a conditional test guarding a backwards branch,
+ // assume it's a loop-back edge. Make it a likely taken branch.
+ if (target_bci < bci()) {
+ if (is_osr_parse()) { // Could be a hot OSR'd loop; force deopt
+ // Since it's an OSR, we probably have profile data, but since
+ // branch_prediction returned PROB_UNKNOWN, the counts are too small.
+ // Let's make a special check here for completely zero counts.
+ ciMethodData* methodData = method()->method_data();
+ if (!methodData->is_empty()) {
+ ciProfileData* data = methodData->bci_to_data(bci());
+ // Only stop for truly zero counts, which mean an unknown part
+ // of the OSR-ed method, and we want to deopt to gather more stats.
+ // If you have ANY counts, then this loop is simply 'cold' relative
+ // to the OSR loop.
+ if (data->as_BranchData()->taken() +
+ data->as_BranchData()->not_taken() == 0 ) {
+ // This is the only way to return PROB_UNKNOWN:
+ return PROB_UNKNOWN;
+ }
+ }
+ }
+ prob = PROB_STATIC_FREQUENT; // Likely to take backwards branch
+ }
+
+ assert(prob != PROB_UNKNOWN, "must have some guess at this point");
+ return prob;
+}
+
+// The magic constants are chosen so as to match the output of
+// branch_prediction() when the profile reports a zero taken count.
+// It is important to distinguish zero counts unambiguously, because
+// some branches (e.g., _213_javac.Assembler.eliminate) validly produce
+// very small but nonzero probabilities, which if confused with zero
+// counts would keep the program recompiling indefinitely.
+bool Parse::seems_never_taken(float prob) {
+ return prob < PROB_MIN;
+}
+
+inline void Parse::repush_if_args() {
+#ifndef PRODUCT
+ if (PrintOpto && WizardMode) {
+ tty->print("defending against excessive implicit null exceptions on %s @%d in ",
+ Bytecodes::name(iter().cur_bc()), iter().cur_bci());
+ method()->print_name(); tty->cr();
+ }
+#endif
+ int bc_depth = - Bytecodes::depth(iter().cur_bc());
+ assert(bc_depth == 1 || bc_depth == 2, "only two kinds of branches");
+ DEBUG_ONLY(sync_jvms()); // argument(n) requires a synced jvms
+ assert(argument(0) != NULL, "must exist");
+ assert(bc_depth == 1 || argument(1) != NULL, "two must exist");
+ _sp += bc_depth;
+}
+
+//----------------------------------do_ifnull----------------------------------
+void Parse::do_ifnull(BoolTest::mask btest) {
+ int target_bci = iter().get_dest();
+
+ float cnt;
+ float prob = branch_prediction(cnt, btest, target_bci);
+ if (prob == PROB_UNKNOWN) {
+ // (An earlier version of do_ifnull omitted this trap for OSR methods.)
+#ifndef PRODUCT
+ if (PrintOpto && Verbose)
+ tty->print_cr("Never-taken backedge stops compilation at bci %d",bci());
+#endif
+ repush_if_args(); // to gather stats on loop
+ // We need to mark this branch as taken so that if we recompile we will
+ // see that it is possible. In the tiered system the interpreter doesn't
+ // do profiling and by the time we get to the lower tier from the interpreter
+ // the path may be cold again. Make sure it doesn't look untaken
+ profile_taken_branch(target_bci, !ProfileInterpreter);
+ uncommon_trap(Deoptimization::Reason_unreached,
+ Deoptimization::Action_reinterpret,
+ NULL, "cold");
+ return;
+ }
+
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(target_bci);
+ Block* branch_block = successor_for_bci(target_bci);
+ Block* next_block = successor_for_bci(iter().next_bci());
+
+ explicit_null_checks_inserted++;
+ Node* a = null();
+ Node* b = pop();
+ Node* c = _gvn.transform( new (C, 3) CmpPNode(b, a) );
+
+ // Make a cast-away-nullness that is control dependent on the test
+ const Type *t = _gvn.type(b);
+ const Type *t_not_null = t->join(TypePtr::NOTNULL);
+ Node *cast = new (C, 2) CastPPNode(b,t_not_null);
+
+ // Generate real control flow
+ Node *tst = _gvn.transform( new (C, 2) BoolNode( c, btest ) );
+
+ // Sanity check the probability value
+ assert(prob > 0.0f,"Bad probability in Parser");
+ // Need xform to put node in hash table
+ IfNode *iff = create_and_xform_if( control(), tst, prob, cnt );
+ assert(iff->_prob > 0.0f,"Optimizer made bad probability in parser");
+ // True branch
+ { PreserveJVMState pjvms(this);
+ Node* iftrue = _gvn.transform( new (C, 1) IfTrueNode (iff) );
+ set_control(iftrue);
+
+ if (stopped()) { // Path is dead?
+ explicit_null_checks_elided++;
+ } else { // Path is live.
+ // Update method data
+ profile_taken_branch(target_bci);
+ adjust_map_after_if(btest, c, prob, branch_block, next_block);
+ if (!stopped())
+ merge(target_bci);
+ }
+ }
+
+ // False branch
+ Node* iffalse = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ set_control(iffalse);
+
+ if (stopped()) { // Path is dead?
+ explicit_null_checks_elided++;
+ } else { // Path is live.
+ // Update method data
+ profile_not_taken_branch();
+ adjust_map_after_if(BoolTest(btest).negate(), c, 1.0-prob,
+ next_block, branch_block);
+ }
+}
+
+//------------------------------------do_if------------------------------------
+void Parse::do_if(BoolTest::mask btest, Node* c) {
+ int target_bci = iter().get_dest();
+
+ float cnt;
+ float prob = branch_prediction(cnt, btest, target_bci);
+ float untaken_prob = 1.0 - prob;
+
+ if (prob == PROB_UNKNOWN) {
+#ifndef PRODUCT
+ if (PrintOpto && Verbose)
+ tty->print_cr("Never-taken backedge stops compilation at bci %d",bci());
+#endif
+ repush_if_args(); // to gather stats on loop
+ // We need to mark this branch as taken so that if we recompile we will
+ // see that it is possible. In the tiered system the interpreter doesn't
+ // do profiling and by the time we get to the lower tier from the interpreter
+ // the path may be cold again. Make sure it doesn't look untaken
+ profile_taken_branch(target_bci, !ProfileInterpreter);
+ uncommon_trap(Deoptimization::Reason_unreached,
+ Deoptimization::Action_reinterpret,
+ NULL, "cold");
+ return;
+ }
+
+ // Sanity check the probability value
+ assert(0.0f < prob && prob < 1.0f,"Bad probability in Parser");
+
+ bool taken_if_true = true;
+ // Convert BoolTest to canonical form:
+ if (!BoolTest(btest).is_canonical()) {
+ btest = BoolTest(btest).negate();
+ taken_if_true = false;
+ // prob is NOT updated here; it remains the probability of the taken
+ // path (as opposed to the prob of the path guarded by an 'IfTrueNode').
+ }
+ assert(btest != BoolTest::eq, "!= is the only canonical exact test");
+
+ Node* tst0 = new (C, 2) BoolNode(c, btest);
+ Node* tst = _gvn.transform(tst0);
+ BoolTest::mask taken_btest = BoolTest::illegal;
+ BoolTest::mask untaken_btest = BoolTest::illegal;
+ if (btest == BoolTest::ne) {
+ // For now, these are the only cases of btest that matter. (More later.)
+ taken_btest = taken_if_true ? btest : BoolTest::eq;
+ untaken_btest = taken_if_true ? BoolTest::eq : btest;
+ }
+
+ // Generate real control flow
+ float true_prob = (taken_if_true ? prob : untaken_prob);
+ IfNode* iff = create_and_map_if(control(), tst, true_prob, cnt);
+ assert(iff->_prob > 0.0f,"Optimizer made bad probability in parser");
+ Node* taken_branch = new (C, 1) IfTrueNode(iff);
+ Node* untaken_branch = new (C, 1) IfFalseNode(iff);
+ if (!taken_if_true) { // Finish conversion to canonical form
+ Node* tmp = taken_branch;
+ taken_branch = untaken_branch;
+ untaken_branch = tmp;
+ }
+
+ Block* branch_block = successor_for_bci(target_bci);
+ Block* next_block = successor_for_bci(iter().next_bci());
+
+ // Branch is taken:
+ { PreserveJVMState pjvms(this);
+ taken_branch = _gvn.transform(taken_branch);
+ set_control(taken_branch);
+
+ if (!stopped()) {
+ // Update method data
+ profile_taken_branch(target_bci);
+ adjust_map_after_if(taken_btest, c, prob, branch_block, next_block);
+ if (!stopped())
+ merge(target_bci);
+ }
+ }
+
+ untaken_branch = _gvn.transform(untaken_branch);
+ set_control(untaken_branch);
+
+ // Branch not taken.
+ if (!stopped()) {
+ // Update method data
+ profile_not_taken_branch();
+ adjust_map_after_if(untaken_btest, c, untaken_prob,
+ next_block, branch_block);
+ }
+}
+
+//----------------------------adjust_map_after_if------------------------------
+// Adjust the JVM state to reflect the result of taking this path.
+// Basically, it means inspecting the CmpNode controlling this
+// branch, seeing how it constrains a tested value, and then
+// deciding if it's worth our while to encode this constraint
+// as graph nodes in the current abstract interpretation map.
+void Parse::adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
+ Block* path, Block* other_path) {
+ if (stopped() || !c->is_Cmp() || btest == BoolTest::illegal)
+ return; // nothing to do
+
+ bool is_fallthrough = (path == successor_for_bci(iter().next_bci()));
+
+ int cop = c->Opcode();
+ if (seems_never_taken(prob) && cop == Op_CmpP && btest == BoolTest::eq) {
+ // (An earlier version of do_if omitted '&& btest == BoolTest::eq'.)
+ //
+ // If this might possibly turn into an implicit null check,
+ // and the null has never yet been seen, we need to generate
+ // an uncommon trap, so as to recompile instead of suffering
+ // with very slow branches. (We'll get the slow branches if
+ // the program ever changes phase and starts seeing nulls here.)
+ //
+ // The tests we worry about are of the form (p == null).
+ // We do not simply inspect for a null constant, since a node may
+ // optimize to 'null' later on.
+ repush_if_args();
+ // We need to mark this branch as taken so that if we recompile we will
+ // see that it is possible. In the tiered system the interpreter doesn't
+ // do profiling and by the time we get to the lower tier from the interpreter
+ // the path may be cold again. Make sure it doesn't look untaken
+ if (is_fallthrough) {
+ profile_not_taken_branch(!ProfileInterpreter);
+ } else {
+ profile_taken_branch(iter().get_dest(), !ProfileInterpreter);
+ }
+ uncommon_trap(Deoptimization::Reason_unreached,
+ Deoptimization::Action_reinterpret,
+ NULL,
+ (is_fallthrough ? "taken always" : "taken never"));
+ return;
+ }
+
+ Node* val = c->in(1);
+ Node* con = c->in(2);
+ const Type* tcon = _gvn.type(con);
+ const Type* tval = _gvn.type(val);
+ bool have_con = tcon->singleton();
+ if (tval->singleton()) {
+ if (!have_con) {
+ // Swap, so constant is in con.
+ con = val;
+ tcon = tval;
+ val = c->in(2);
+ tval = _gvn.type(val);
+ btest = BoolTest(btest).commute();
+ have_con = true;
+ } else {
+ // Do we have two constants? Then leave well enough alone.
+ have_con = false;
+ }
+ }
+ if (!have_con) // remaining adjustments need a con
+ return;
+
+
+ int val_in_map = map()->find_edge(val);
+ if (val_in_map < 0) return; // replace_in_map would be useless
+ {
+ JVMState* jvms = this->jvms();
+ if (!(jvms->is_loc(val_in_map) ||
+ jvms->is_stk(val_in_map)))
+ return; // again, it would be useless
+ }
+
+ // Check for a comparison to a constant, and "know" that the compared
+ // value is constrained on this path.
+ assert(tcon->singleton(), "");
+ ConstraintCastNode* ccast = NULL;
+ Node* cast = NULL;
+
+ switch (btest) {
+ case BoolTest::eq: // Constant test?
+ {
+ const Type* tboth = tcon->join(tval);
+ if (tboth == tval) break; // Nothing to gain.
+ if (tcon->isa_int()) {
+ ccast = new (C, 2) CastIINode(val, tboth);
+ } else if (tcon == TypePtr::NULL_PTR) {
+ // Cast to null, but keep the pointer identity temporarily live.
+ ccast = new (C, 2) CastPPNode(val, tboth);
+ } else {
+ const TypeF* tf = tcon->isa_float_constant();
+ const TypeD* td = tcon->isa_double_constant();
+ // Exclude tests vs float/double 0 as these could be
+ // either +0 or -0. Just because you are equal to +0
+ // doesn't mean you ARE +0!
+ if ((!tf || tf->_f != 0.0) &&
+ (!td || td->_d != 0.0))
+ cast = con; // Replace non-constant val by con.
+ }
+ }
+ break;
+
+ case BoolTest::ne:
+ if (tcon == TypePtr::NULL_PTR) {
+ cast = cast_not_null(val, false);
+ }
+ break;
+
+ default:
+ // (At this point we could record int range types with CastII.)
+ break;
+ }
+
+ if (ccast != NULL) {
+ const Type* tcc = ccast->as_Type()->type();
+ assert(tcc != tval && tcc->higher_equal(tval), "must improve");
+ // Delay transform() call to allow recovery of pre-cast value
+ // at the control merge.
+ ccast->set_req(0, control());
+ _gvn.set_type_bottom(ccast);
+ record_for_igvn(ccast);
+ cast = ccast;
+ }
+
+ if (cast != NULL) { // Here's the payoff.
+ replace_in_map(val, cast);
+ }
+}
+
+
+//------------------------------do_one_bytecode--------------------------------
+// Parse this bytecode, and alter the Parser's JVM->Node mapping
+void Parse::do_one_bytecode() {
+ Node *a, *b, *c, *d; // Handy temps
+ BoolTest::mask btest;
+ int i;
+
+ assert(!has_exceptions(), "bytecode entry state must be clear of throws");
+
+ if (C->check_node_count(NodeLimitFudgeFactor * 5,
+ "out of nodes parsing method")) {
+ return;
+ }
+
+#ifdef ASSERT
+ // for setting breakpoints
+ if (TraceOptoParse) {
+ tty->print(" @");
+ dump_bci(bci());
+ }
+#endif
+
+ switch (bc()) {
+ case Bytecodes::_nop:
+ // do nothing
+ break;
+ case Bytecodes::_lconst_0:
+ push_pair(longcon(0));
+ break;
+
+ case Bytecodes::_lconst_1:
+ push_pair(longcon(1));
+ break;
+
+ case Bytecodes::_fconst_0:
+ push(zerocon(T_FLOAT));
+ break;
+
+ case Bytecodes::_fconst_1:
+ push(makecon(TypeF::ONE));
+ break;
+
+ case Bytecodes::_fconst_2:
+ push(makecon(TypeF::make(2.0f)));
+ break;
+
+ case Bytecodes::_dconst_0:
+ push_pair(zerocon(T_DOUBLE));
+ break;
+
+ case Bytecodes::_dconst_1:
+ push_pair(makecon(TypeD::ONE));
+ break;
+
+ case Bytecodes::_iconst_m1:push(intcon(-1)); break;
+ case Bytecodes::_iconst_0: push(intcon( 0)); break;
+ case Bytecodes::_iconst_1: push(intcon( 1)); break;
+ case Bytecodes::_iconst_2: push(intcon( 2)); break;
+ case Bytecodes::_iconst_3: push(intcon( 3)); break;
+ case Bytecodes::_iconst_4: push(intcon( 4)); break;
+ case Bytecodes::_iconst_5: push(intcon( 5)); break;
+ case Bytecodes::_bipush: push(intcon( iter().get_byte())); break;
+ case Bytecodes::_sipush: push(intcon( iter().get_short())); break;
+ case Bytecodes::_aconst_null: push(null()); break;
+ case Bytecodes::_ldc:
+ case Bytecodes::_ldc_w:
+ case Bytecodes::_ldc2_w:
+ // If the constant is unresolved, run this BC once in the interpreter.
+ if (iter().is_unresolved_string()) {
+ uncommon_trap(Deoptimization::make_trap_request
+ (Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ iter().get_constant_index()),
+ NULL, "unresolved_string");
+ break;
+ } else {
+ ciConstant constant = iter().get_constant();
+ if (constant.basic_type() == T_OBJECT) {
+ ciObject* c = constant.as_object();
+ if (c->is_klass()) {
+ // The constant returned for a klass is the ciKlass for the
+ // entry. We want the java_mirror so get it.
+ ciKlass* klass = c->as_klass();
+ if (klass->is_loaded()) {
+ constant = ciConstant(T_OBJECT, klass->java_mirror());
+ } else {
+ uncommon_trap(Deoptimization::make_trap_request
+ (Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ iter().get_constant_index()),
+ NULL, "unresolved_klass");
+ break;
+ }
+ }
+ }
+ push_constant(constant);
+ }
+
+ break;
+
+ case Bytecodes::_aload_0:
+ push( local(0) );
+ break;
+ case Bytecodes::_aload_1:
+ push( local(1) );
+ break;
+ case Bytecodes::_aload_2:
+ push( local(2) );
+ break;
+ case Bytecodes::_aload_3:
+ push( local(3) );
+ break;
+ case Bytecodes::_aload:
+ push( local(iter().get_index()) );
+ break;
+
+ case Bytecodes::_fload_0:
+ case Bytecodes::_iload_0:
+ push( local(0) );
+ break;
+ case Bytecodes::_fload_1:
+ case Bytecodes::_iload_1:
+ push( local(1) );
+ break;
+ case Bytecodes::_fload_2:
+ case Bytecodes::_iload_2:
+ push( local(2) );
+ break;
+ case Bytecodes::_fload_3:
+ case Bytecodes::_iload_3:
+ push( local(3) );
+ break;
+ case Bytecodes::_fload:
+ case Bytecodes::_iload:
+ push( local(iter().get_index()) );
+ break;
+ case Bytecodes::_lload_0:
+ push_pair_local( 0 );
+ break;
+ case Bytecodes::_lload_1:
+ push_pair_local( 1 );
+ break;
+ case Bytecodes::_lload_2:
+ push_pair_local( 2 );
+ break;
+ case Bytecodes::_lload_3:
+ push_pair_local( 3 );
+ break;
+ case Bytecodes::_lload:
+ push_pair_local( iter().get_index() );
+ break;
+
+ case Bytecodes::_dload_0:
+ push_pair_local(0);
+ break;
+ case Bytecodes::_dload_1:
+ push_pair_local(1);
+ break;
+ case Bytecodes::_dload_2:
+ push_pair_local(2);
+ break;
+ case Bytecodes::_dload_3:
+ push_pair_local(3);
+ break;
+ case Bytecodes::_dload:
+ push_pair_local(iter().get_index());
+ break;
+ case Bytecodes::_fstore_0:
+ case Bytecodes::_istore_0:
+ case Bytecodes::_astore_0:
+ set_local( 0, pop() );
+ break;
+ case Bytecodes::_fstore_1:
+ case Bytecodes::_istore_1:
+ case Bytecodes::_astore_1:
+ set_local( 1, pop() );
+ break;
+ case Bytecodes::_fstore_2:
+ case Bytecodes::_istore_2:
+ case Bytecodes::_astore_2:
+ set_local( 2, pop() );
+ break;
+ case Bytecodes::_fstore_3:
+ case Bytecodes::_istore_3:
+ case Bytecodes::_astore_3:
+ set_local( 3, pop() );
+ break;
+ case Bytecodes::_fstore:
+ case Bytecodes::_istore:
+ case Bytecodes::_astore:
+ set_local( iter().get_index(), pop() );
+ break;
+ // long stores
+ case Bytecodes::_lstore_0:
+ set_pair_local( 0, pop_pair() );
+ break;
+ case Bytecodes::_lstore_1:
+ set_pair_local( 1, pop_pair() );
+ break;
+ case Bytecodes::_lstore_2:
+ set_pair_local( 2, pop_pair() );
+ break;
+ case Bytecodes::_lstore_3:
+ set_pair_local( 3, pop_pair() );
+ break;
+ case Bytecodes::_lstore:
+ set_pair_local( iter().get_index(), pop_pair() );
+ break;
+
+ // double stores
+ case Bytecodes::_dstore_0:
+ set_pair_local( 0, dstore_rounding(pop_pair()) );
+ break;
+ case Bytecodes::_dstore_1:
+ set_pair_local( 1, dstore_rounding(pop_pair()) );
+ break;
+ case Bytecodes::_dstore_2:
+ set_pair_local( 2, dstore_rounding(pop_pair()) );
+ break;
+ case Bytecodes::_dstore_3:
+ set_pair_local( 3, dstore_rounding(pop_pair()) );
+ break;
+ case Bytecodes::_dstore:
+ set_pair_local( iter().get_index(), dstore_rounding(pop_pair()) );
+ break;
+
+ case Bytecodes::_pop: _sp -= 1; break;
+ case Bytecodes::_pop2: _sp -= 2; break;
+ case Bytecodes::_swap:
+ a = pop();
+ b = pop();
+ push(a);
+ push(b);
+ break;
+ case Bytecodes::_dup:
+ a = pop();
+ push(a);
+ push(a);
+ break;
+ case Bytecodes::_dup_x1:
+ a = pop();
+ b = pop();
+ push( a );
+ push( b );
+ push( a );
+ break;
+ case Bytecodes::_dup_x2:
+ a = pop();
+ b = pop();
+ c = pop();
+ push( a );
+ push( c );
+ push( b );
+ push( a );
+ break;
+ case Bytecodes::_dup2:
+ a = pop();
+ b = pop();
+ push( b );
+ push( a );
+ push( b );
+ push( a );
+ break;
+
+ case Bytecodes::_dup2_x1:
+ // before: .. c, b, a
+ // after: .. b, a, c, b, a
+ // not tested
+ a = pop();
+ b = pop();
+ c = pop();
+ push( b );
+ push( a );
+ push( c );
+ push( b );
+ push( a );
+ break;
+ case Bytecodes::_dup2_x2:
+ // before: .. d, c, b, a
+ // after: .. b, a, d, c, b, a
+ // not tested
+ a = pop();
+ b = pop();
+ c = pop();
+ d = pop();
+ push( b );
+ push( a );
+ push( d );
+ push( c );
+ push( b );
+ push( a );
+ break;
+
+ case Bytecodes::_arraylength: {
+ // Must do null-check with value on expression stack
+ Node *ary = do_null_check(peek(), T_ARRAY);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+ a = pop();
+ push(load_array_length(a));
+ break;
+ }
+
+ case Bytecodes::_baload: array_load(T_BYTE); break;
+ case Bytecodes::_caload: array_load(T_CHAR); break;
+ case Bytecodes::_iaload: array_load(T_INT); break;
+ case Bytecodes::_saload: array_load(T_SHORT); break;
+ case Bytecodes::_faload: array_load(T_FLOAT); break;
+ case Bytecodes::_aaload: array_load(T_OBJECT); break;
+ case Bytecodes::_laload: {
+ a = array_addressing(T_LONG, 0);
+ if (stopped()) return; // guaranteed null or range check
+ _sp -= 2; // Pop array and index
+ push_pair( make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS));
+ break;
+ }
+ case Bytecodes::_daload: {
+ a = array_addressing(T_DOUBLE, 0);
+ if (stopped()) return; // guaranteed null or range check
+ _sp -= 2; // Pop array and index
+ push_pair( make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES));
+ break;
+ }
+ case Bytecodes::_bastore: array_store(T_BYTE); break;
+ case Bytecodes::_castore: array_store(T_CHAR); break;
+ case Bytecodes::_iastore: array_store(T_INT); break;
+ case Bytecodes::_sastore: array_store(T_SHORT); break;
+ case Bytecodes::_fastore: array_store(T_FLOAT); break;
+ case Bytecodes::_aastore: {
+ d = array_addressing(T_OBJECT, 1);
+ if (stopped()) return; // guaranteed null or range check
+ array_store_check();
+ c = pop(); // Oop to store
+ b = pop(); // index (already used)
+ a = pop(); // the array itself
+ const Type* elemtype = _gvn.type(a)->is_aryptr()->elem();
+ const TypeAryPtr* adr_type = TypeAryPtr::OOPS;
+ Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT);
+ break;
+ }
+ case Bytecodes::_lastore: {
+ a = array_addressing(T_LONG, 2);
+ if (stopped()) return; // guaranteed null or range check
+ c = pop_pair();
+ _sp -= 2; // Pop array and index
+ store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS);
+ break;
+ }
+ case Bytecodes::_dastore: {
+ a = array_addressing(T_DOUBLE, 2);
+ if (stopped()) return; // guaranteed null or range check
+ c = pop_pair();
+ _sp -= 2; // Pop array and index
+ c = dstore_rounding(c);
+ store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES);
+ break;
+ }
+ case Bytecodes::_getfield:
+ do_getfield();
+ break;
+
+ case Bytecodes::_getstatic:
+ do_getstatic();
+ break;
+
+ case Bytecodes::_putfield:
+ do_putfield();
+ break;
+
+ case Bytecodes::_putstatic:
+ do_putstatic();
+ break;
+
+ case Bytecodes::_irem:
+ do_irem();
+ break;
+ case Bytecodes::_idiv:
+ // Must keep both values on the expression-stack during null-check
+ do_null_check(peek(), T_INT);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+ b = pop();
+ a = pop();
+ push( _gvn.transform( new (C, 3) DivINode(control(),a,b) ) );
+ break;
+ case Bytecodes::_imul:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) MulINode(a,b) ) );
+ break;
+ case Bytecodes::_iadd:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) AddINode(a,b) ) );
+ break;
+ case Bytecodes::_ineg:
+ a = pop();
+ push( _gvn.transform( new (C, 3) SubINode(_gvn.intcon(0),a)) );
+ break;
+ case Bytecodes::_isub:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) SubINode(a,b) ) );
+ break;
+ case Bytecodes::_iand:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) AndINode(a,b) ) );
+ break;
+ case Bytecodes::_ior:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) OrINode(a,b) ) );
+ break;
+ case Bytecodes::_ixor:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) XorINode(a,b) ) );
+ break;
+ case Bytecodes::_ishl:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) LShiftINode(a,b) ) );
+ break;
+ case Bytecodes::_ishr:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) RShiftINode(a,b) ) );
+ break;
+ case Bytecodes::_iushr:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) URShiftINode(a,b) ) );
+ break;
+
+ case Bytecodes::_fneg:
+ a = pop();
+ b = _gvn.transform(new (C, 2) NegFNode (a));
+ push(b);
+ break;
+
+ case Bytecodes::_fsub:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) SubFNode(a,b) );
+ d = precision_rounding(c);
+ push( d );
+ break;
+
+ case Bytecodes::_fadd:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) AddFNode(a,b) );
+ d = precision_rounding(c);
+ push( d );
+ break;
+
+ case Bytecodes::_fmul:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) MulFNode(a,b) );
+ d = precision_rounding(c);
+ push( d );
+ break;
+
+ case Bytecodes::_fdiv:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) DivFNode(0,a,b) );
+ d = precision_rounding(c);
+ push( d );
+ break;
+
+ case Bytecodes::_frem:
+ if (Matcher::has_match_rule(Op_ModF)) {
+ // Generate a ModF node.
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) ModFNode(0,a,b) );
+ d = precision_rounding(c);
+ push( d );
+ }
+ else {
+ // Generate a call.
+ modf();
+ }
+ break;
+
+ case Bytecodes::_fcmpl:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) CmpF3Node( a, b));
+ push(c);
+ break;
+ case Bytecodes::_fcmpg:
+ b = pop();
+ a = pop();
+
+ // Same as fcmpl but need to flip the unordered case. Swap the inputs,
+ // which negates the result sign except for unordered. Flip the unordered
+ // as well by using CmpF3 which implements unordered-lesser instead of
+ // unordered-greater semantics. Finally, negate the result bits. Result
+ // is the same as using a CmpF3Greater, except we did it with CmpF3 alone.
+ c = _gvn.transform( new (C, 3) CmpF3Node( b, a));
+ c = _gvn.transform( new (C, 3) SubINode(_gvn.intcon(0),c) );
+ push(c);
+ break;
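// Editorial sketch (not part of the patch): the Java fcmpl/fcmpg results that
// the two cases above reproduce. fcmpl maps an unordered (NaN) comparison to
// -1 and fcmpg maps it to +1; swapping the operands of the fcmpl flavor
// (CmpF3) and negating the result gives the fcmpg behavior, as the comment
// above describes.
#include <cassert>
#include <cmath>

static int fcmpl(float a, float b) {
  if (a < b)  return -1;
  if (a > b)  return  1;
  if (a == b) return  0;
  return -1;                       // unordered: at least one NaN
}

static int fcmpg(float a, float b) {
  return -fcmpl(b, a);             // swap inputs, then negate the result
}

int main() {
  assert(fcmpl(1.0f, 2.0f) == -1 && fcmpg(1.0f, 2.0f) == -1);
  assert(fcmpl(2.0f, 2.0f) ==  0 && fcmpg(2.0f, 2.0f) ==  0);
  assert(fcmpl(NAN,  2.0f) == -1 && fcmpg(NAN,  2.0f) ==  1);
  return 0;
}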
+
+ case Bytecodes::_f2i:
+ a = pop();
+ push(_gvn.transform(new (C, 2) ConvF2INode(a)));
+ break;
+
+ case Bytecodes::_d2i:
+ a = pop_pair();
+ b = _gvn.transform(new (C, 2) ConvD2INode(a));
+ push( b );
+ break;
+
+ case Bytecodes::_f2d:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvF2DNode(a));
+ push_pair( b );
+ break;
+
+ case Bytecodes::_d2f:
+ a = pop_pair();
+ b = _gvn.transform( new (C, 2) ConvD2FNode(a));
+ // This breaks _227_mtrt (speed & correctness) and _222_mpegaudio (speed)
+ //b = _gvn.transform(new (C, 2) RoundFloatNode(0, b) );
+ push( b );
+ break;
+
+ case Bytecodes::_l2f:
+ if (Matcher::convL2FSupported()) {
+ a = pop_pair();
+ b = _gvn.transform( new (C, 2) ConvL2FNode(a));
+ // For i486.ad, FILD doesn't restrict precision to 24 or 53 bits.
+ // Rather than storing the result into an FP register then pushing
+ // out to memory to round, the machine instruction that implements
+ // ConvL2F is responsible for rounding.
+ // c = precision_rounding(b);
+ c = _gvn.transform(b);
+ push(c);
+ } else {
+ l2f();
+ }
+ break;
+
+ case Bytecodes::_l2d:
+ a = pop_pair();
+ b = _gvn.transform( new (C, 2) ConvL2DNode(a));
+ // For i486.ad, rounding is always necessary (see _l2f above).
+ // c = dprecision_rounding(b);
+ c = _gvn.transform(b);
+ push_pair(c);
+ break;
+
+ case Bytecodes::_f2l:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvF2LNode(a));
+ push_pair(b);
+ break;
+
+ case Bytecodes::_d2l:
+ a = pop_pair();
+ b = _gvn.transform( new (C, 2) ConvD2LNode(a));
+ push_pair(b);
+ break;
+
+ case Bytecodes::_dsub:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) SubDNode(a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ break;
+
+ case Bytecodes::_dadd:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) AddDNode(a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ break;
+
+ case Bytecodes::_dmul:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) MulDNode(a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ break;
+
+ case Bytecodes::_ddiv:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) DivDNode(0,a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ break;
+
+ case Bytecodes::_dneg:
+ a = pop_pair();
+ b = _gvn.transform(new (C, 2) NegDNode (a));
+ push_pair(b);
+ break;
+
+ case Bytecodes::_drem:
+ if (Matcher::has_match_rule(Op_ModD)) {
+ // Generate a ModD node.
+ b = pop_pair();
+ a = pop_pair();
+ // a % b
+
+ c = _gvn.transform( new (C, 3) ModDNode(0,a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ }
+ else {
+ // Generate a call.
+ modd();
+ }
+ break;
+
+ case Bytecodes::_dcmpl:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) CmpD3Node( a, b));
+ push(c);
+ break;
+
+ case Bytecodes::_dcmpg:
+ b = pop_pair();
+ a = pop_pair();
+ // Same as dcmpl but need to flip the unordered case.
+ // Commute the inputs, which negates the result sign except for unordered.
+ // Flip the unordered as well by using CmpD3 which implements
+ // unordered-lesser instead of unordered-greater semantics.
+ // Finally, negate the result bits. Result is same as using a
+ // CmpD3Greater except we did it with CmpD3 alone.
+ c = _gvn.transform( new (C, 3) CmpD3Node( b, a));
+ c = _gvn.transform( new (C, 3) SubINode(_gvn.intcon(0),c) );
+ push(c);
+ break;
+
+
+ // Note for longs -> lo word is on TOS, hi word is on TOS - 1
+ case Bytecodes::_land:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) AndLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lor:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) OrLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lxor:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) XorLNode(a,b) );
+ push_pair(c);
+ break;
+
+ case Bytecodes::_lshl:
+ b = pop(); // the shift count
+ a = pop_pair(); // value to be shifted
+ c = _gvn.transform( new (C, 3) LShiftLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lshr:
+ b = pop(); // the shift count
+ a = pop_pair(); // value to be shifted
+ c = _gvn.transform( new (C, 3) RShiftLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lushr:
+ b = pop(); // the shift count
+ a = pop_pair(); // value to be shifted
+ c = _gvn.transform( new (C, 3) URShiftLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lmul:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) MulLNode(a,b) );
+ push_pair(c);
+ break;
+
+ case Bytecodes::_lrem:
+ // Must keep both values on the expression-stack during null-check
+ assert(peek(0) == top(), "long word order");
+ do_null_check(peek(1), T_LONG);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) ModLNode(control(),a,b) );
+ push_pair(c);
+ break;
+
+ case Bytecodes::_ldiv:
+ // Must keep both values on the expression-stack during null-check
+ assert(peek(0) == top(), "long word order");
+ do_null_check(peek(1), T_LONG);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) DivLNode(control(),a,b) );
+ push_pair(c);
+ break;
+
+ case Bytecodes::_ladd:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) AddLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lsub:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) SubLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lcmp:
+ // Safepoints are now inserted _before_ branches. The long-compare
+ // bytecode painfully produces a 3-way value (-1,0,+1) which requires a
+ // slew of control flow. These are usually followed by a CmpI vs zero and
+ // a branch; this pattern then optimizes to the obvious long-compare and
+ // branch. However, if the branch is backwards there's a Safepoint
+ // inserted. The inserted Safepoint captures the JVM state at the
+ // pre-branch point, i.e. it captures the 3-way value. Thus if a
+ // long-compare is used to control a loop the debug info will force
+ // computation of the 3-way value, even though the generated code uses a
+ // long-compare and branch. We try to rectify the situation by inserting
+ // a SafePoint here and have it dominate and kill the safepoint added at a
+ // following backwards branch. At this point the JVM state merely holds 2
+ // longs but not the 3-way value.
+ if( UseLoopSafepoints ) {
+ switch( iter().next_bc() ) {
+ case Bytecodes::_ifgt:
+ case Bytecodes::_iflt:
+ case Bytecodes::_ifge:
+ case Bytecodes::_ifle:
+ case Bytecodes::_ifne:
+ case Bytecodes::_ifeq:
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(iter().next_get_dest());
+ }
+ }
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) CmpL3Node( a, b ));
+ push(c);
+ break;
+
+ case Bytecodes::_lneg:
+ a = pop_pair();
+ b = _gvn.transform( new (C, 3) SubLNode(longcon(0),a));
+ push_pair(b);
+ break;
+ case Bytecodes::_l2i:
+ a = pop_pair();
+ push( _gvn.transform( new (C, 2) ConvL2INode(a)));
+ break;
+ case Bytecodes::_i2l:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvI2LNode(a));
+ push_pair(b);
+ break;
+ case Bytecodes::_i2b:
+ // Sign extend
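+    // (x << 24) >> 24 with an arithmetic right shift; e.g. 0x00000080 -> 0xFFFFFF80.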
+ a = pop();
+ a = _gvn.transform( new (C, 3) LShiftINode(a,_gvn.intcon(24)) );
+ a = _gvn.transform( new (C, 3) RShiftINode(a,_gvn.intcon(24)) );
+ push( a );
+ break;
+ case Bytecodes::_i2s:
+ a = pop();
+ a = _gvn.transform( new (C, 3) LShiftINode(a,_gvn.intcon(16)) );
+ a = _gvn.transform( new (C, 3) RShiftINode(a,_gvn.intcon(16)) );
+ push( a );
+ break;
+ case Bytecodes::_i2c:
+ a = pop();
+ push( _gvn.transform( new (C, 3) AndINode(a,_gvn.intcon(0xFFFF)) ) );
+ break;
+
+ case Bytecodes::_i2f:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvI2FNode(a) ) ;
+ c = precision_rounding(b);
+    push(c);
+ break;
+
+ case Bytecodes::_i2d:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvI2DNode(a));
+ push_pair(b);
+ break;
+
+ case Bytecodes::_iinc: // Increment local
+ i = iter().get_index(); // Get local index
+ set_local( i, _gvn.transform( new (C, 3) AddINode( _gvn.intcon(iter().get_iinc_con()), local(i) ) ) );
+ break;
+
+ // Exit points of synchronized methods must have an unlock node
+ case Bytecodes::_return:
+ return_current(NULL);
+ break;
+
+ case Bytecodes::_ireturn:
+ case Bytecodes::_areturn:
+ case Bytecodes::_freturn:
+ return_current(pop());
+ break;
+ case Bytecodes::_lreturn:
+ return_current(pop_pair());
+ break;
+ case Bytecodes::_dreturn:
+ return_current(pop_pair());
+ break;
+
+ case Bytecodes::_athrow:
+    // A null exception oop results in a NullPointerException being thrown
+ do_null_check(peek(), T_OBJECT);
+ if (stopped()) return;
+ if (JvmtiExport::can_post_exceptions()) {
+ // "Full-speed throwing" is not necessary here,
+ // since we're notifying the VM on every throw.
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_none);
+ return;
+ }
+ // Hook the thrown exception directly to subsequent handlers.
+ if (BailoutToInterpreterForThrows) {
+ // Keep method interpreted from now on.
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_make_not_compilable);
+ return;
+ }
+ add_exception_state(make_exception_state(peek()));
+ break;
+
+ case Bytecodes::_goto: // fall through
+ case Bytecodes::_goto_w: {
+ int target_bci = (bc() == Bytecodes::_goto) ? iter().get_dest() : iter().get_far_dest();
+
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(target_bci);
+
+ // Update method data
+ profile_taken_branch(target_bci);
+
+ // Merge the current control into the target basic block
+ merge(target_bci);
+
+ // See if we can get some profile data and hand it off to the next block
+ Block *target_block = block()->successor_for_bci(target_bci);
+ if (target_block->pred_count() != 1) break;
+ ciMethodData* methodData = method()->method_data();
+ if (!methodData->is_mature()) break;
+ ciProfileData* data = methodData->bci_to_data(bci());
+    assert( data->is_JumpData(), "need JumpData for taken branch" );
+ int taken = ((ciJumpData*)data)->taken();
+ taken = method()->scale_count(taken);
+ target_block->set_count(taken);
+ break;
+ }
+
+ case Bytecodes::_ifnull:
+ do_ifnull(BoolTest::eq);
+ break;
+ case Bytecodes::_ifnonnull:
+ do_ifnull(BoolTest::ne);
+ break;
+
+ case Bytecodes::_if_acmpeq: btest = BoolTest::eq; goto handle_if_acmp;
+ case Bytecodes::_if_acmpne: btest = BoolTest::ne; goto handle_if_acmp;
+ handle_if_acmp:
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(iter().get_dest());
+ a = pop();
+ b = pop();
+ c = _gvn.transform( new (C, 3) CmpPNode(b, a) );
+ do_if(btest, c);
+ break;
+
+ case Bytecodes::_ifeq: btest = BoolTest::eq; goto handle_ifxx;
+ case Bytecodes::_ifne: btest = BoolTest::ne; goto handle_ifxx;
+ case Bytecodes::_iflt: btest = BoolTest::lt; goto handle_ifxx;
+ case Bytecodes::_ifle: btest = BoolTest::le; goto handle_ifxx;
+ case Bytecodes::_ifgt: btest = BoolTest::gt; goto handle_ifxx;
+ case Bytecodes::_ifge: btest = BoolTest::ge; goto handle_ifxx;
+ handle_ifxx:
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(iter().get_dest());
+ a = _gvn.intcon(0);
+ b = pop();
+ c = _gvn.transform( new (C, 3) CmpINode(b, a) );
+ do_if(btest, c);
+ break;
+
+ case Bytecodes::_if_icmpeq: btest = BoolTest::eq; goto handle_if_icmp;
+ case Bytecodes::_if_icmpne: btest = BoolTest::ne; goto handle_if_icmp;
+ case Bytecodes::_if_icmplt: btest = BoolTest::lt; goto handle_if_icmp;
+ case Bytecodes::_if_icmple: btest = BoolTest::le; goto handle_if_icmp;
+ case Bytecodes::_if_icmpgt: btest = BoolTest::gt; goto handle_if_icmp;
+ case Bytecodes::_if_icmpge: btest = BoolTest::ge; goto handle_if_icmp;
+ handle_if_icmp:
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(iter().get_dest());
+ a = pop();
+ b = pop();
+ c = _gvn.transform( new (C, 3) CmpINode( b, a ) );
+ do_if(btest, c);
+ break;
+
+ case Bytecodes::_tableswitch:
+ do_tableswitch();
+ break;
+
+ case Bytecodes::_lookupswitch:
+ do_lookupswitch();
+ break;
+
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial:
+ case Bytecodes::_invokevirtual:
+ case Bytecodes::_invokeinterface:
+ do_call();
+ break;
+ case Bytecodes::_checkcast:
+ do_checkcast();
+ break;
+ case Bytecodes::_instanceof:
+ do_instanceof();
+ break;
+ case Bytecodes::_anewarray:
+ do_anewarray();
+ break;
+ case Bytecodes::_newarray:
+ do_newarray((BasicType)iter().get_index());
+ break;
+ case Bytecodes::_multianewarray:
+ do_multianewarray();
+ break;
+ case Bytecodes::_new:
+ do_new();
+ break;
+
+ case Bytecodes::_jsr:
+ case Bytecodes::_jsr_w:
+ do_jsr();
+ break;
+
+ case Bytecodes::_ret:
+ do_ret();
+ break;
+
+
+ case Bytecodes::_monitorenter:
+ do_monitor_enter();
+ break;
+
+ case Bytecodes::_monitorexit:
+ do_monitor_exit();
+ break;
+
+ case Bytecodes::_breakpoint:
+ // Breakpoint set concurrently to compile
+ // %%% use an uncommon trap?
+ C->record_failure("breakpoint in method");
+ return;
+
+ default:
+#ifndef PRODUCT
+ map()->dump(99);
+#endif
+ tty->print("\nUnhandled bytecode %s\n", Bytecodes::name(bc()) );
+ ShouldNotReachHere();
+ }
+
+#ifndef PRODUCT
+ IdealGraphPrinter *printer = IdealGraphPrinter::printer();
+ if(printer) {
+ char buffer[256];
+ sprintf(buffer, "Bytecode %d: %s", bci(), Bytecodes::name(bc()));
+ bool old = printer->traverse_outs();
+ printer->set_traverse_outs(true);
+ printer->print_method(C, buffer, 3);
+ printer->set_traverse_outs(old);
+ }
+#endif
+}
diff --git a/src/share/vm/opto/parse3.cpp b/src/share/vm/opto/parse3.cpp
new file mode 100644
index 000000000..d32053ce2
--- /dev/null
+++ b/src/share/vm/opto/parse3.cpp
@@ -0,0 +1,463 @@
+/*
+ * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_parse3.cpp.incl"
+
+//=============================================================================
+// Helper methods for _get* and _put* bytecodes
+//=============================================================================
+bool Parse::static_field_ok_in_clinit(ciField *field, ciMethod *method) {
+ // Could be the field_holder's <clinit> method, or <clinit> for a subklass.
+  // Better to check now than to deoptimize as soon as the compiled code executes
+ assert( field->is_static(), "Only check if field is static");
+ // is_being_initialized() is too generous. It allows access to statics
+ // by threads that are not running the <clinit> before the <clinit> finishes.
+ // return field->holder()->is_being_initialized();
+
+ // The following restriction is correct but conservative.
+ // It is also desirable to allow compilation of methods called from <clinit>
+ // but this generated code will need to be made safe for execution by
+ // other threads, or the transition from interpreted to compiled code would
+ // need to be guarded.
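+  // Concretely, the check below accepts access to Foo's statics from
+  // Foo.<clinit> (or a subclass's <clinit>) and from a constructor of Foo or
+  // of a subclass; access from an unrelated class is rejected even while Foo
+  // is still being initialized.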
+ ciInstanceKlass *field_holder = field->holder();
+
+ bool access_OK = false;
+ if (method->holder()->is_subclass_of(field_holder)) {
+ if (method->is_static()) {
+ if (method->name() == ciSymbol::class_initializer_name()) {
+ // OK to access static fields inside initializer
+ access_OK = true;
+ }
+ } else {
+ if (method->name() == ciSymbol::object_initializer_name()) {
+ // It's also OK to access static fields inside a constructor,
+ // because any thread calling the constructor must first have
+ // synchronized on the class by executing a '_new' bytecode.
+ access_OK = true;
+ }
+ }
+ }
+
+ return access_OK;
+
+}
+
+
+void Parse::do_field_access(bool is_get, bool is_field) {
+ bool will_link;
+ ciField* field = iter().get_field(will_link);
+ assert(will_link, "getfield: typeflow responsibility");
+
+ ciInstanceKlass* field_holder = field->holder();
+
+ if (is_field == field->is_static()) {
+ // Interpreter will throw java_lang_IncompatibleClassChangeError
+ // Check this before allowing <clinit> methods to access static fields
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_none);
+ return;
+ }
+
+ if (!is_field && !field_holder->is_initialized()) {
+ if (!static_field_ok_in_clinit(field, method())) {
+ uncommon_trap(Deoptimization::Reason_uninitialized,
+ Deoptimization::Action_reinterpret,
+ NULL, "!static_field_ok_in_clinit");
+ return;
+ }
+ }
+
+ assert(field->will_link(method()->holder(), bc()), "getfield: typeflow responsibility");
+
+ // Note: We do not check for an unloaded field type here any more.
+
+ // Generate code for the object pointer.
+ Node* obj;
+ if (is_field) {
+ int obj_depth = is_get ? 0 : field->type()->size();
+ obj = do_null_check(peek(obj_depth), T_OBJECT);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+
+ const TypeInstPtr *tjp = TypeInstPtr::make(TypePtr::NotNull, iter().get_declared_field_holder());
+ assert(_gvn.type(obj)->higher_equal(tjp), "cast_up is no longer needed");
+
+ if (is_get) {
+ --_sp; // pop receiver before getting
+ do_get_xxx(tjp, obj, field, is_field);
+ } else {
+ do_put_xxx(tjp, obj, field, is_field);
+ --_sp; // pop receiver after putting
+ }
+ } else {
+ const TypeKlassPtr* tkp = TypeKlassPtr::make(field_holder);
+ obj = _gvn.makecon(tkp);
+ if (is_get) {
+ do_get_xxx(tkp, obj, field, is_field);
+ } else {
+ do_put_xxx(tkp, obj, field, is_field);
+ }
+ }
+}
+
+
+void Parse::do_get_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool is_field) {
+ // Does this field have a constant value? If so, just push the value.
+ if (field->is_constant() && push_constant(field->constant_value())) return;
+
+ ciType* field_klass = field->type();
+ bool is_vol = field->is_volatile();
+
+ // Compute address and memory type.
+ int offset = field->offset_in_bytes();
+ const TypePtr* adr_type = C->alias_type(field)->adr_type();
+ Node *adr = basic_plus_adr(obj, obj, offset);
+ BasicType bt = field->layout_type();
+
+ // Build the resultant type of the load
+ const Type *type;
+
+ bool must_assert_null = false;
+
+ if( bt == T_OBJECT ) {
+ if (!field->type()->is_loaded()) {
+ type = TypeInstPtr::BOTTOM;
+ must_assert_null = true;
+ } else if (field->is_constant()) {
+ // This can happen if the constant oop is non-perm.
+ ciObject* con = field->constant_value().as_object();
+ // Do not "join" in the previous type; it doesn't add value,
+ // and may yield a vacuous result if the field is of interface type.
+ type = TypeOopPtr::make_from_constant(con)->isa_oopptr();
+ assert(type != NULL, "field singleton type must be consistent");
+ } else {
+ type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+ }
+ } else {
+ type = Type::get_const_basic_type(bt);
+ }
+ // Build the load.
+ Node* ld = make_load(NULL, adr, type, bt, adr_type, is_vol);
+
+ // Adjust Java stack
+ if (type2size[bt] == 1)
+ push(ld);
+ else
+ push_pair(ld);
+
+ if (must_assert_null) {
+ // Do not take a trap here. It's possible that the program
+ // will never load the field's class, and will happily see
+ // null values in this field forever. Don't stumble into a
+ // trap for such a program, or we might get a long series
+ // of useless recompilations. (Or, we might load a class
+ // which should not be loaded.) If we ever see a non-null
+ // value, we will then trap and recompile. (The trap will
+ // not need to mention the class index, since the class will
+ // already have been loaded if we ever see a non-null value.)
+ // uncommon_trap(iter().get_field_signature_index());
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ method()->print_name(); tty->print_cr(" asserting nullness of field at bci: %d", bci());
+ }
+#endif
+ if (C->log() != NULL) {
+ C->log()->elem("assert_null reason='field' klass='%d'",
+ C->log()->identify(field->type()));
+ }
+ // If there is going to be a trap, put it at the next bytecode:
+ set_bci(iter().next_bci());
+ do_null_assert(peek(), T_OBJECT);
+ set_bci(iter().cur_bci()); // put it back
+ }
+
+ // If reference is volatile, prevent following memory ops from
+ // floating up past the volatile read. Also prevents commoning
+ // another volatile read.
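+  // The emitted shape for a volatile getfield is thus Load -> MemBarAcquire,
+  // the counterpart of the Release/Volatile barriers placed around volatile
+  // stores in do_put_xxx() below.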
+ if (field->is_volatile()) {
+ // Memory barrier includes bogus read of value to force load BEFORE membar
+ insert_mem_bar(Op_MemBarAcquire, ld);
+ }
+}
+
+void Parse::do_put_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool is_field) {
+ bool is_vol = field->is_volatile();
+ // If reference is volatile, prevent following memory ops from
+ // floating down past the volatile write. Also prevents commoning
+ // another volatile read.
+ if (is_vol) insert_mem_bar(Op_MemBarRelease);
+
+ // Compute address and memory type.
+ int offset = field->offset_in_bytes();
+ const TypePtr* adr_type = C->alias_type(field)->adr_type();
+ Node* adr = basic_plus_adr(obj, obj, offset);
+ BasicType bt = field->layout_type();
+ // Value to be stored
+ Node* val = type2size[bt] == 1 ? pop() : pop_pair();
+ // Round doubles before storing
+ if (bt == T_DOUBLE) val = dstore_rounding(val);
+
+ // Store the value.
+ Node* store;
+ if (bt == T_OBJECT) {
+ const TypePtr* field_type;
+ if (!field->type()->is_loaded()) {
+ field_type = TypeInstPtr::BOTTOM;
+ } else {
+ field_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
+ }
+ store = store_oop_to_object( control(), obj, adr, adr_type, val, field_type, bt);
+ } else {
+ store = store_to_memory( control(), adr, val, bt, adr_type, is_vol );
+ }
+
+  // If reference is volatile, prevent following volatile ops from
+ // floating up before the volatile write.
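+  // The overall shape built here is:
+  //   MemBarRelease; Store; MemBarVolatile(this field's index);
+  //   MemBarVolatile(AliasIdxBot); MemBarVolatile(each other volatile index).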
+ if (is_vol) {
+ // First place the specific membar for THIS volatile index. This first
+ // membar is dependent on the store, keeping any other membars generated
+ // below from floating up past the store.
+ int adr_idx = C->get_alias_index(adr_type);
+ insert_mem_bar_volatile(Op_MemBarVolatile, adr_idx);
+
+ // Now place a membar for AliasIdxBot for the unknown yet-to-be-parsed
+ // volatile alias indices. Skip this if the membar is redundant.
+ if (adr_idx != Compile::AliasIdxBot) {
+ insert_mem_bar_volatile(Op_MemBarVolatile, Compile::AliasIdxBot);
+ }
+
+ // Finally, place alias-index-specific membars for each volatile index
+ // that isn't the adr_idx membar. Typically there's only 1 or 2.
+ for( int i = Compile::AliasIdxRaw; i < C->num_alias_types(); i++ ) {
+ if (i != adr_idx && C->alias_type(i)->is_volatile()) {
+ insert_mem_bar_volatile(Op_MemBarVolatile, i);
+ }
+ }
+ }
+
+ // If the field is final, the rules of Java say we are in <init> or <clinit>.
+ // Note the presence of writes to final non-static fields, so that we
+ // can insert a memory barrier later on to keep the writes from floating
+ // out of the constructor.
+ if (is_field && field->is_final()) {
+ set_wrote_final(true);
+ }
+}
+
+
+bool Parse::push_constant(ciConstant constant) {
+ switch (constant.basic_type()) {
+ case T_BOOLEAN: push( intcon(constant.as_boolean()) ); break;
+ case T_INT: push( intcon(constant.as_int()) ); break;
+ case T_CHAR: push( intcon(constant.as_char()) ); break;
+ case T_BYTE: push( intcon(constant.as_byte()) ); break;
+ case T_SHORT: push( intcon(constant.as_short()) ); break;
+ case T_FLOAT: push( makecon(TypeF::make(constant.as_float())) ); break;
+ case T_DOUBLE: push_pair( makecon(TypeD::make(constant.as_double())) ); break;
+ case T_LONG: push_pair( longcon(constant.as_long()) ); break;
+ case T_ARRAY:
+ case T_OBJECT: {
+ // the oop is in perm space if the ciObject "has_encoding"
+ ciObject* oop_constant = constant.as_object();
+ if (oop_constant->is_null_object()) {
+ push( zerocon(T_OBJECT) );
+ break;
+ } else if (oop_constant->has_encoding()) {
+ push( makecon(TypeOopPtr::make_from_constant(oop_constant)) );
+ break;
+ } else {
+ // we cannot inline the oop, but we can use it later to narrow a type
+ return false;
+ }
+ }
+ case T_ILLEGAL: {
+ // Invalid ciConstant returned due to OutOfMemoryError in the CI
+ assert(C->env()->failing(), "otherwise should not see this");
+ // These always occur because of object types; we are going to
+ // bail out anyway, so make the stack depths match up
+ push( zerocon(T_OBJECT) );
+ return false;
+ }
+ default:
+ ShouldNotReachHere();
+ return false;
+ }
+
+ // success
+ return true;
+}
+
+
+
+//=============================================================================
+void Parse::do_anewarray() {
+ bool will_link;
+ ciKlass* klass = iter().get_klass(will_link);
+
+  // Uncommon-trap if the class the array will contain is not loaded: we need
+  // the loaded class for the rest of the graph, but we must not trigger
+  // initialization of the container class (see the Java spec).
+ assert(will_link, "anewarray: typeflow responsibility");
+
+ ciObjArrayKlass* array_klass = ciObjArrayKlass::make(klass);
+ // Check that array_klass object is loaded
+ if (!array_klass->is_loaded()) {
+ // Generate uncommon_trap for unloaded array_class
+ uncommon_trap(Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ array_klass);
+ return;
+ }
+
+ kill_dead_locals();
+
+ const TypeKlassPtr* array_klass_type = TypeKlassPtr::make(array_klass);
+ Node* count_val = pop();
+ Node* obj = new_array(makecon(array_klass_type), count_val);
+ push(obj);
+}
+
+
+void Parse::do_newarray(BasicType elem_type) {
+ kill_dead_locals();
+
+ Node* count_val = pop();
+ const TypeKlassPtr* array_klass = TypeKlassPtr::make(ciTypeArrayKlass::make(elem_type));
+ Node* obj = new_array(makecon(array_klass), count_val);
+ // Push resultant oop onto stack
+ push(obj);
+}
+
+// Expand simple expressions like new int[3][5] and new Object[2][nonConLen].
+// Also handle the degenerate 1-dimensional case of anewarray.
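+// E.g. for new Object[2][len]: allocate the length-2 outer array, then
+// allocate two Object[len] inner arrays and store each into the outer array
+// (only the non-final dimensions need to be compile-time constants).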
+Node* Parse::expand_multianewarray(ciArrayKlass* array_klass, Node* *lengths, int ndimensions) {
+ Node* length = lengths[0];
+ assert(length != NULL, "");
+ Node* array = new_array(makecon(TypeKlassPtr::make(array_klass)), length);
+ if (ndimensions > 1) {
+ jint length_con = find_int_con(length, -1);
+ guarantee(length_con >= 0, "non-constant multianewarray");
+ ciArrayKlass* array_klass_1 = array_klass->as_obj_array_klass()->element_klass()->as_array_klass();
+ const TypePtr* adr_type = TypeAryPtr::OOPS;
+ const Type* elemtype = _gvn.type(array)->is_aryptr()->elem();
+ const intptr_t header = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
+ for (jint i = 0; i < length_con; i++) {
+ Node* elem = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1);
+ intptr_t offset = header + ((intptr_t)i << LogBytesPerWord);
+ Node* eaddr = basic_plus_adr(array, offset);
+ store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT);
+ }
+ }
+ return array;
+}
+
+void Parse::do_multianewarray() {
+ int ndimensions = iter().get_dimensions();
+
+ // the m-dimensional array
+ bool will_link;
+ ciArrayKlass* array_klass = iter().get_klass(will_link)->as_array_klass();
+ assert(will_link, "multianewarray: typeflow responsibility");
+
+ // Note: Array classes are always initialized; no is_initialized check.
+
+ enum { MAX_DIMENSION = 5 };
+ if (ndimensions > MAX_DIMENSION || ndimensions <= 0) {
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_none);
+ return;
+ }
+
+ kill_dead_locals();
+
+ // get the lengths from the stack (first dimension is on top)
+ Node* length[MAX_DIMENSION+1];
+ length[ndimensions] = NULL; // terminating null for make_runtime_call
+ int j;
+ for (j = ndimensions-1; j >= 0 ; j--) length[j] = pop();
+
+ // The original expression was of this form: new T[length0][length1]...
+ // It is often the case that the lengths are small (except the last).
+ // If that happens, use the fast 1-d creator a constant number of times.
+ const jint expand_limit = MIN2((juint)MultiArrayExpandLimit, (juint)100);
+ jint expand_count = 1; // count of allocations in the expansion
+ jint expand_fanout = 1; // running total fanout
+ for (j = 0; j < ndimensions-1; j++) {
+ jint dim_con = find_int_con(length[j], -1);
+ expand_fanout *= dim_con;
+ expand_count += expand_fanout; // count the level-J sub-arrays
+ if (dim_con < 0
+ || dim_con > expand_limit
+ || expand_count > expand_limit) {
+ expand_count = 0;
+ break;
+ }
+ }
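+  // E.g. for new T[2][3][len] the loop sees dim_con 2 then 3, so expand_count
+  // grows to 1 + 2 + 2*3 = 9 sub-array allocations; if that exceeds
+  // expand_limit, or a non-final dimension is not a constant, expand_count is
+  // zeroed and we fall back to the runtime call below.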
+
+  // Use the inline [a]newarray expansion (instead of the multianewarray
+  // runtime call) if there is only one dimension, or if all non-final
+  // dimensions are small constants.
+ if (expand_count == 1 || (1 <= expand_count && expand_count <= expand_limit)) {
+ Node* obj = expand_multianewarray(array_klass, &length[0], ndimensions);
+ push(obj);
+ return;
+ }
+
+ address fun = NULL;
+ switch (ndimensions) {
+ //case 1: Actually, there is no case 1. It's handled by new_array.
+ case 2: fun = OptoRuntime::multianewarray2_Java(); break;
+ case 3: fun = OptoRuntime::multianewarray3_Java(); break;
+ case 4: fun = OptoRuntime::multianewarray4_Java(); break;
+ case 5: fun = OptoRuntime::multianewarray5_Java(); break;
+ default: ShouldNotReachHere();
+ };
+
+ Node* c = make_runtime_call(RC_NO_LEAF | RC_NO_IO,
+ OptoRuntime::multianewarray_Type(ndimensions),
+ fun, NULL, TypeRawPtr::BOTTOM,
+ makecon(TypeKlassPtr::make(array_klass)),
+ length[0], length[1], length[2],
+ length[3], length[4]);
+ Node* res = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms));
+
+ const Type* type = TypeOopPtr::make_from_klass_raw(array_klass);
+
+ // Improve the type: We know it's not null, exact, and of a given length.
+ type = type->is_ptr()->cast_to_ptr_type(TypePtr::NotNull);
+ type = type->is_aryptr()->cast_to_exactness(true);
+
+ const TypeInt* ltype = _gvn.find_int_type(length[0]);
+ if (ltype != NULL)
+ type = type->is_aryptr()->cast_to_size(ltype);
+
+ // We cannot sharpen the nested sub-arrays, since the top level is mutable.
+
+ Node* cast = _gvn.transform( new (C, 2) CheckCastPPNode(control(), res, type) );
+ push(cast);
+
+ // Possible improvements:
+ // - Make a fast path for small multi-arrays. (W/ implicit init. loops.)
+ // - Issue CastII against length[*] values, to TypeInt::POS.
+}
diff --git a/src/share/vm/opto/parseHelper.cpp b/src/share/vm/opto/parseHelper.cpp
new file mode 100644
index 000000000..d34ca998c
--- /dev/null
+++ b/src/share/vm/opto/parseHelper.cpp
@@ -0,0 +1,520 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_parseHelper.cpp.incl"
+
+//------------------------------make_dtrace_method_entry_exit ----------------
+// Dtrace -- record entry or exit of a method if compiled with dtrace support
+void GraphKit::make_dtrace_method_entry_exit(ciMethod* method, bool is_entry) {
+ const TypeFunc *call_type = OptoRuntime::dtrace_method_entry_exit_Type();
+ address call_address = is_entry ? CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry) :
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit);
+ const char *call_name = is_entry ? "dtrace_method_entry" : "dtrace_method_exit";
+
+ // Get base of thread-local storage area
+ Node* thread = _gvn.transform( new (C, 1) ThreadLocalNode() );
+
+ // Get method
+ const TypeInstPtr* method_type = TypeInstPtr::make(TypePtr::Constant, method->klass(), true, method, 0);
+ Node *method_node = _gvn.transform( new (C, 1) ConPNode(method_type) );
+
+ kill_dead_locals();
+
+ // For some reason, this call reads only raw memory.
+ const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
+ make_runtime_call(RC_LEAF | RC_NARROW_MEM,
+ call_type, call_address,
+ call_name, raw_adr_type,
+ thread, method_node);
+}
+
+
+//=============================================================================
+//------------------------------do_checkcast-----------------------------------
+void Parse::do_checkcast() {
+ bool will_link;
+ ciKlass* klass = iter().get_klass(will_link);
+
+ Node *obj = peek();
+
+ // Throw uncommon trap if class is not loaded or the value we are casting
+ // _from_ is not loaded, and value is not null. If the value _is_ NULL,
+ // then the checkcast does nothing.
+ const TypeInstPtr *tp = _gvn.type(obj)->isa_instptr();
+ if (!will_link || (tp && !tp->is_loaded())) {
+ if (C->log() != NULL) {
+ if (!will_link) {
+ C->log()->elem("assert_null reason='checkcast' klass='%d'",
+ C->log()->identify(klass));
+ }
+ if (tp && !tp->is_loaded()) {
+ // %%% Cannot happen?
+ C->log()->elem("assert_null reason='checkcast source' klass='%d'",
+ C->log()->identify(tp->klass()));
+ }
+ }
+ do_null_assert(obj, T_OBJECT);
+ assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" );
+ if (!stopped()) {
+ profile_null_checkcast();
+ }
+ return;
+ }
+
+ Node *res = gen_checkcast(obj, makecon(TypeKlassPtr::make(klass)) );
+
+ // Pop from stack AFTER gen_checkcast because it can uncommon trap and
+ // the debug info has to be correct.
+ pop();
+ push(res);
+}
+
+
+//------------------------------do_instanceof----------------------------------
+void Parse::do_instanceof() {
+ if (stopped()) return;
+ // We would like to return false if class is not loaded, emitting a
+ // dependency, but Java requires instanceof to load its operand.
+
+ // Throw uncommon trap if class is not loaded
+ bool will_link;
+ ciKlass* klass = iter().get_klass(will_link);
+
+ if (!will_link) {
+ if (C->log() != NULL) {
+ C->log()->elem("assert_null reason='instanceof' klass='%d'",
+ C->log()->identify(klass));
+ }
+ do_null_assert(peek(), T_OBJECT);
+ assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" );
+ if (!stopped()) {
+ // The object is now known to be null.
+ // Shortcut the effect of gen_instanceof and return "false" directly.
+ pop(); // pop the null
+ push(_gvn.intcon(0)); // push false answer
+ }
+ return;
+ }
+
+ // Push the bool result back on stack
+ push( gen_instanceof( pop(), makecon(TypeKlassPtr::make(klass)) ) );
+}
+
+//------------------------------array_store_check------------------------------
+// pull array from stack and check that the store is valid
+void Parse::array_store_check() {
+
+ // Shorthand access to array store elements
+ Node *obj = stack(_sp-1);
+ Node *idx = stack(_sp-2);
+ Node *ary = stack(_sp-3);
+
+ if (_gvn.type(obj) == TypePtr::NULL_PTR) {
+ // There's never a type check on null values.
+ // This cutout lets us avoid the uncommon_trap(Reason_array_check)
+ // below, which turns into a performance liability if the
+ // gen_checkcast folds up completely.
+ return;
+ }
+
+ // Extract the array klass type
+ int klass_offset = oopDesc::klass_offset_in_bytes();
+ Node* p = basic_plus_adr( ary, ary, klass_offset );
+ // p's type is array-of-OOPS plus klass_offset
+ Node* array_klass = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeInstPtr::KLASS));
+ // Get the array klass
+ const TypeKlassPtr *tak = _gvn.type(array_klass)->is_klassptr();
+
+ // array_klass's type is generally INexact array-of-oop. Heroically
+ // cast the array klass to EXACT array and uncommon-trap if the cast
+ // fails.
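+  // E.g. if the array's static type is Object[], speculate that its klass is
+  // exactly Object[] (not a subtype such as String[]); the element klass is
+  // then exactly Object and the gen_checkcast below folds away. Seeing a
+  // String[] here at runtime takes the uncommon trap instead.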
+ bool always_see_exact_class = false;
+ if (MonomorphicArrayCheck
+ && !too_many_traps(Deoptimization::Reason_array_check)) {
+ always_see_exact_class = true;
+ // (If no MDO at all, hope for the best, until a trap actually occurs.)
+ }
+
+  // Is the array klass exactly its defined type?
+ if (always_see_exact_class && !tak->klass_is_exact()) {
+ // Make a constant out of the inexact array klass
+ const TypeKlassPtr *extak = tak->cast_to_exactness(true)->is_klassptr();
+ Node* con = makecon(extak);
+ Node* cmp = _gvn.transform(new (C, 3) CmpPNode( array_klass, con ));
+ Node* bol = _gvn.transform(new (C, 2) BoolNode( cmp, BoolTest::eq ));
+ Node* ctrl= control();
+ { BuildCutout unless(this, bol, PROB_MAX);
+ uncommon_trap(Deoptimization::Reason_array_check,
+ Deoptimization::Action_maybe_recompile,
+ tak->klass());
+ }
+ if (stopped()) { // MUST uncommon-trap?
+ set_control(ctrl); // Then Don't Do It, just fall into the normal checking
+ } else { // Cast array klass to exactness:
+ // Use the exact constant value we know it is.
+ replace_in_map(array_klass,con);
+ CompileLog* log = C->log();
+ if (log != NULL) {
+ log->elem("cast_up reason='monomorphic_array' from='%d' to='(exact)'",
+ log->identify(tak->klass()));
+ }
+ array_klass = con; // Use cast value moving forward
+ }
+ }
+
+ // Come here for polymorphic array klasses
+
+ // Extract the array element class
+ int element_klass_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+ Node *p2 = basic_plus_adr(array_klass, array_klass, element_klass_offset);
+ Node *a_e_klass = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p2, tak));
+
+ // Check (the hard way) and throw if not a subklass.
+ // Result is ignored, we just need the CFG effects.
+ gen_checkcast( obj, a_e_klass );
+}
+
+
+//------------------------------do_new-----------------------------------------
+void Parse::do_new() {
+ kill_dead_locals();
+
+ bool will_link;
+ ciInstanceKlass* klass = iter().get_klass(will_link)->as_instance_klass();
+ assert(will_link, "_new: typeflow responsibility");
+
+ // Should initialize, or throw an InstantiationError?
+ if (!klass->is_initialized() ||
+ klass->is_abstract() || klass->is_interface() ||
+ klass->name() == ciSymbol::java_lang_Class() ||
+ iter().is_unresolved_klass()) {
+ uncommon_trap(Deoptimization::Reason_uninitialized,
+ Deoptimization::Action_reinterpret,
+ klass);
+ return;
+ }
+
+ Node* kls = makecon(TypeKlassPtr::make(klass));
+ Node* obj = new_instance(kls);
+
+ // Push resultant oop onto stack
+ push(obj);
+}
+
+#ifndef PRODUCT
+//------------------------------dump_map_adr_mem-------------------------------
+// Debug dump of the mapping from address types to MergeMemNode indices.
+void Parse::dump_map_adr_mem() const {
+ tty->print_cr("--- Mapping from address types to memory Nodes ---");
+ MergeMemNode *mem = map() == NULL ? NULL : (map()->memory()->is_MergeMem() ?
+ map()->memory()->as_MergeMem() : NULL);
+ for (uint i = 0; i < (uint)C->num_alias_types(); i++) {
+ C->alias_type(i)->print_on(tty);
+ tty->print("\t");
+ // Node mapping, if any
+ if (mem && i < mem->req() && mem->in(i) && mem->in(i) != mem->empty_memory()) {
+ mem->in(i)->dump();
+ } else {
+ tty->cr();
+ }
+ }
+}
+
+#endif
+
+
+//=============================================================================
+//
+// parser methods for profiling
+
+
+//----------------------test_counter_against_threshold ------------------------
+void Parse::test_counter_against_threshold(Node* cnt, int limit) {
+ // Test the counter against the limit and uncommon trap if greater.
+
+ // This code is largely copied from the range check code in
+ // array_addressing()
+
+ // Test invocation count vs threshold
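+  // Note the compare is unsigned (CmpU): a counter value with the sign bit
+  // set compares above any positive limit and falls into the trap below.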
+ Node *threshold = makecon(TypeInt::make(limit));
+ Node *chk = _gvn.transform( new (C, 3) CmpUNode( cnt, threshold) );
+ BoolTest::mask btest = BoolTest::lt;
+ Node *tst = _gvn.transform( new (C, 2) BoolNode( chk, btest) );
+ // Branch to failure if threshold exceeded
+ { BuildCutout unless(this, tst, PROB_ALWAYS);
+ uncommon_trap(Deoptimization::Reason_age,
+ Deoptimization::Action_maybe_recompile);
+ }
+}
+
+//----------------------increment_and_test_invocation_counter-------------------
+void Parse::increment_and_test_invocation_counter(int limit) {
+ if (!count_invocations()) return;
+
+ // Get the methodOop node.
+ const TypePtr* adr_type = TypeOopPtr::make_from_constant(method());
+ Node *methodOop_node = makecon(adr_type);
+
+ // Load the interpreter_invocation_counter from the methodOop.
+ int offset = methodOopDesc::interpreter_invocation_counter_offset_in_bytes();
+ Node* adr_node = basic_plus_adr(methodOop_node, methodOop_node, offset);
+ Node* cnt = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+
+ test_counter_against_threshold(cnt, limit);
+
+ // Add one to the counter and store
+ Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(1)));
+ store_to_memory( NULL, adr_node, incr, T_INT, adr_type );
+}
+
+//----------------------------method_data_addressing---------------------------
+Node* Parse::method_data_addressing(ciMethodData* md, ciProfileData* data, ByteSize counter_offset, Node* idx, uint stride) {
+ // Get offset within methodDataOop of the data array
+ ByteSize data_offset = methodDataOopDesc::data_offset();
+
+ // Get cell offset of the ProfileData within data array
+ int cell_offset = md->dp_to_di(data->dp());
+
+ // Add in counter_offset, the # of bytes into the ProfileData of counter or flag
+ int offset = in_bytes(data_offset) + cell_offset + in_bytes(counter_offset);
+
+ const TypePtr* adr_type = TypeOopPtr::make_from_constant(md);
+ Node* mdo = makecon(adr_type);
+ Node* ptr = basic_plus_adr(mdo, mdo, offset);
+
+ if (stride != 0) {
+ Node* str = _gvn.MakeConX(stride);
+ Node* scale = _gvn.transform( new (C, 3) MulXNode( idx, str ) );
+ ptr = _gvn.transform( new (C, 4) AddPNode( mdo, ptr, scale ) );
+ }
+
+ return ptr;
+}
+
+//--------------------------increment_md_counter_at----------------------------
+void Parse::increment_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize counter_offset, Node* idx, uint stride) {
+ Node* adr_node = method_data_addressing(md, data, counter_offset, idx, stride);
+
+ const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
+ Node* cnt = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+ Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(DataLayout::counter_increment)));
+ store_to_memory(NULL, adr_node, incr, T_INT, adr_type );
+}
+
+//--------------------------test_for_osr_md_counter_at-------------------------
+void Parse::test_for_osr_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize counter_offset, int limit) {
+ Node* adr_node = method_data_addressing(md, data, counter_offset);
+
+ const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
+ Node* cnt = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+
+ test_counter_against_threshold(cnt, limit);
+}
+
+//-------------------------------set_md_flag_at--------------------------------
+void Parse::set_md_flag_at(ciMethodData* md, ciProfileData* data, int flag_constant) {
+ Node* adr_node = method_data_addressing(md, data, DataLayout::flags_offset());
+
+ const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
+ Node* flags = make_load(NULL, adr_node, TypeInt::BYTE, T_BYTE, adr_type);
+ Node* incr = _gvn.transform(new (C, 3) OrINode(flags, _gvn.intcon(flag_constant)));
+ store_to_memory(NULL, adr_node, incr, T_BYTE, adr_type);
+}
+
+//----------------------------profile_taken_branch-----------------------------
+void Parse::profile_taken_branch(int target_bci, bool force_update) {
+ // This is a potential osr_site if we have a backedge.
+ int cur_bci = bci();
+ bool osr_site =
+ (target_bci <= cur_bci) && count_invocations() && UseOnStackReplacement;
+
+ // If we are going to OSR, restart at the target bytecode.
+ set_bci(target_bci);
+
+  // To do: factor out the limit calculations below. These duplicate
+ // the similar limit calculations in the interpreter.
+
+ if (method_data_update() || force_update) {
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(cur_bci);
+ assert(data->is_JumpData(), "need JumpData for taken branch");
+ increment_md_counter_at(md, data, JumpData::taken_offset());
+ }
+
+ // In the new tiered system this is all we need to do. In the old
+  // (C2-based) tiered system we must also execute the code below.
+#ifndef TIERED
+ if (method_data_update()) {
+ ciMethodData* md = method()->method_data();
+ if (osr_site) {
+ ciProfileData* data = md->bci_to_data(cur_bci);
+ int limit = (CompileThreshold
+ * (OnStackReplacePercentage - InterpreterProfilePercentage)) / 100;
+ test_for_osr_md_counter_at(md, data, JumpData::taken_offset(), limit);
+ }
+ } else {
+ // With method data update off, use the invocation counter to trigger an
+ // OSR compilation, as done in the interpreter.
+ if (osr_site) {
+ int limit = (CompileThreshold * OnStackReplacePercentage) / 100;
+ increment_and_test_invocation_counter(limit);
+ }
+ }
+#endif // TIERED
+
+ // Restore the original bytecode.
+ set_bci(cur_bci);
+}
+
+//--------------------------profile_not_taken_branch---------------------------
+void Parse::profile_not_taken_branch(bool force_update) {
+
+ if (method_data_update() || force_update) {
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_BranchData(), "need BranchData for not taken branch");
+ increment_md_counter_at(md, data, BranchData::not_taken_offset());
+ }
+
+}
+
+//---------------------------------profile_call--------------------------------
+void Parse::profile_call(Node* receiver) {
+ if (!method_data_update()) return;
+
+ profile_generic_call();
+
+ switch (bc()) {
+ case Bytecodes::_invokevirtual:
+ case Bytecodes::_invokeinterface:
+ profile_receiver_type(receiver);
+ break;
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial:
+ break;
+ default: fatal("unexpected call bytecode");
+ }
+}
+
+//------------------------------profile_generic_call---------------------------
+void Parse::profile_generic_call() {
+ assert(method_data_update(), "must be generating profile code");
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+  assert(data->is_CounterData(), "need CounterData for generic call site");
+ increment_md_counter_at(md, data, CounterData::count_offset());
+}
+
+//-----------------------------profile_receiver_type---------------------------
+void Parse::profile_receiver_type(Node* receiver) {
+ assert(method_data_update(), "must be generating profile code");
+
+ // Skip if we aren't tracking receivers
+ if (TypeProfileWidth < 1) return;
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData here");
+ ciReceiverTypeData* rdata = (ciReceiverTypeData*)data->as_ReceiverTypeData();
+
+ Node* method_data = method_data_addressing(md, rdata, in_ByteSize(0));
+
+ // Using an adr_type of TypePtr::BOTTOM to work around anti-dep problems.
+ // A better solution might be to use TypeRawPtr::BOTTOM with RC_NARROW_MEM.
+ make_runtime_call(RC_LEAF, OptoRuntime::profile_receiver_type_Type(),
+ CAST_FROM_FN_PTR(address,
+ OptoRuntime::profile_receiver_type_C),
+ "profile_receiver_type_C",
+ TypePtr::BOTTOM,
+ method_data, receiver);
+}
+
+//---------------------------------profile_ret---------------------------------
+void Parse::profile_ret(int target_bci) {
+ if (!method_data_update()) return;
+
+ // Skip if we aren't tracking ret targets
+ if (TypeProfileWidth < 1) return;
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_RetData(), "need RetData for ret");
+ ciRetData* ret_data = (ciRetData*)data->as_RetData();
+
+  // Check whether the target_bci is already in the table
+ uint row;
+ bool table_full = true;
+ for (row = 0; row < ret_data->row_limit(); row++) {
+ int key = ret_data->bci(row);
+ table_full &= (key != RetData::no_bci);
+ if (key == target_bci) break;
+ }
+
+ if (row >= ret_data->row_limit()) {
+ // The target_bci was not found in the table.
+ if (!table_full) {
+ // XXX: Make slow call to update RetData
+ }
+ return;
+ }
+
+ // the target_bci is already in the table
+ increment_md_counter_at(md, data, RetData::bci_count_offset(row));
+}
+
+//--------------------------profile_null_checkcast----------------------------
+void Parse::profile_null_checkcast() {
+ // Set the null-seen flag, done in conjunction with the usual null check. We
+ // never unset the flag, so this is a one-way switch.
+ if (!method_data_update()) return;
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_BitData(), "need BitData for checkcast");
+ set_md_flag_at(md, data, BitData::null_seen_byte_constant());
+}
+
+//-----------------------------profile_switch_case-----------------------------
+void Parse::profile_switch_case(int table_index) {
+ if (!method_data_update()) return;
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_MultiBranchData(), "need MultiBranchData for switch case");
+ if (table_index >= 0) {
+ increment_md_counter_at(md, data, MultiBranchData::case_count_offset(table_index));
+ } else {
+ increment_md_counter_at(md, data, MultiBranchData::default_count_offset());
+ }
+}
diff --git a/src/share/vm/opto/phase.cpp b/src/share/vm/opto/phase.cpp
new file mode 100644
index 000000000..5e046dab0
--- /dev/null
+++ b/src/share/vm/opto/phase.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_phase.cpp.incl"
+
+#ifndef PRODUCT
+int Phase::_total_bytes_compiled = 0;
+
+elapsedTimer Phase::_t_totalCompilation;
+elapsedTimer Phase::_t_methodCompilation;
+elapsedTimer Phase::_t_stubCompilation;
+#endif
+
+// The following timers are used for LogCompilation
+elapsedTimer Phase::_t_parser;
+elapsedTimer Phase::_t_escapeAnalysis;
+elapsedTimer Phase::_t_optimizer;
+elapsedTimer Phase::_t_idealLoop;
+elapsedTimer Phase::_t_ccp;
+elapsedTimer Phase::_t_matcher;
+elapsedTimer Phase::_t_registerAllocation;
+elapsedTimer Phase::_t_output;
+
+#ifndef PRODUCT
+elapsedTimer Phase::_t_graphReshaping;
+elapsedTimer Phase::_t_scheduler;
+elapsedTimer Phase::_t_removeEmptyBlocks;
+elapsedTimer Phase::_t_macroExpand;
+elapsedTimer Phase::_t_peephole;
+elapsedTimer Phase::_t_codeGeneration;
+elapsedTimer Phase::_t_registerMethod;
+elapsedTimer Phase::_t_temporaryTimer1;
+elapsedTimer Phase::_t_temporaryTimer2;
+
+// Subtimers for _t_optimizer
+elapsedTimer Phase::_t_iterGVN;
+elapsedTimer Phase::_t_iterGVN2;
+
+// Subtimers for _t_registerAllocation
+elapsedTimer Phase::_t_ctorChaitin;
+elapsedTimer Phase::_t_buildIFGphysical;
+elapsedTimer Phase::_t_computeLive;
+elapsedTimer Phase::_t_regAllocSplit;
+elapsedTimer Phase::_t_postAllocCopyRemoval;
+elapsedTimer Phase::_t_fixupSpills;
+
+// Subtimers for _t_output
+elapsedTimer Phase::_t_instrSched;
+elapsedTimer Phase::_t_buildOopMaps;
+#endif
+
+//------------------------------Phase------------------------------------------
+Phase::Phase( PhaseNumber pnum ) : _pnum(pnum), C( pnum == Compiler ? NULL : Compile::current()) {
+  // Poll for requests from the shutdown mechanism to quiesce the compiler (4448539, 4448544).
+ // This is an effective place to poll, since the compiler is full of phases.
+ // In particular, every inlining site uses a recursively created Parse phase.
+ CompileBroker::maybe_block();
+}
+
+#ifndef PRODUCT
+static const double minimum_reported_time = 0.0001; // seconds
+static const double expected_method_compile_coverage = 0.97; // %
+static const double minimum_meaningful_method_compile = 2.00; // seconds
+
+void Phase::print_timers() {
+ tty->print_cr ("Accumulated compiler times:");
+ tty->print_cr ("---------------------------");
+ tty->print_cr (" Total compilation: %3.3f sec.", Phase::_t_totalCompilation.seconds());
+ tty->print (" method compilation : %3.3f sec", Phase::_t_methodCompilation.seconds());
+ tty->print ("/%d bytes",_total_bytes_compiled);
+ tty->print_cr (" (%3.0f bytes per sec) ", Phase::_total_bytes_compiled / Phase::_t_methodCompilation.seconds());
+ tty->print_cr (" stub compilation : %3.3f sec.", Phase::_t_stubCompilation.seconds());
+ tty->print_cr (" Phases:");
+ tty->print_cr (" parse : %3.3f sec", Phase::_t_parser.seconds());
+ if (DoEscapeAnalysis) {
+ tty->print_cr (" escape analysis : %3.3f sec", Phase::_t_escapeAnalysis.seconds());
+ }
+ tty->print_cr (" optimizer : %3.3f sec", Phase::_t_optimizer.seconds());
+ if( Verbose || WizardMode ) {
+ tty->print_cr (" iterGVN : %3.3f sec", Phase::_t_iterGVN.seconds());
+ tty->print_cr (" idealLoop : %3.3f sec", Phase::_t_idealLoop.seconds());
+ tty->print_cr (" ccp : %3.3f sec", Phase::_t_ccp.seconds());
+ tty->print_cr (" iterGVN2 : %3.3f sec", Phase::_t_iterGVN2.seconds());
+ tty->print_cr (" graphReshape : %3.3f sec", Phase::_t_graphReshaping.seconds());
+ double optimizer_subtotal = Phase::_t_iterGVN.seconds() +
+ Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() +
+ Phase::_t_graphReshaping.seconds();
+ double percent_of_optimizer = ((optimizer_subtotal == 0.0) ? 0.0 : (optimizer_subtotal / Phase::_t_optimizer.seconds() * 100.0));
+ tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", optimizer_subtotal, percent_of_optimizer);
+ }
+ tty->print_cr (" matcher : %3.3f sec", Phase::_t_matcher.seconds());
+ tty->print_cr (" scheduler : %3.3f sec", Phase::_t_scheduler.seconds());
+ tty->print_cr (" regalloc : %3.3f sec", Phase::_t_registerAllocation.seconds());
+ if( Verbose || WizardMode ) {
+ tty->print_cr (" ctorChaitin : %3.3f sec", Phase::_t_ctorChaitin.seconds());
+ tty->print_cr (" buildIFG : %3.3f sec", Phase::_t_buildIFGphysical.seconds());
+ tty->print_cr (" computeLive : %3.3f sec", Phase::_t_computeLive.seconds());
+ tty->print_cr (" regAllocSplit: %3.3f sec", Phase::_t_regAllocSplit.seconds());
+ tty->print_cr (" postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
+ tty->print_cr (" fixupSpills : %3.3f sec", Phase::_t_fixupSpills.seconds());
+ double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
+ Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
+ Phase::_t_regAllocSplit.seconds() + Phase::_t_fixupSpills.seconds() +
+ Phase::_t_postAllocCopyRemoval.seconds();
+ double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
+ tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", regalloc_subtotal, percent_of_regalloc);
+ }
+ tty->print_cr (" macroExpand : %3.3f sec", Phase::_t_macroExpand.seconds());
+ tty->print_cr (" removeEmpty : %3.3f sec", Phase::_t_removeEmptyBlocks.seconds());
+ tty->print_cr (" peephole : %3.3f sec", Phase::_t_peephole.seconds());
+ tty->print_cr (" codeGen : %3.3f sec", Phase::_t_codeGeneration.seconds());
+ tty->print_cr (" install_code : %3.3f sec", Phase::_t_registerMethod.seconds());
+ tty->print_cr (" ------------ : ----------");
+ double phase_subtotal = Phase::_t_parser.seconds() +
+ (DoEscapeAnalysis ? Phase::_t_escapeAnalysis.seconds() : 0.0) +
+ Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() +
+ Phase::_t_matcher.seconds() + Phase::_t_scheduler.seconds() +
+ Phase::_t_registerAllocation.seconds() + Phase::_t_removeEmptyBlocks.seconds() +
+ Phase::_t_macroExpand.seconds() + Phase::_t_peephole.seconds() +
+ Phase::_t_codeGeneration.seconds() + Phase::_t_registerMethod.seconds();
+ double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0;
+ // counters inside Compile::CodeGen include time for adapters and stubs
+ // so phase-total can be greater than 100%
+ tty->print_cr (" total : %3.3f sec, %3.2f %%", phase_subtotal, percent_of_method_compile);
+
+ assert( percent_of_method_compile > expected_method_compile_coverage ||
+ phase_subtotal < minimum_meaningful_method_compile,
+ "Must account for method compilation");
+
+ if( Phase::_t_temporaryTimer1.seconds() > minimum_reported_time ) {
+ tty->cr();
+ tty->print_cr (" temporaryTimer1: %3.3f sec", Phase::_t_temporaryTimer1.seconds());
+ }
+ if( Phase::_t_temporaryTimer2.seconds() > minimum_reported_time ) {
+ tty->cr();
+ tty->print_cr (" temporaryTimer2: %3.3f sec", Phase::_t_temporaryTimer2.seconds());
+ }
+ tty->print_cr (" output : %3.3f sec", Phase::_t_output.seconds());
+ tty->print_cr (" isched : %3.3f sec", Phase::_t_instrSched.seconds());
+ tty->print_cr (" bldOopMaps: %3.3f sec", Phase::_t_buildOopMaps.seconds());
+}
+#endif
diff --git a/src/share/vm/opto/phase.hpp b/src/share/vm/opto/phase.hpp
new file mode 100644
index 000000000..bee7dfef7
--- /dev/null
+++ b/src/share/vm/opto/phase.hpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Compile;
+
+//------------------------------Phase------------------------------------------
+// Most optimizations are done in Phases. Creating a phase does any long
+// running analysis required, and caches the analysis in internal data
+// structures. Later the analysis is queried using transform() calls to
+// guide transforming the program. When the Phase is deleted, so is any
+// cached analysis info. This basic Phase class mostly contains timing and
+// memory management code.
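+//
+// Typical use is as a scoped StackObj, e.g. (illustrative, with gvn being a
+// PhaseGVN*):  { PhaseIterGVN igvn(gvn); igvn.optimize(); }
+// The constructor performs the analysis, transform()/optimize() apply it, and
+// the cached info goes away when the phase leaves scope.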
+class Phase : public StackObj {
+public:
+ enum PhaseNumber {
+ Compiler, // Top-level compiler phase
+ Parser, // Parse bytecodes
+ Remove_Useless, // Remove useless nodes
+ Optimistic, // Optimistic analysis phase
+ GVN, // Pessimistic global value numbering phase
+ Ins_Select, // Instruction selection phase
+ Copy_Elimination, // Copy Elimination
+ Dead_Code_Elimination, // DCE and compress Nodes
+ Conditional_Constant, // Conditional Constant Propagation
+ CFG, // Build a CFG
+ DefUse, // Build Def->Use chains
+ Register_Allocation, // Register allocation, duh
+ LIVE, // Dragon-book LIVE range problem
+ Interference_Graph, // Building the IFG
+ Coalesce, // Coalescing copies
+ Conditional_CProp, // Conditional Constant Propagation
+ Ideal_Loop, // Find idealized trip-counted loops
+ Macro_Expand, // Expand macro nodes
+ Peephole, // Apply peephole optimizations
+ last_phase
+ };
+protected:
+ enum PhaseNumber _pnum; // Phase number (for stat gathering)
+
+#ifndef PRODUCT
+ static int _total_bytes_compiled;
+
+ // accumulated timers
+ static elapsedTimer _t_totalCompilation;
+ static elapsedTimer _t_methodCompilation;
+ static elapsedTimer _t_stubCompilation;
+#endif
+
+// The following timers are used for LogCompilation
+ static elapsedTimer _t_parser;
+ static elapsedTimer _t_escapeAnalysis;
+ static elapsedTimer _t_optimizer;
+ static elapsedTimer _t_idealLoop;
+ static elapsedTimer _t_ccp;
+ static elapsedTimer _t_matcher;
+ static elapsedTimer _t_registerAllocation;
+ static elapsedTimer _t_output;
+
+#ifndef PRODUCT
+ static elapsedTimer _t_graphReshaping;
+ static elapsedTimer _t_scheduler;
+ static elapsedTimer _t_removeEmptyBlocks;
+ static elapsedTimer _t_macroExpand;
+ static elapsedTimer _t_peephole;
+ static elapsedTimer _t_codeGeneration;
+ static elapsedTimer _t_registerMethod;
+ static elapsedTimer _t_temporaryTimer1;
+ static elapsedTimer _t_temporaryTimer2;
+
+// Subtimers for _t_optimizer
+ static elapsedTimer _t_iterGVN;
+ static elapsedTimer _t_iterGVN2;
+
+// Subtimers for _t_registerAllocation
+ static elapsedTimer _t_ctorChaitin;
+ static elapsedTimer _t_buildIFGphysical;
+ static elapsedTimer _t_computeLive;
+ static elapsedTimer _t_regAllocSplit;
+ static elapsedTimer _t_postAllocCopyRemoval;
+ static elapsedTimer _t_fixupSpills;
+
+// Subtimers for _t_output
+ static elapsedTimer _t_instrSched;
+ static elapsedTimer _t_buildOopMaps;
+#endif
+public:
+ Compile * C;
+ Phase( PhaseNumber pnum );
+#ifndef PRODUCT
+ static void print_timers();
+#endif
+};
diff --git a/src/share/vm/opto/phaseX.cpp b/src/share/vm/opto/phaseX.cpp
new file mode 100644
index 000000000..3c5cd2cca
--- /dev/null
+++ b/src/share/vm/opto/phaseX.cpp
@@ -0,0 +1,1758 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_phaseX.cpp.incl"
+
+//=============================================================================
+#define NODE_HASH_MINIMUM_SIZE 255
+//------------------------------NodeHash---------------------------------------
+NodeHash::NodeHash(uint est_max_size) :
+ _max( round_up(est_max_size < NODE_HASH_MINIMUM_SIZE ? NODE_HASH_MINIMUM_SIZE : est_max_size) ),
+ _a(Thread::current()->resource_area()),
+ _table( NEW_ARENA_ARRAY( _a , Node* , _max ) ), // (Node**)_a->Amalloc(_max * sizeof(Node*)) ),
+ _inserts(0), _insert_limit( insert_limit() ),
+ _look_probes(0), _lookup_hits(0), _lookup_misses(0),
+ _total_insert_probes(0), _total_inserts(0),
+ _insert_probes(0), _grows(0) {
+ // _sentinel must be in the current node space
+ _sentinel = new (Compile::current(), 1) ProjNode(NULL, TypeFunc::Control);
+ memset(_table,0,sizeof(Node*)*_max);
+}
+
+//------------------------------NodeHash---------------------------------------
+NodeHash::NodeHash(Arena *arena, uint est_max_size) :
+ _max( round_up(est_max_size < NODE_HASH_MINIMUM_SIZE ? NODE_HASH_MINIMUM_SIZE : est_max_size) ),
+ _a(arena),
+ _table( NEW_ARENA_ARRAY( _a , Node* , _max ) ),
+ _inserts(0), _insert_limit( insert_limit() ),
+ _look_probes(0), _lookup_hits(0), _lookup_misses(0),
+ _delete_probes(0), _delete_hits(0), _delete_misses(0),
+ _total_insert_probes(0), _total_inserts(0),
+ _insert_probes(0), _grows(0) {
+ // _sentinel must be in the current node space
+ _sentinel = new (Compile::current(), 1) ProjNode(NULL, TypeFunc::Control);
+ memset(_table,0,sizeof(Node*)*_max);
+}
+
+//------------------------------NodeHash---------------------------------------
+NodeHash::NodeHash(NodeHash *nh) {
+ debug_only(_table = (Node**)badAddress); // interact correctly w/ operator=
+ // just copy in all the fields
+ *this = *nh;
+ // nh->_sentinel must be in the current node space
+}
+
+//------------------------------hash_find--------------------------------------
+// Find in hash table
+Node *NodeHash::hash_find( const Node *n ) {
+ // ((Node*)n)->set_hash( n->hash() );
+ uint hash = n->hash();
+ if (hash == Node::NO_HASH) {
+ debug_only( _lookup_misses++ );
+ return NULL;
+ }
+ uint key = hash & (_max-1);
+ uint stride = key | 0x01;
+ debug_only( _look_probes++ );
+ Node *k = _table[key]; // Get hashed value
+ if( !k ) { // ?Miss?
+ debug_only( _lookup_misses++ );
+ return NULL; // Miss!
+ }
+
+ int op = n->Opcode();
+ uint req = n->req();
+ while( 1 ) { // While probing hash table
+ if( k->req() == req && // Same count of inputs
+ k->Opcode() == op ) { // Same Opcode
+ for( uint i=0; i<req; i++ )
+ if( n->in(i)!=k->in(i)) // Different inputs?
+ goto collision; // "goto" is a speed hack...
+ if( n->cmp(*k) ) { // Check for any special bits
+ debug_only( _lookup_hits++ );
+ return k; // Hit!
+ }
+ }
+ collision:
+ debug_only( _look_probes++ );
+ key = (key + stride/*7*/) & (_max-1); // Stride through table with relative prime
+ k = _table[key]; // Get hashed value
+ if( !k ) { // ?Miss?
+ debug_only( _lookup_misses++ );
+ return NULL; // Miss!
+ }
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
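+
+// Worked example of the probe sequence above (illustrative numbers only; a
+// real table has at least NODE_HASH_MINIMUM_SIZE slots). With _max == 16 and
+// a node hash of 42:
+//   key    = 42 & 15 = 10
+//   stride = 10 | 1  = 11     // forced odd, hence relatively prime to _max
+//   probes: 10, (10+11)&15 = 5, (5+11)&15 = 0, (0+11)&15 = 11, ...
+// Because the stride is odd and _max is a power of two, the sequence visits
+// every slot before repeating, so an empty slot always terminates a miss.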
+
+//------------------------------hash_find_insert-------------------------------
+// Find in hash table, insert if not already present
+// Used to preserve unique entries in hash table
+Node *NodeHash::hash_find_insert( Node *n ) {
+ // n->set_hash( );
+ uint hash = n->hash();
+ if (hash == Node::NO_HASH) {
+ debug_only( _lookup_misses++ );
+ return NULL;
+ }
+ uint key = hash & (_max-1);
+ uint stride = key | 0x01; // stride must be relatively prime to table size
+ uint first_sentinel = 0; // replace a sentinel if seen.
+ debug_only( _look_probes++ );
+ Node *k = _table[key]; // Get hashed value
+ if( !k ) { // ?Miss?
+ debug_only( _lookup_misses++ );
+ _table[key] = n; // Insert into table!
+ debug_only(n->enter_hash_lock()); // Lock down the node while in the table.
+ check_grow(); // Grow table if insert hit limit
+ return NULL; // Miss!
+ }
+ else if( k == _sentinel ) {
+ first_sentinel = key; // Can insert here
+ }
+
+ int op = n->Opcode();
+ uint req = n->req();
+ while( 1 ) { // While probing hash table
+ if( k->req() == req && // Same count of inputs
+ k->Opcode() == op ) { // Same Opcode
+ for( uint i=0; i<req; i++ )
+ if( n->in(i)!=k->in(i)) // Different inputs?
+ goto collision; // "goto" is a speed hack...
+ if( n->cmp(*k) ) { // Check for any special bits
+ debug_only( _lookup_hits++ );
+ return k; // Hit!
+ }
+ }
+ collision:
+ debug_only( _look_probes++ );
+ key = (key + stride) & (_max-1); // Stride through table w/ relative prime
+ k = _table[key]; // Get hashed value
+ if( !k ) { // ?Miss?
+ debug_only( _lookup_misses++ );
+ key = (first_sentinel == 0) ? key : first_sentinel; // ?saw sentinel?
+ _table[key] = n; // Insert into table!
+ debug_only(n->enter_hash_lock()); // Lock down the node while in the table.
+ check_grow(); // Grow table if insert hit limit
+ return NULL; // Miss!
+ }
+ else if( first_sentinel == 0 && k == _sentinel ) {
+ first_sentinel = key; // Can insert here
+ }
+
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
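+
+// Illustrative scenario for the sentinel handling above (slot numbers made
+// up): suppose the probe sequence for node n visits slots 10, 5 and 0, which
+// hold { some other node, _sentinel, NULL }. Slot 5 was left by a previous
+// hash_delete(), so first_sentinel remembers it; when the empty slot 0 is
+// reached (a miss), n is stored into slot 5 instead, which keeps later
+// lookup chains short.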
+
+//------------------------------hash_insert------------------------------------
+// Insert into hash table
+void NodeHash::hash_insert( Node *n ) {
+ // // "conflict" comments -- print nodes that conflict
+ // bool conflict = false;
+ // n->set_hash();
+ uint hash = n->hash();
+ if (hash == Node::NO_HASH) {
+ return;
+ }
+ check_grow();
+ uint key = hash & (_max-1);
+ uint stride = key | 0x01;
+
+ while( 1 ) { // While probing hash table
+ debug_only( _insert_probes++ );
+ Node *k = _table[key]; // Get hashed value
+ if( !k || (k == _sentinel) ) break; // Found a slot
+ assert( k != n, "already inserted" );
+ // if( PrintCompilation && PrintOptoStatistics && Verbose ) { tty->print(" conflict: "); k->dump(); conflict = true; }
+ key = (key + stride) & (_max-1); // Stride through table w/ relative prime
+ }
+ _table[key] = n; // Insert into table!
+ debug_only(n->enter_hash_lock()); // Lock down the node while in the table.
+ // if( conflict ) { n->dump(); }
+}
+
+//------------------------------hash_delete------------------------------------
+// Replace in hash table with sentinel
+bool NodeHash::hash_delete( const Node *n ) {
+ Node *k;
+ uint hash = n->hash();
+ if (hash == Node::NO_HASH) {
+ debug_only( _delete_misses++ );
+ return false;
+ }
+ uint key = hash & (_max-1);
+ uint stride = key | 0x01;
+ debug_only( uint counter = 0; );
+ for( ; /* (k != NULL) && (k != _sentinel) */; ) {
+ debug_only( counter++ );
+ debug_only( _delete_probes++ );
+ k = _table[key]; // Get hashed value
+ if( !k ) { // Miss?
+ debug_only( _delete_misses++ );
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ for( uint i=0; i < _max; i++ )
+ assert( _table[i] != n, "changed edges with rehashing" );
+ }
+#endif
+ return false; // Miss! Not in chain
+ }
+ else if( n == k ) {
+ debug_only( _delete_hits++ );
+ _table[key] = _sentinel; // Hit! Label as deleted entry
+ debug_only(((Node*)n)->exit_hash_lock()); // Unlock the node upon removal from table.
+ return true;
+ }
+ else {
+ // collision: move through table with prime offset
+ key = (key + stride/*7*/) & (_max-1);
+ assert( counter <= _insert_limit, "Cycle in hash-table");
+ }
+ }
+ ShouldNotReachHere();
+ return false;
+}
+
+//------------------------------round_up---------------------------------------
+// Round up to nearest power of 2
+uint NodeHash::round_up( uint x ) {
+ x += (x>>2); // Add 25% slop
+ if( x <16 ) return 16; // Small stuff
+ uint i=16;
+ while( i < x ) i <<= 1; // Double to fit
+ return i; // Return hash table size
+}
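+
+// Example (illustrative): round_up(100) adds 25% slop giving 125, doubles
+// 16 -> 32 -> 64 -> 128 and returns 128; round_up(255) gives 318 and returns
+// 512. Since the returned size is at least 1.25x the request, the table
+// starts out at most 80% full.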
+
+//------------------------------grow-------------------------------------------
+// Grow _table to next power of 2 and insert old entries
+void NodeHash::grow() {
+ // Record old state
+ uint old_max = _max;
+ Node **old_table = _table;
+ // Construct new table with twice the space
+ _grows++;
+ _total_inserts += _inserts;
+ _total_insert_probes += _insert_probes;
+ _inserts = 0;
+ _insert_probes = 0;
+ _max = _max << 1;
+ _table = NEW_ARENA_ARRAY( _a , Node* , _max ); // (Node**)_a->Amalloc( _max * sizeof(Node*) );
+ memset(_table,0,sizeof(Node*)*_max);
+ _insert_limit = insert_limit();
+ // Insert old entries into the new table
+ for( uint i = 0; i < old_max; i++ ) {
+ Node *m = *old_table++;
+ if( !m || m == _sentinel ) continue;
+ debug_only(m->exit_hash_lock()); // Unlock the node upon removal from old table.
+ hash_insert(m);
+ }
+}
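+
+// Example (illustrative): growing a 512-entry table doubles _max to 1024 and
+// re-inserts the surviving entries; node hashes do not change, only the slot
+// computation hash & (_max-1) uses the wider mask. Sentinel markers left by
+// earlier deletions are skipped rather than copied, so growing also compacts
+// the probe chains.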
+
+//------------------------------clear------------------------------------------
+// Clear all entries in _table to NULL but keep storage
+void NodeHash::clear() {
+#ifdef ASSERT
+ // Unlock all nodes upon removal from table.
+ for (uint i = 0; i < _max; i++) {
+ Node* n = _table[i];
+ if (!n || n == _sentinel) continue;
+ n->exit_hash_lock();
+ }
+#endif
+
+ memset( _table, 0, _max * sizeof(Node*) );
+}
+
+//-----------------------remove_useless_nodes----------------------------------
+// Remove useless nodes from value table,
+// implementation does not depend on hash function
+void NodeHash::remove_useless_nodes(VectorSet &useful) {
+
+ // Dead nodes in the hash table inherited from GVN should not replace
+ // existing nodes, so remove them.
+ uint max = size();
+ Node *sentinel_node = sentinel();
+ for( uint i = 0; i < max; ++i ) {
+ Node *n = at(i);
+ if(n != NULL && n != sentinel_node && !useful.test(n->_idx)) {
+ debug_only(n->exit_hash_lock()); // Unlock the node when removed
+ _table[i] = sentinel_node; // Replace with placeholder
+ }
+ }
+}
+
+#ifndef PRODUCT
+//------------------------------dump-------------------------------------------
+// Dump statistics for the hash table
+void NodeHash::dump() {
+ _total_inserts += _inserts;
+ _total_insert_probes += _insert_probes;
+ if( PrintCompilation && PrintOptoStatistics && Verbose && (_inserts > 0) ) { // PrintOptoGVN
+ if( PrintCompilation2 ) {
+ for( uint i=0; i<_max; i++ )
+ if( _table[i] )
+ tty->print("%d/%d/%d ",i,_table[i]->hash()&(_max-1),_table[i]->_idx);
+ }
+ tty->print("\nGVN Hash stats: %d grows to %d max_size\n", _grows, _max);
+ tty->print(" %d/%d (%8.1f%% full)\n", _inserts, _max, (double)_inserts/_max*100.0);
+ tty->print(" %dp/(%dh+%dm) (%8.2f probes/lookup)\n", _look_probes, _lookup_hits, _lookup_misses, (double)_look_probes/(_lookup_hits+_lookup_misses));
+ tty->print(" %dp/%di (%8.2f probes/insert)\n", _total_insert_probes, _total_inserts, (double)_total_insert_probes/_total_inserts);
+ // sentinels increase lookup cost, but not insert cost
+ assert((_lookup_misses+_lookup_hits)*4+100 >= _look_probes, "bad hash function");
+ assert( _inserts+(_inserts>>3) < _max, "table too full" );
+ assert( _inserts*3+100 >= _insert_probes, "bad hash function" );
+ }
+}
+
+Node *NodeHash::find_index(uint idx) { // For debugging
+ // Find an entry by its index value
+ for( uint i = 0; i < _max; i++ ) {
+ Node *m = _table[i];
+ if( !m || m == _sentinel ) continue;
+ if( m->_idx == (uint)idx ) return m;
+ }
+ return NULL;
+}
+#endif
+
+#ifdef ASSERT
+NodeHash::~NodeHash() {
+ // Unlock all nodes upon destruction of table.
+ if (_table != (Node**)badAddress) clear();
+}
+
+void NodeHash::operator=(const NodeHash& nh) {
+ // Unlock all nodes upon replacement of table.
+ if (&nh == this) return;
+ if (_table != (Node**)badAddress) clear();
+ memcpy(this, &nh, sizeof(*this));
+ // Do not increment hash_lock counts again.
+ // Instead, be sure we never again use the source table.
+ ((NodeHash*)&nh)->_table = (Node**)badAddress;
+}
+
+
+#endif
+
+
+//=============================================================================
+//------------------------------PhaseRemoveUseless-----------------------------
+// 1) Use a breadth-first walk to collect useful nodes reachable from root.
+PhaseRemoveUseless::PhaseRemoveUseless( PhaseGVN *gvn, Unique_Node_List *worklist ) : Phase(Remove_Useless),
+ _useful(Thread::current()->resource_area()) {
+
+ // Implementation requires 'UseLoopSafepoints == true' and an edge from root
+ // to each SafePointNode at a backward branch. Inserted in add_safepoint().
+ if( !UseLoopSafepoints || !OptoRemoveUseless ) return;
+
+ // Identify nodes that are reachable from below, useful.
+ C->identify_useful_nodes(_useful);
+
+ // Remove all useless nodes from PhaseValues' recorded types
+ // Must be done before disconnecting nodes to preserve hash-table-invariant
+ gvn->remove_useless_nodes(_useful.member_set());
+
+ // Remove all useless nodes from future worklist
+ worklist->remove_useless_nodes(_useful.member_set());
+
+ // Disconnect 'useless' nodes that are adjacent to useful nodes
+ C->remove_useless_nodes(_useful);
+
+ // Remove edges from "root" to each SafePoint at a backward branch.
+ // They were inserted during parsing (see add_safepoint()) to make infinite
+ // loops without calls or exceptions visible to root, i.e., useful.
+ Node *root = C->root();
+ if( root != NULL ) {
+ for( uint i = root->req(); i < root->len(); ++i ) {
+ Node *n = root->in(i);
+ if( n != NULL && n->is_SafePoint() ) {
+ root->rm_prec(i);
+ --i;
+ }
+ }
+ }
+}
+
+
+//=============================================================================
+//------------------------------PhaseTransform---------------------------------
+PhaseTransform::PhaseTransform( PhaseNumber pnum ) : Phase(pnum),
+ _arena(Thread::current()->resource_area()),
+ _nodes(_arena),
+ _types(_arena)
+{
+ init_con_caches();
+#ifndef PRODUCT
+ clear_progress();
+ clear_transforms();
+ set_allow_progress(true);
+#endif
+ // Force allocation for currently existing nodes
+ _types.map(C->unique(), NULL);
+}
+
+//------------------------------PhaseTransform---------------------------------
+PhaseTransform::PhaseTransform( Arena *arena, PhaseNumber pnum ) : Phase(pnum),
+ _arena(arena),
+ _nodes(arena),
+ _types(arena)
+{
+ init_con_caches();
+#ifndef PRODUCT
+ clear_progress();
+ clear_transforms();
+ set_allow_progress(true);
+#endif
+ // Force allocation for currently existing nodes
+ _types.map(C->unique(), NULL);
+}
+
+//------------------------------PhaseTransform---------------------------------
+// Initialize with previously generated type information
+PhaseTransform::PhaseTransform( PhaseTransform *pt, PhaseNumber pnum ) : Phase(pnum),
+ _arena(pt->_arena),
+ _nodes(pt->_nodes),
+ _types(pt->_types)
+{
+ init_con_caches();
+#ifndef PRODUCT
+ clear_progress();
+ clear_transforms();
+ set_allow_progress(true);
+#endif
+}
+
+void PhaseTransform::init_con_caches() {
+ memset(_icons,0,sizeof(_icons));
+ memset(_lcons,0,sizeof(_lcons));
+ memset(_zcons,0,sizeof(_zcons));
+}
+
+
+//--------------------------------find_int_type--------------------------------
+const TypeInt* PhaseTransform::find_int_type(Node* n) {
+ if (n == NULL) return NULL;
+ // Call type_or_null(n) to determine the node's type, since we might be in
+ // the parse phase and a call to n->Value() may return the wrong type.
+ // (For example, a phi node at the beginning of loop parsing is not ready.)
+ const Type* t = type_or_null(n);
+ if (t == NULL) return NULL;
+ return t->isa_int();
+}
+
+
+//-------------------------------find_long_type--------------------------------
+const TypeLong* PhaseTransform::find_long_type(Node* n) {
+ if (n == NULL) return NULL;
+ // (See comment above on type_or_null.)
+ const Type* t = type_or_null(n);
+ if (t == NULL) return NULL;
+ return t->isa_long();
+}
+
+
+#ifndef PRODUCT
+void PhaseTransform::dump_old2new_map() const {
+ _nodes.dump();
+}
+
+void PhaseTransform::dump_new( uint nidx ) const {
+ for( uint i=0; i<_nodes.Size(); i++ )
+ if( _nodes[i] && _nodes[i]->_idx == nidx ) {
+ _nodes[i]->dump();
+ tty->cr();
+ tty->print_cr("Old index= %d",i);
+ return;
+ }
+ tty->print_cr("Node %d not found in the new indices", nidx);
+}
+
+//------------------------------dump_types-------------------------------------
+void PhaseTransform::dump_types( ) const {
+ _types.dump();
+}
+
+//------------------------------dump_nodes_and_types---------------------------
+void PhaseTransform::dump_nodes_and_types(const Node *root, uint depth, bool only_ctrl) {
+ VectorSet visited(Thread::current()->resource_area());
+ dump_nodes_and_types_recur( root, depth, only_ctrl, visited );
+}
+
+//------------------------------dump_nodes_and_types_recur---------------------
+void PhaseTransform::dump_nodes_and_types_recur( const Node *n, uint depth, bool only_ctrl, VectorSet &visited) {
+ if( !n ) return;
+ if( depth == 0 ) return;
+ if( visited.test_set(n->_idx) ) return;
+ for( uint i=0; i<n->len(); i++ ) {
+ if( only_ctrl && !(n->is_Region()) && i != TypeFunc::Control ) continue;
+ dump_nodes_and_types_recur( n->in(i), depth-1, only_ctrl, visited );
+ }
+ n->dump();
+ if (type_or_null(n) != NULL) {
+ tty->print(" "); type(n)->dump(); tty->cr();
+ }
+}
+
+#endif
+
+
+//=============================================================================
+//------------------------------PhaseValues------------------------------------
+// Set minimum table size to "255"
+PhaseValues::PhaseValues( Arena *arena, uint est_max_size ) : PhaseTransform(arena, GVN), _table(arena, est_max_size) {
+ NOT_PRODUCT( clear_new_values(); )
+}
+
+//------------------------------PhaseValues------------------------------------
+// Set minimum table size to "255"
+PhaseValues::PhaseValues( PhaseValues *ptv ) : PhaseTransform( ptv, GVN ),
+ _table(&ptv->_table) {
+ NOT_PRODUCT( clear_new_values(); )
+}
+
+//------------------------------PhaseValues------------------------------------
+// Used by +VerifyOpto. Clear out hash table but copy _types array.
+PhaseValues::PhaseValues( PhaseValues *ptv, const char *dummy ) : PhaseTransform( ptv, GVN ),
+ _table(ptv->arena(),ptv->_table.size()) {
+ NOT_PRODUCT( clear_new_values(); )
+}
+
+//------------------------------~PhaseValues-----------------------------------
+#ifndef PRODUCT
+PhaseValues::~PhaseValues() {
+ _table.dump();
+
+ // Statistics for value progress and efficiency
+ if( PrintCompilation && Verbose && WizardMode ) {
+ tty->print("\n%sValues: %d nodes ---> %d/%d (%d)",
+ is_IterGVN() ? "Iter" : " ", C->unique(), made_progress(), made_transforms(), made_new_values());
+ if( made_transforms() != 0 ) {
+ tty->print_cr(" ratio %f", made_progress()/(float)made_transforms() );
+ } else {
+ tty->cr();
+ }
+ }
+}
+#endif
+
+//------------------------------makecon----------------------------------------
+ConNode* PhaseTransform::makecon(const Type *t) {
+ assert(t->singleton(), "must be a constant");
+ assert(!t->empty() || t == Type::TOP, "must not be vacuous range");
+ switch (t->base()) { // fast paths
+ case Type::Half:
+ case Type::Top: return (ConNode*) C->top();
+ case Type::Int: return intcon( t->is_int()->get_con() );
+ case Type::Long: return longcon( t->is_long()->get_con() );
+ }
+ if (t->is_zero_type())
+ return zerocon(t->basic_type());
+ return uncached_makecon(t);
+}
+
+//--------------------------uncached_makecon-----------------------------------
+// Make an idealized constant - one of ConINode, ConPNode, etc.
+ConNode* PhaseValues::uncached_makecon(const Type *t) {
+ assert(t->singleton(), "must be a constant");
+ ConNode* x = ConNode::make(C, t);
+ ConNode* k = (ConNode*)hash_find_insert(x); // Value numbering
+ if (k == NULL) {
+ set_type(x, t); // Missed, provide type mapping
+ GrowableArray<Node_Notes*>* nna = C->node_note_array();
+ if (nna != NULL) {
+ Node_Notes* loc = C->locate_node_notes(nna, x->_idx, true);
+ loc->clear(); // do not put debug info on constants
+ }
+ // Collect points-to information for escape analysis
+ ConnectionGraph *cgr = C->congraph();
+ if (cgr != NULL) {
+ cgr->record_escape(x, this);
+ }
+ } else {
+ x->destruct(); // Hit, destroy duplicate constant
+ x = k; // use existing constant
+ }
+ return x;
+}
+
+//------------------------------intcon-----------------------------------------
+// Fast integer constant. Same as "transform(new ConINode(TypeInt::make(i)))"
+ConINode* PhaseTransform::intcon(int i) {
+ // Small integer? Check cache! Check that cached node is not dead
+ if (i >= _icon_min && i <= _icon_max) {
+ ConINode* icon = _icons[i-_icon_min];
+ if (icon != NULL && icon->in(TypeFunc::Control) != NULL)
+ return icon;
+ }
+ ConINode* icon = (ConINode*) uncached_makecon(TypeInt::make(i));
+ assert(icon->is_Con(), "");
+ if (i >= _icon_min && i <= _icon_max)
+ _icons[i-_icon_min] = icon; // Cache small integers
+ return icon;
+}
+
+//------------------------------longcon----------------------------------------
+// Fast long constant.
+ConLNode* PhaseTransform::longcon(jlong l) {
+ // Small integer? Check cache! Check that cached node is not dead
+ if (l >= _lcon_min && l <= _lcon_max) {
+ ConLNode* lcon = _lcons[l-_lcon_min];
+ if (lcon != NULL && lcon->in(TypeFunc::Control) != NULL)
+ return lcon;
+ }
+ ConLNode* lcon = (ConLNode*) uncached_makecon(TypeLong::make(l));
+ assert(lcon->is_Con(), "");
+ if (l >= _lcon_min && l <= _lcon_max)
+ _lcons[l-_lcon_min] = lcon; // Cache small integers
+ return lcon;
+}
+
+//------------------------------zerocon-----------------------------------------
+// Fast zero or null constant. Same as "transform(ConNode::make(Type::get_zero_type(bt)))"
+ConNode* PhaseTransform::zerocon(BasicType bt) {
+ assert((uint)bt <= _zcon_max, "domain check");
+ ConNode* zcon = _zcons[bt];
+ if (zcon != NULL && zcon->in(TypeFunc::Control) != NULL)
+ return zcon;
+ zcon = (ConNode*) uncached_makecon(Type::get_zero_type(bt));
+ _zcons[bt] = zcon;
+ return zcon;
+}
+
+
+
+//=============================================================================
+//------------------------------transform--------------------------------------
+// Return a node which computes the same function as this node, but in a
+// faster or cheaper fashion. The Node passed in here must have no other
+// pointers to it, as its storage will be reclaimed if the Node can be
+// optimized away.
+Node *PhaseGVN::transform( Node *n ) {
+ NOT_PRODUCT( set_transforms(); )
+
+ // Apply the Ideal call in a loop until it no longer applies
+ Node *k = n;
+ NOT_PRODUCT( uint loop_count = 0; )
+ while( 1 ) {
+ Node *i = k->Ideal(this, /*can_reshape=*/false);
+ if( !i ) break;
+ assert( i->_idx >= k->_idx, "Idealize should return new nodes, use Identity to return old nodes" );
+ // Can never reclaim storage for Ideal calls, because the Ideal call
+ // returns a new Node, bumping the High Water Mark and our old Node
+ // is caught behind the new one.
+ //if( k != i ) {
+ //k->destruct(); // Reclaim storage for recent node
+ k = i;
+ //}
+ assert(loop_count++ < K, "infinite loop in PhaseGVN::transform");
+ }
+ NOT_PRODUCT( if( loop_count != 0 ) { set_progress(); } )
+
+ // If brand new node, make space in type array.
+ ensure_type_or_null(k);
+
+ // Cache result of Value call since it can be expensive
+ // (abstract interpretation of node 'k' using phase->_types[ inputs ])
+ const Type *t = k->Value(this); // Get runtime Value set
+ assert(t != NULL, "value sanity");
+ if (type_or_null(k) != t) {
+#ifndef PRODUCT
+ // Do not record transformation or value construction on first visit
+ if (type_or_null(k) == NULL) {
+ inc_new_values();
+ set_progress();
+ }
+#endif
+ set_type(k, t);
+ // If k is a TypeNode, capture any more-precise type permanently into Node
+ k->raise_bottom_type(t);
+ }
+
+ if( t->singleton() && !k->is_Con() ) {
+ //k->destruct(); // Reclaim storage for recent node
+ NOT_PRODUCT( set_progress(); )
+ return makecon(t); // Turn into a constant
+ }
+
+ // Now check for Identities
+ Node *i = k->Identity(this); // Look for a nearby replacement
+ if( i != k ) { // Found? Return replacement!
+ //k->destruct(); // Reclaim storage for recent node
+ NOT_PRODUCT( set_progress(); )
+ return i;
+ }
+
+ // Try Global Value Numbering
+ i = hash_find_insert(k); // Found older value when i != NULL
+ if( i && i != k ) { // Hit? Return the old guy
+ NOT_PRODUCT( set_progress(); )
+ return i;
+ }
+
+ // Collect points-to information for escape analysis
+ ConnectionGraph *cgr = C->congraph();
+ if (cgr != NULL) {
+ cgr->record_escape(k, this);
+ }
+
+ // Return Idealized original
+ return k;
+}
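+
+// Typical call pattern (illustrative; the allocation form mirrors the node
+// creation style used elsewhere in this file):
+//   Node *sum = gvn.transform( new (C, 3) AddINode(a, b) );
+// The caller must use the returned node: the freshly built node may be
+// replaced by an idealized form, a constant, an identity, or an older
+// hash-equivalent node already in the value table.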
+
+//------------------------------transform--------------------------------------
+// Return a node which computes the same function as this node, but
+// in a faster or cheaper fashion.
+Node *PhaseGVN::transform_no_reclaim( Node *n ) {
+ NOT_PRODUCT( set_transforms(); )
+
+ // Apply the Ideal call in a loop until it no longer applies
+ Node *k = n;
+ NOT_PRODUCT( uint loop_count = 0; )
+ while( 1 ) {
+ Node *i = k->Ideal(this, /*can_reshape=*/false);
+ if( !i ) break;
+ assert( i->_idx >= k->_idx, "Idealize should return new nodes, use Identity to return old nodes" );
+ k = i;
+ assert(loop_count++ < K, "infinite loop in PhaseGVN::transform");
+ }
+ NOT_PRODUCT( if( loop_count != 0 ) { set_progress(); } )
+
+
+ // If brand new node, make space in type array.
+ ensure_type_or_null(k);
+
+ // Since I just called 'Value' to compute the set of run-time values
+ // for this Node, and 'Value' is non-local (and therefore expensive) I'll
+ // cache Value. Later requests for the local phase->type of this Node can
+ // use the cached Value instead of suffering with 'bottom_type'.
+ const Type *t = k->Value(this); // Get runtime Value set
+ assert(t != NULL, "value sanity");
+ if (type_or_null(k) != t) {
+#ifndef PRODUCT
+ // Do not count initial visit to node as a transformation
+ if (type_or_null(k) == NULL) {
+ inc_new_values();
+ set_progress();
+ }
+#endif
+ set_type(k, t);
+ // If k is a TypeNode, capture any more-precise type permanently into Node
+ k->raise_bottom_type(t);
+ }
+
+ if( t->singleton() && !k->is_Con() ) {
+ NOT_PRODUCT( set_progress(); )
+ return makecon(t); // Turn into a constant
+ }
+
+ // Now check for Identities
+ Node *i = k->Identity(this); // Look for a nearby replacement
+ if( i != k ) { // Found? Return replacement!
+ NOT_PRODUCT( set_progress(); )
+ return i;
+ }
+
+ // Global Value Numbering
+ i = hash_find_insert(k); // Insert if new
+ if( i && (i != k) ) {
+ // Return the pre-existing node
+ NOT_PRODUCT( set_progress(); )
+ return i;
+ }
+
+ // Return Idealized original
+ return k;
+}
+
+#ifdef ASSERT
+//------------------------------dead_loop_check--------------------------------
+// Check for a simple dead loop when a data node references itself directly
+// or through another data node, excluding cons and phis.
+void PhaseGVN::dead_loop_check( Node *n ) {
+ // Phi may reference itself in a loop
+ if (n != NULL && !n->is_dead_loop_safe() && !n->is_CFG()) {
+ // Do 2 levels check and only data inputs.
+ bool no_dead_loop = true;
+ uint cnt = n->req();
+ for (uint i = 1; i < cnt && no_dead_loop; i++) {
+ Node *in = n->in(i);
+ if (in == n) {
+ no_dead_loop = false;
+ } else if (in != NULL && !in->is_dead_loop_safe()) {
+ uint icnt = in->req();
+ for (uint j = 1; j < icnt && no_dead_loop; j++) {
+ if (in->in(j) == n || in->in(j) == in)
+ no_dead_loop = false;
+ }
+ }
+ }
+ if (!no_dead_loop) n->dump(3);
+ assert(no_dead_loop, "dead loop detected");
+ }
+}
+#endif
+
+//=============================================================================
+//------------------------------PhaseIterGVN-----------------------------------
+// Initialize hash table to fresh and clean for +VerifyOpto
+PhaseIterGVN::PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ) : PhaseGVN(igvn,dummy), _worklist( ) {
+}
+
+//------------------------------PhaseIterGVN-----------------------------------
+// Initialize with previous PhaseIterGVN info; used by PhaseCCP
+PhaseIterGVN::PhaseIterGVN( PhaseIterGVN *igvn ) : PhaseGVN(igvn),
+ _worklist( igvn->_worklist )
+{
+}
+
+//------------------------------PhaseIterGVN-----------------------------------
+// Initialize with previous PhaseGVN info from Parser
+PhaseIterGVN::PhaseIterGVN( PhaseGVN *gvn ) : PhaseGVN(gvn),
+ _worklist(*C->for_igvn())
+{
+ uint max;
+
+ // Dead nodes in the hash table inherited from GVN were not treated as
+ // roots during def-use info creation; hence they represent an invisible
+ // use. Clear them out.
+ max = _table.size();
+ for( uint i = 0; i < max; ++i ) {
+ Node *n = _table.at(i);
+ if(n != NULL && n != _table.sentinel() && n->outcnt() == 0) {
+ if( n->is_top() ) continue;
+ assert( false, "Parse::remove_useless_nodes missed this node");
+ hash_delete(n);
+ }
+ }
+
+ // Any Phis or Regions on the worklist probably had uses that could not
+ // make more progress because the uses were made while the Phis and Regions
+ // were in half-built states. Put all uses of Phis and Regions on worklist.
+ max = _worklist.size();
+ for( uint j = 0; j < max; j++ ) {
+ Node *n = _worklist.at(j);
+ uint uop = n->Opcode();
+ if( uop == Op_Phi || uop == Op_Region ||
+ n->is_Type() ||
+ n->is_Mem() )
+ add_users_to_worklist(n);
+ }
+}
+
+
+#ifndef PRODUCT
+void PhaseIterGVN::verify_step(Node* n) {
+ _verify_window[_verify_counter % _verify_window_size] = n;
+ ++_verify_counter;
+ ResourceMark rm;
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet old_space(area), new_space(area);
+ if (C->unique() < 1000 ||
+ 0 == _verify_counter % (C->unique() < 10000 ? 10 : 100)) {
+ ++_verify_full_passes;
+ Node::verify_recur(C->root(), -1, old_space, new_space);
+ }
+ const int verify_depth = 4;
+ for ( int i = 0; i < _verify_window_size; i++ ) {
+ Node* n = _verify_window[i];
+ if ( n == NULL ) continue;
+ if( n->in(0) == NodeSentinel ) { // xform_idom
+ _verify_window[i] = n->in(1);
+ --i; continue;
+ }
+ // Typical fanout is 1-2, so this call visits about 6 nodes.
+ Node::verify_recur(n, verify_depth, old_space, new_space);
+ }
+}
+#endif
+
+
+//------------------------------init_worklist----------------------------------
+// Initialize worklist for each node.
+void PhaseIterGVN::init_worklist( Node *n ) {
+ if( _worklist.member(n) ) return;
+ _worklist.push(n);
+ uint cnt = n->req();
+ for( uint i =0 ; i < cnt; i++ ) {
+ Node *m = n->in(i);
+ if( m ) init_worklist(m);
+ }
+}
+
+//------------------------------optimize---------------------------------------
+void PhaseIterGVN::optimize() {
+ debug_only(uint num_processed = 0;);
+#ifndef PRODUCT
+ {
+ _verify_counter = 0;
+ _verify_full_passes = 0;
+ for ( int i = 0; i < _verify_window_size; i++ ) {
+ _verify_window[i] = NULL;
+ }
+ }
+#endif
+
+ // Pull from worklist; transform node;
+ // If node has changed: update edge info and put uses on worklist.
+ while( _worklist.size() ) {
+ Node *n = _worklist.pop();
+ if (TraceIterativeGVN && Verbose) {
+ tty->print(" Pop ");
+ NOT_PRODUCT( n->dump(); )
+ debug_only(if( (num_processed++ % 100) == 0 ) _worklist.print_set();)
+ }
+
+ if (n->outcnt() != 0) {
+
+#ifndef PRODUCT
+ uint wlsize = _worklist.size();
+ const Type* oldtype = type_or_null(n);
+#endif //PRODUCT
+
+ Node *nn = transform_old(n);
+
+#ifndef PRODUCT
+ if (TraceIterativeGVN) {
+ const Type* newtype = type_or_null(n);
+ if (nn != n) {
+ // print old node
+ tty->print("< ");
+ if (oldtype != newtype && oldtype != NULL) {
+ oldtype->dump();
+ }
+ do { tty->print("\t"); } while (tty->position() < 16);
+ tty->print("<");
+ n->dump();
+ }
+ if (oldtype != newtype || nn != n) {
+ // print new node and/or new type
+ if (oldtype == NULL) {
+ tty->print("* ");
+ } else if (nn != n) {
+ tty->print("> ");
+ } else {
+ tty->print("= ");
+ }
+ if (newtype == NULL) {
+ tty->print("null");
+ } else {
+ newtype->dump();
+ }
+ do { tty->print("\t"); } while (tty->position() < 16);
+ nn->dump();
+ }
+ if (Verbose && wlsize < _worklist.size()) {
+ tty->print(" Push {");
+ while (wlsize != _worklist.size()) {
+ Node* pushed = _worklist.at(wlsize++);
+ tty->print(" %d", pushed->_idx);
+ }
+ tty->print_cr(" }");
+ }
+ }
+ if( VerifyIterativeGVN && nn != n ) {
+ verify_step((Node*) NULL); // ignore n, it might be subsumed
+ }
+#endif
+ } else if (!n->is_top()) {
+ remove_dead_node(n);
+ }
+ }
+
+#ifndef PRODUCT
+ C->verify_graph_edges();
+ if( VerifyOpto && allow_progress() ) {
+ // Must turn off allow_progress to enable assert and break recursion
+ C->root()->verify();
+ { // Check if any progress was missed using IterGVN
+ // Def-Use info enables transformations not attempted in wash-pass
+ // e.g. Region/Phi cleanup, ...
+ // Null-check elision -- may not have reached fixpoint
+ // do not propagate to dominated nodes
+ ResourceMark rm;
+ PhaseIterGVN igvn2(this,"Verify"); // Fresh and clean!
+ // Fill worklist completely
+ igvn2.init_worklist(C->root());
+
+ igvn2.set_allow_progress(false);
+ igvn2.optimize();
+ igvn2.set_allow_progress(true);
+ }
+ }
+ if ( VerifyIterativeGVN && PrintOpto ) {
+ if ( _verify_counter == _verify_full_passes )
+ tty->print_cr("VerifyIterativeGVN: %d transforms and verify passes",
+ _verify_full_passes);
+ else
+ tty->print_cr("VerifyIterativeGVN: %d transforms, %d full verify passes",
+ _verify_counter, _verify_full_passes);
+ }
+#endif
+}
+
+
+//------------------register_new_node_with_optimizer---------------------------
+// Register a new node with the optimizer. Update the types array, the def-use
+// info. Put on worklist.
+Node* PhaseIterGVN::register_new_node_with_optimizer(Node* n, Node* orig) {
+ set_type_bottom(n);
+ _worklist.push(n);
+ if (orig != NULL) C->copy_node_notes_to(n, orig);
+ return n;
+}
+
+//------------------------------transform--------------------------------------
+// Non-recursive: idealize Node 'n' with respect to its inputs and its value
+Node *PhaseIterGVN::transform( Node *n ) {
+ // If brand new node, make space in type array, and give it a type.
+ ensure_type_or_null(n);
+ if (type_or_null(n) == NULL) {
+ set_type_bottom(n);
+ }
+
+ return transform_old(n);
+}
+
+//------------------------------transform_old----------------------------------
+Node *PhaseIterGVN::transform_old( Node *n ) {
+#ifndef PRODUCT
+ debug_only(uint loop_count = 0;);
+ set_transforms();
+#endif
+ // Remove 'n' from hash table in case it gets modified
+ _table.hash_delete(n);
+ if( VerifyIterativeGVN ) {
+ assert( !_table.find_index(n->_idx), "found duplicate entry in table");
+ }
+
+ // Apply the Ideal call in a loop until it no longer applies
+ Node *k = n;
+ DEBUG_ONLY(dead_loop_check(k);)
+ Node *i = k->Ideal(this, /*can_reshape=*/true);
+#ifndef PRODUCT
+ if( VerifyIterativeGVN )
+ verify_step(k);
+ if( i && VerifyOpto ) {
+ if( !allow_progress() ) {
+ if (i->is_Add() && i->outcnt() == 1) {
+ // Switched input to left side because this is the only use
+ } else if( i->is_If() && (i->in(0) == NULL) ) {
+ // This IF is dead because it is dominated by an equivalent IF. When the
+ // dominating IF changed, the info was not propagated sparsely to 'this'.
+ // Propagating this info further will spuriously identify other
+ // progress.
+ return i;
+ } else
+ set_progress();
+ } else
+ set_progress();
+ }
+#endif
+
+ while( i ) {
+#ifndef PRODUCT
+ debug_only( if( loop_count >= K ) i->dump(4); )
+ assert(loop_count < K, "infinite loop in PhaseIterGVN::transform");
+ debug_only( loop_count++; )
+#endif
+ assert((i->_idx >= k->_idx) || i->is_top(), "Idealize should return new nodes, use Identity to return old nodes");
+ // Made a change; put users of original Node on worklist
+ add_users_to_worklist( k );
+ // Replacing root of transform tree?
+ if( k != i ) {
+ // Make users of old Node now use new.
+ subsume_node( k, i );
+ k = i;
+ }
+ DEBUG_ONLY(dead_loop_check(k);)
+ // Try idealizing again
+ i = k->Ideal(this, /*can_reshape=*/true);
+#ifndef PRODUCT
+ if( VerifyIterativeGVN )
+ verify_step(k);
+ if( i && VerifyOpto ) set_progress();
+#endif
+ }
+
+ // If brand new node, make space in type array.
+ ensure_type_or_null(k);
+
+ // See what kind of values 'k' takes on at runtime
+ const Type *t = k->Value(this);
+ assert(t != NULL, "value sanity");
+
+ // Since I just called 'Value' to compute the set of run-time values
+ // for this Node, and 'Value' is non-local (and therefore expensive) I'll
+ // cache Value. Later requests for the local phase->type of this Node can
+ // use the cached Value instead of suffering with 'bottom_type'.
+ if (t != type_or_null(k)) {
+ NOT_PRODUCT( set_progress(); )
+ NOT_PRODUCT( inc_new_values();)
+ set_type(k, t);
+ // If k is a TypeNode, capture any more-precise type permanently into Node
+ k->raise_bottom_type(t);
+ // Move users of node to worklist
+ add_users_to_worklist( k );
+ }
+
+ // If 'k' computes a constant, replace it with a constant
+ if( t->singleton() && !k->is_Con() ) {
+ NOT_PRODUCT( set_progress(); )
+ Node *con = makecon(t); // Make a constant
+ add_users_to_worklist( k );
+ subsume_node( k, con ); // Everybody using k now uses con
+ return con;
+ }
+
+ // Now check for Identities
+ i = k->Identity(this); // Look for a nearby replacement
+ if( i != k ) { // Found? Return replacement!
+ NOT_PRODUCT( set_progress(); )
+ add_users_to_worklist( k );
+ subsume_node( k, i ); // Everybody using k now uses i
+ return i;
+ }
+
+ // Global Value Numbering
+ i = hash_find_insert(k); // Check for pre-existing node
+ if( i && (i != k) ) {
+ // Return the pre-existing node if it isn't dead
+ NOT_PRODUCT( set_progress(); )
+ add_users_to_worklist( k );
+ subsume_node( k, i ); // Everybody using k now uses i
+ return i;
+ }
+
+ // Return Idealized original
+ return k;
+}
+
+//---------------------------------saturate------------------------------------
+const Type* PhaseIterGVN::saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const {
+ return new_type->narrow(old_type);
+}
+
+//------------------------------remove_globally_dead_node----------------------
+// Kill a globally dead Node. All uses are also globally dead and are
+// aggressively trimmed.
+void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
+ assert(dead != C->root(), "killing root, eh?");
+ if (dead->is_top()) return;
+ NOT_PRODUCT( set_progress(); )
+ // Remove from iterative worklist
+ _worklist.remove(dead);
+ if (!dead->is_Con()) { // Don't kill cons but uses
+ // Remove from hash table
+ _table.hash_delete( dead );
+ // Smash all inputs to 'dead', isolating him completely
+ for( uint i = 0; i < dead->req(); i++ ) {
+ Node *in = dead->in(i);
+ if( in ) { // Points to something?
+ dead->set_req(i,NULL); // Kill the edge
+ if (in->outcnt() == 0 && in != C->top()) {// Made input go dead?
+ remove_dead_node(in); // Recursively remove
+ } else if (in->outcnt() == 1 &&
+ in->has_special_unique_user()) {
+ _worklist.push(in->unique_out());
+ } else if (in->outcnt() <= 2 && dead->is_Phi()) {
+ if( in->Opcode() == Op_Region )
+ _worklist.push(in);
+ else if( in->is_Store() ) {
+ DUIterator_Fast imax, i = in->fast_outs(imax);
+ _worklist.push(in->fast_out(i));
+ i++;
+ if(in->outcnt() == 2) {
+ _worklist.push(in->fast_out(i));
+ i++;
+ }
+ assert(!(i < imax), "sanity");
+ }
+ }
+ }
+ }
+
+ if (dead->is_macro()) {
+ C->remove_macro_node(dead);
+ }
+ }
+ // Aggressively kill globally dead uses
+ // (Cannot use DUIterator_Last because of the indefinite number
+ // of edge deletions per loop trip.)
+ while (dead->outcnt() > 0) {
+ remove_globally_dead_node(dead->raw_out(0));
+ }
+}
+
+//------------------------------subsume_node-----------------------------------
+// Remove users from node 'old' and add them to node 'nn'.
+void PhaseIterGVN::subsume_node( Node *old, Node *nn ) {
+ assert( old != hash_find(old), "should already have been removed" );
+ assert( old != C->top(), "cannot subsume top node");
+ // Copy debug or profile information to the new version:
+ C->copy_node_notes_to(nn, old);
+ // Move users of node 'old' to node 'nn'
+ for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin; ) {
+ Node* use = old->last_out(i); // for each use...
+ // use might need re-hashing (but it won't if it's a new node)
+ bool is_in_table = _table.hash_delete( use );
+ // Update use-def info as well
+ // We remove all occurrences of old within use->in,
+ // so as to avoid rehashing any node more than once.
+ // The hash table probe swamps any outer loop overhead.
+ uint num_edges = 0;
+ for (uint jmax = use->len(), j = 0; j < jmax; j++) {
+ if (use->in(j) == old) {
+ use->set_req(j, nn);
+ ++num_edges;
+ }
+ }
+ // Insert into GVN hash table if unique
+ // If a duplicate, 'use' will be cleaned up when pulled off worklist
+ if( is_in_table ) {
+ hash_find_insert(use);
+ }
+ i -= num_edges; // we deleted 1 or more copies of this edge
+ }
+
+ // Smash all inputs to 'old', isolating him completely
+ Node *temp = new (C, 1) Node(1);
+ temp->init_req(0,nn); // Add a use to nn to prevent him from dying
+ remove_dead_node( old );
+ temp->del_req(0); // Yank bogus edge
+#ifndef PRODUCT
+ if( VerifyIterativeGVN ) {
+ for ( int i = 0; i < _verify_window_size; i++ ) {
+ if ( _verify_window[i] == old )
+ _verify_window[i] = nn;
+ }
+ }
+#endif
+ _worklist.remove(temp); // this can be necessary
+ temp->destruct(); // reuse the _idx of this little guy
+}
+
+//------------------------------add_users_to_worklist--------------------------
+void PhaseIterGVN::add_users_to_worklist0( Node *n ) {
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ _worklist.push(n->fast_out(i)); // Push on worklist
+ }
+}
+
+void PhaseIterGVN::add_users_to_worklist( Node *n ) {
+ add_users_to_worklist0(n);
+
+ // Move users of node to worklist
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* use = n->fast_out(i); // Get use
+
+ if( use->is_Multi() || // Multi-definer? Push projs on worklist
+ use->is_Store() ) // Enable store/load same address
+ add_users_to_worklist0(use);
+
+ // If we changed the receiver type to a call, we need to revisit
+ // the Catch following the call. It's looking for a non-NULL
+ // receiver to know when to enable the regular fall-through path
+ // in addition to the NullPtrException path.
+ if (use->is_CallDynamicJava() && n == use->in(TypeFunc::Parms)) {
+ Node* p = use->as_CallDynamicJava()->proj_out(TypeFunc::Control);
+ if (p != NULL) {
+ add_users_to_worklist0(p);
+ }
+ }
+
+ if( use->is_Cmp() ) { // Enable CMP/BOOL optimization
+ add_users_to_worklist(use); // Put Bool on worklist
+ // Look for the 'is_x2logic' pattern: "x ? 0 : 1" and put the
+ // phi merging either 0 or 1 onto the worklist
+ if (use->outcnt() > 0) {
+ Node* bol = use->raw_out(0);
+ if (bol->outcnt() > 0) {
+ Node* iff = bol->raw_out(0);
+ if (iff->outcnt() == 2) {
+ Node* ifproj0 = iff->raw_out(0);
+ Node* ifproj1 = iff->raw_out(1);
+ if (ifproj0->outcnt() > 0 && ifproj1->outcnt() > 0) {
+ Node* region0 = ifproj0->raw_out(0);
+ Node* region1 = ifproj1->raw_out(0);
+ if( region0 == region1 )
+ add_users_to_worklist0(region0);
+ }
+ }
+ }
+ }
+ }
+
+ uint use_op = use->Opcode();
+ // If changed Cast input, check Phi users for simple cycles
+ if( use->is_ConstraintCast() || use->Opcode() == Op_CheckCastPP ) {
+ for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* u = use->fast_out(i2);
+ if (u->is_Phi())
+ _worklist.push(u);
+ }
+ }
+ // If changed LShift inputs, check RShift users for useless sign-ext
+ if( use_op == Op_LShiftI ) {
+ for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* u = use->fast_out(i2);
+ if (u->Opcode() == Op_RShiftI)
+ _worklist.push(u);
+ }
+ }
+ // If changed AddP inputs, check Stores for loop invariant
+ if( use_op == Op_AddP ) {
+ for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* u = use->fast_out(i2);
+ if (u->is_Mem())
+ _worklist.push(u);
+ }
+ }
+ // If changed initialization activity, check dependent Stores
+ if (use_op == Op_Allocate || use_op == Op_AllocateArray) {
+ InitializeNode* init = use->as_Allocate()->initialization();
+ if (init != NULL) {
+ Node* imem = init->proj_out(TypeFunc::Memory);
+ if (imem != NULL) add_users_to_worklist0(imem);
+ }
+ }
+ if (use_op == Op_Initialize) {
+ Node* imem = use->as_Initialize()->proj_out(TypeFunc::Memory);
+ if (imem != NULL) add_users_to_worklist0(imem);
+ }
+ }
+}
+
+//=============================================================================
+#ifndef PRODUCT
+uint PhaseCCP::_total_invokes = 0;
+uint PhaseCCP::_total_constants = 0;
+#endif
+//------------------------------PhaseCCP---------------------------------------
+// Conditional Constant Propagation, ala Wegman & Zadeck
+PhaseCCP::PhaseCCP( PhaseIterGVN *igvn ) : PhaseIterGVN(igvn) {
+ NOT_PRODUCT( clear_constants(); )
+ assert( _worklist.size() == 0, "" );
+ // Clear out _nodes from IterGVN. Must be clear to transform call.
+ _nodes.clear(); // Clear out from IterGVN
+ analyze();
+}
+
+#ifndef PRODUCT
+//------------------------------~PhaseCCP--------------------------------------
+PhaseCCP::~PhaseCCP() {
+ inc_invokes();
+ _total_constants += count_constants();
+}
+#endif
+
+
+#ifdef ASSERT
+static bool ccp_type_widens(const Type* t, const Type* t0) {
+ assert(t->meet(t0) == t, "Not monotonic");
+ switch (t->base() == t0->base() ? t->base() : Type::Top) {
+ case Type::Int:
+ assert(t0->isa_int()->_widen <= t->isa_int()->_widen, "widen increases");
+ break;
+ case Type::Long:
+ assert(t0->isa_long()->_widen <= t->isa_long()->_widen, "widen increases");
+ break;
+ }
+ return true;
+}
+#endif //ASSERT
+
+//------------------------------analyze----------------------------------------
+void PhaseCCP::analyze() {
+ // Initialize all types to TOP, optimistic analysis
+ for (int i = C->unique() - 1; i >= 0; i--) {
+ _types.map(i,Type::TOP);
+ }
+
+ // Push root onto worklist
+ Unique_Node_List worklist;
+ worklist.push(C->root());
+
+ // Pull from worklist; compute new value; push changes out.
+ // This loop is the meat of CCP.
+ while( worklist.size() ) {
+ Node *n = worklist.pop();
+ const Type *t = n->Value(this);
+ if (t != type(n)) {
+ assert(ccp_type_widens(t, type(n)), "ccp type must widen");
+#ifndef PRODUCT
+ if( TracePhaseCCP ) {
+ t->dump();
+ do { tty->print("\t"); } while (tty->position() < 16);
+ n->dump();
+ }
+#endif
+ set_type(n, t);
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* m = n->fast_out(i); // Get user
+ if( m->is_Region() ) { // New path to Region? Must recheck Phis too
+ for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* p = m->fast_out(i2); // Propagate changes to uses
+ if( p->bottom_type() != type(p) ) // If not already bottomed out
+ worklist.push(p); // Propagate change to user
+ }
+ }
+ // If we changed the receiver type to a call, we need to revisit
+ // the Catch following the call. It's looking for a non-NULL
+ // receiver to know when to enable the regular fall-through path
+ // in addition to the NullPtrException path.
+ if (m->is_Call()) {
+ for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* p = m->fast_out(i2); // Propagate changes to uses
+ if (p->is_Proj() && p->as_Proj()->_con == TypeFunc::Control && p->outcnt() == 1)
+ worklist.push(p->unique_out());
+ }
+ }
+ if( m->bottom_type() != type(m) ) // If not already bottomed out
+ worklist.push(m); // Propagate change to user
+ }
+ }
+ }
+}
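+
+// Sketch of the fixed point above (illustrative): every type starts at TOP,
+// the optimistic assumption, and can only move down the lattice because
+// Value() is monotonic (see ccp_type_widens). For example, a Phi merging the
+// constant 3 with a path still typed TOP keeps the constant type; if the
+// other path later produces 5, the Phi falls to the meet of the two constants
+// and its users go back on the worklist. Types only fall and the lattice has
+// finite depth, so the worklist eventually drains.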
+
+//------------------------------do_transform-----------------------------------
+// Top level driver for the recursive transformer
+void PhaseCCP::do_transform() {
+ // Correct leaves of new-space Nodes; they point to old-space.
+ C->set_root( transform(C->root())->as_Root() );
+ assert( C->top(), "missing TOP node" );
+ assert( C->root(), "missing root" );
+}
+
+//------------------------------transform--------------------------------------
+// Given a Node in old-space, clone him into new-space.
+// Convert any of his old-space children into new-space children.
+Node *PhaseCCP::transform( Node *n ) {
+ Node *new_node = _nodes[n->_idx]; // Check for transformed node
+ if( new_node != NULL )
+ return new_node; // Been there, done that, return old answer
+ new_node = transform_once(n); // Check for constant
+ _nodes.map( n->_idx, new_node ); // Flag as having been cloned
+
+ // Allocate stack of size _nodes.Size()/2 to avoid frequent realloc
+ GrowableArray <Node *> trstack(C->unique() >> 1);
+
+ trstack.push(new_node); // Process children of cloned node
+ while ( trstack.is_nonempty() ) {
+ Node *clone = trstack.pop();
+ uint cnt = clone->req();
+ for( uint i = 0; i < cnt; i++ ) { // For all inputs do
+ Node *input = clone->in(i);
+ if( input != NULL ) { // Ignore NULLs
+ Node *new_input = _nodes[input->_idx]; // Check for cloned input node
+ if( new_input == NULL ) {
+ new_input = transform_once(input); // Check for constant
+ _nodes.map( input->_idx, new_input );// Flag as having been cloned
+ trstack.push(new_input);
+ }
+ assert( new_input == clone->in(i), "insanity check");
+ }
+ }
+ }
+ return new_node;
+}
+
+
+//------------------------------transform_once---------------------------------
+// For PhaseCCP, transformation is IDENTITY unless Node computed a constant.
+Node *PhaseCCP::transform_once( Node *n ) {
+ const Type *t = type(n);
+ // Constant? Use constant Node instead
+ if( t->singleton() ) {
+ Node *nn = n; // Default is to return the original constant
+ if( t == Type::TOP ) {
+ // cache my top node on the Compile instance
+ if( C->cached_top_node() == NULL || C->cached_top_node()->in(0) == NULL ) {
+ C->set_cached_top_node( ConNode::make(C, Type::TOP) );
+ set_type(C->top(), Type::TOP);
+ }
+ nn = C->top();
+ }
+ if( !n->is_Con() ) {
+ if( t != Type::TOP ) {
+ nn = makecon(t); // ConNode::make(t);
+ NOT_PRODUCT( inc_constants(); )
+ } else if( n->is_Region() ) { // Unreachable region
+ // Note: nn == C->top()
+ n->set_req(0, NULL); // Cut self-reference
+ // Eagerly remove dead phis to avoid creating phi copies.
+ for (DUIterator i = n->outs(); n->has_out(i); i++) {
+ Node* m = n->out(i);
+ if( m->is_Phi() ) {
+ assert(type(m) == Type::TOP, "Unreachable region should not have live phis.");
+ add_users_to_worklist(m);
+ hash_delete(m); // Yank from hash before hacking edges
+ subsume_node(m, nn);
+ --i; // deleted this phi; rescan starting with next position
+ }
+ }
+ }
+ add_users_to_worklist(n); // Users of about-to-be-constant 'n'
+ hash_delete(n); // Removed 'n' from table before subsuming it
+ subsume_node(n,nn); // Update DefUse edges for new constant
+ }
+ return nn;
+ }
+
+ // If x is a TypeNode, capture any more-precise type permanently into Node
+ if (t != n->bottom_type()) {
+ hash_delete(n); // changing bottom type may force a rehash
+ n->raise_bottom_type(t);
+ _worklist.push(n); // n re-enters the hash table via the worklist
+ }
+
+ // Idealize graph using DU info. Must clone() into new-space.
+ // DU info is generally used to show profitability, progress or safety
+ // (but generally not needed for correctness).
+ Node *nn = n->Ideal_DU_postCCP(this);
+
+ // TEMPORARY fix to ensure that 2nd GVN pass eliminates NULL checks
+ switch( n->Opcode() ) {
+ case Op_FastLock: // Revisit FastLocks for lock coarsening
+ case Op_If:
+ case Op_CountedLoopEnd:
+ case Op_Region:
+ case Op_Loop:
+ case Op_CountedLoop:
+ case Op_Conv2B:
+ case Op_Opaque1:
+ case Op_Opaque2:
+ _worklist.push(n);
+ break;
+ default:
+ break;
+ }
+ if( nn ) {
+ _worklist.push(n);
+ // Put users of 'n' onto worklist for second igvn transform
+ add_users_to_worklist(n);
+ return nn;
+ }
+
+ return n;
+}
+
+//---------------------------------saturate------------------------------------
+const Type* PhaseCCP::saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const {
+ const Type* wide_type = new_type->widen(old_type);
+ if (wide_type != new_type) { // did we widen?
+ // If so, we may have widened beyond the limit type. Clip it back down.
+ new_type = wide_type->filter(limit_type);
+ }
+ return new_type;
+}
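+
+// Illustrative example: if old_type is int:0..10 and new_type is int:0..11,
+// widen() may jump well past 11 rather than chase a slowly growing bound one
+// step at a time; filter(limit_type) then clips the widened range back to
+// whatever bound the caller supplied (for instance a known loop limit), so
+// the shortcut never discards information the limit already guarantees.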
+
+//------------------------------print_statistics-------------------------------
+#ifndef PRODUCT
+void PhaseCCP::print_statistics() {
+ tty->print_cr("CCP: %d constants found: %d", _total_invokes, _total_constants);
+}
+#endif
+
+
+//=============================================================================
+#ifndef PRODUCT
+uint PhasePeephole::_total_peepholes = 0;
+#endif
+//------------------------------PhasePeephole----------------------------------
+// Apply peephole optimizations to machine instructions after register allocation
+PhasePeephole::PhasePeephole( PhaseRegAlloc *regalloc, PhaseCFG &cfg )
+ : PhaseTransform(Peephole), _regalloc(regalloc), _cfg(cfg) {
+ NOT_PRODUCT( clear_peepholes(); )
+}
+
+#ifndef PRODUCT
+//------------------------------~PhasePeephole---------------------------------
+PhasePeephole::~PhasePeephole() {
+ _total_peepholes += count_peepholes();
+}
+#endif
+
+//------------------------------transform--------------------------------------
+Node *PhasePeephole::transform( Node *n ) {
+ ShouldNotCallThis();
+ return NULL;
+}
+
+//------------------------------do_transform-----------------------------------
+void PhasePeephole::do_transform() {
+ bool method_name_not_printed = true;
+
+ // Examine each basic block
+ for( uint block_number = 1; block_number < _cfg._num_blocks; ++block_number ) {
+ Block *block = _cfg._blocks[block_number];
+ bool block_not_printed = true;
+
+ // and each instruction within a block
+ uint end_index = block->_nodes.size();
+ // block->end_idx() not valid after PhaseRegAlloc
+ for( uint instruction_index = 1; instruction_index < end_index; ++instruction_index ) {
+ Node *n = block->_nodes.at(instruction_index);
+ if( n->is_Mach() ) {
+ MachNode *m = n->as_Mach();
+ int deleted_count = 0;
+ // check for peephole opportunities
+ MachNode *m2 = m->peephole( block, instruction_index, _regalloc, deleted_count, C );
+ if( m2 != NULL ) {
+#ifndef PRODUCT
+ if( PrintOptoPeephole ) {
+ // Print method, first time only
+ if( C->method() && method_name_not_printed ) {
+ C->method()->print_short_name(); tty->cr();
+ method_name_not_printed = false;
+ }
+ // Print this block
+ if( Verbose && block_not_printed) {
+ tty->print_cr("in block");
+ block->dump();
+ block_not_printed = false;
+ }
+ // Print instructions being deleted
+ for( int i = (deleted_count - 1); i >= 0; --i ) {
+ block->_nodes.at(instruction_index-i)->as_Mach()->format(_regalloc); tty->cr();
+ }
+ tty->print_cr("replaced with");
+ // Print new instruction
+ m2->format(_regalloc);
+ tty->print("\n\n");
+ }
+#endif
+ // Remove old nodes from basic block and update instruction_index
+ // (old nodes still exist and may have edges pointing to them
+ // as register allocation info is stored in the allocator using
+ // the node index to live range mappings.)
+ uint safe_instruction_index = (instruction_index - deleted_count);
+ for( ; (instruction_index > safe_instruction_index); --instruction_index ) {
+ block->_nodes.remove( instruction_index );
+ }
+ // install new node after safe_instruction_index
+ block->_nodes.insert( safe_instruction_index + 1, m2 );
+ end_index = block->_nodes.size() - 1; // Recompute new block size
+ NOT_PRODUCT( inc_peepholes(); )
+ }
+ }
+ }
+ }
+}
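+
+// Worked example of the index bookkeeping above (illustrative numbers): if a
+// peephole rule matched at instruction_index == 7 and replaced two
+// instructions (deleted_count == 2), then safe_instruction_index == 5, the
+// nodes at block indices 7 and 6 are removed, and m2 is inserted at index 6,
+// exactly where the matched pair began; end_index is then recomputed.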
+
+//------------------------------print_statistics-------------------------------
+#ifndef PRODUCT
+void PhasePeephole::print_statistics() {
+ tty->print_cr("Peephole: peephole rules applied: %d", _total_peepholes);
+}
+#endif
+
+
+//=============================================================================
+//------------------------------set_req_X--------------------------------------
+void Node::set_req_X( uint i, Node *n, PhaseIterGVN *igvn ) {
+ assert( is_not_dead(n), "can not use dead node");
+ assert( igvn->hash_find(this) != this, "Need to remove from hash before changing edges" );
+ Node *old = in(i);
+ set_req(i, n);
+
+ // old goes dead?
+ if( old ) {
+ switch (old->outcnt()) {
+ case 0: // Kill all his inputs, and recursively kill other dead nodes.
+ if (!old->is_top())
+ igvn->remove_dead_node( old );
+ break;
+ case 1:
+ if( old->is_Store() || old->has_special_unique_user() )
+ igvn->add_users_to_worklist( old );
+ break;
+ case 2:
+ if( old->is_Store() )
+ igvn->add_users_to_worklist( old );
+ if( old->Opcode() == Op_Region )
+ igvn->_worklist.push(old);
+ break;
+ case 3:
+ if( old->Opcode() == Op_Region ) {
+ igvn->_worklist.push(old);
+ igvn->add_users_to_worklist( old );
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+}
+
+//-------------------------------replace_by-----------------------------------
+// Using def-use info, replace one node for another. Follow the def-use info
+// to all users of the OLD node. Then make all uses point to the NEW node.
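+// Note that only the use edges are rewired: the OLD node keeps its own inputs,
+// so callers that want it completely gone typically follow this with
+// disconnect_inputs() (see e.g. the junk-Phi removal in
+// post_allocate_copy_removal) or hand it to the IGVN dead-node machinery.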
+void Node::replace_by(Node *new_node) {
+ assert(!is_top(), "top node has no DU info");
+ for (DUIterator_Last imin, i = last_outs(imin); i >= imin; ) {
+ Node* use = last_out(i);
+ uint uses_found = 0;
+ for (uint j = 0; j < use->len(); j++) {
+ if (use->in(j) == this) {
+ if (j < use->req())
+ use->set_req(j, new_node);
+ else use->set_prec(j, new_node);
+ uses_found++;
+ }
+ }
+ i -= uses_found; // we deleted 1 or more copies of this edge
+ }
+}
+
+//=============================================================================
+//-----------------------------------------------------------------------------
+void Type_Array::grow( uint i ) {
+ if( !_max ) {
+ _max = 1;
+ _types = (const Type**)_a->Amalloc( _max * sizeof(Type*) );
+ _types[0] = NULL;
+ }
+ uint old = _max;
+ while( i >= _max ) _max <<= 1; // Double to fit
+ _types = (const Type**)_a->Arealloc( _types, old*sizeof(Type*),_max*sizeof(Type*));
+ memset( &_types[old], 0, (_max-old)*sizeof(Type*) );
+}
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void Type_Array::dump() const {
+ uint max = Size();
+ for( uint i = 0; i < max; i++ ) {
+ if( _types[i] != NULL ) {
+ tty->print(" %d\t== ", i); _types[i]->dump(); tty->cr();
+ }
+ }
+}
+#endif
diff --git a/src/share/vm/opto/phaseX.hpp b/src/share/vm/opto/phaseX.hpp
new file mode 100644
index 000000000..46439c91e
--- /dev/null
+++ b/src/share/vm/opto/phaseX.hpp
@@ -0,0 +1,516 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Compile;
+class ConINode;
+class ConLNode;
+class Node;
+class Type;
+class PhaseTransform;
+class PhaseGVN;
+class PhaseIterGVN;
+class PhaseCCP;
+class PhasePeephole;
+class PhaseRegAlloc;
+
+
+//-----------------------------------------------------------------------------
+// Expandable closed hash-table of nodes, initialized to NULL.
+// Note that the constructor just zeros things
+// Storage is reclaimed when the Arena's lifetime is over.
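+// Usage sketch: value numbering probes with hash_find_insert(n); if an
+// equivalent node is already in the table it is returned and n can be
+// discarded, otherwise n is inserted. hash_delete() does not shrink the
+// table; it overwrites the slot with the _sentinel node so that probe
+// sequences through that slot still work.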
+class NodeHash : public StackObj {
+protected:
+ Arena *_a; // Arena to allocate in
+ uint _max; // Size of table (power of 2)
+ uint _inserts; // For grow and debug, count of hash_inserts
+ uint _insert_limit; // 'grow' when _inserts reaches _insert_limit
+ Node **_table; // Hash table of Node pointers
+ Node *_sentinel; // Replaces deleted entries in hash table
+
+public:
+ NodeHash(uint est_max_size);
+ NodeHash(Arena *arena, uint est_max_size);
+ NodeHash(NodeHash *use_this_state);
+#ifdef ASSERT
+ ~NodeHash(); // Unlock all nodes upon destruction of table.
+ void operator=(const NodeHash&); // Unlock all nodes upon replacement of table.
+#endif
+ Node *hash_find(const Node*);// Find an equivalent version in hash table
+ Node *hash_find_insert(Node*);// If not in table insert else return found node
+ void hash_insert(Node*); // Insert into hash table
+ bool hash_delete(const Node*);// Replace with _sentinel in hash table
+ void check_grow() {
+ _inserts++;
+ if( _inserts == _insert_limit ) { grow(); }
+ assert( _inserts <= _insert_limit, "hash table overflow");
+ assert( _inserts < _max, "hash table overflow" );
+ }
+ static uint round_up(uint); // Round up to nearest power of 2
+ void grow(); // Grow _table to next power of 2 and rehash
+ // Return 75% of _max, rounded up.
+ uint insert_limit() const { return _max - (_max>>2); }
+
+ void clear(); // Set all entries to NULL, keep storage.
+ // Size of hash table
+ uint size() const { return _max; }
+ // Return Node* at index in table
+ Node *at(uint table_index) {
+ assert(table_index < _max, "Must be within table");
+ return _table[table_index];
+ }
+
+ void remove_useless_nodes(VectorSet &useful); // replace with sentinel
+
+ Node *sentinel() { return _sentinel; }
+
+#ifndef PRODUCT
+ Node *find_index(uint idx); // For debugging
+ void dump(); // For debugging, dump statistics
+#endif
+ uint _grows; // For debugging, count of table grow()s
+ uint _look_probes; // For debugging, count of hash probes
+ uint _lookup_hits; // For debugging, count of hash_finds
+ uint _lookup_misses; // For debugging, count of hash_finds
+ uint _insert_probes; // For debugging, count of hash probes
+ uint _delete_probes; // For debugging, count of hash probes for deletes
+ uint _delete_hits; // For debugging, count of hash probes for deletes
+ uint _delete_misses; // For debugging, count of hash probes for deletes
+ uint _total_inserts; // For debugging, total inserts into hash table
+ uint _total_insert_probes; // For debugging, total probes while inserting
+};
+
+
+//-----------------------------------------------------------------------------
+// Map dense integer indices to Types. Uses classic doubling-array trick.
+// Abstractly provides an infinite array of Type*'s, initialized to NULL.
+// Note that the constructor just zeros things, and since I use Arena
+// allocation I do not need a destructor to reclaim storage.
+// Despite the general name, this class is customized for use by PhaseTransform.
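+// Example (sketch): with _max == 4, a call to map(9, t) doubles the backing
+// array to 16 entries, NULL-fills the new slots, and then stores t at index 9.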
+class Type_Array : public StackObj {
+ Arena *_a; // Arena to allocate in
+ uint _max;
+ const Type **_types;
+ void grow( uint i ); // Grow array node to fit
+ const Type *operator[] ( uint i ) const // Lookup, or NULL for not mapped
+ { return (i<_max) ? _types[i] : (Type*)NULL; }
+ friend class PhaseTransform;
+public:
+ Type_Array(Arena *a) : _a(a), _max(0), _types(0) {}
+ Type_Array(Type_Array *ta) : _a(ta->_a), _max(ta->_max), _types(ta->_types) { }
+ const Type *fast_lookup(uint i) const{assert(i<_max,"oob");return _types[i];}
+ // Extend the mapping: index i maps to Type *n.
+ void map( uint i, const Type *n ) { if( i>=_max ) grow(i); _types[i] = n; }
+ uint Size() const { return _max; }
+#ifndef PRODUCT
+ void dump() const;
+#endif
+};
+
+
+//------------------------------PhaseRemoveUseless-----------------------------
+// Remove useless nodes from GVN hash-table, worklist, and graph
+class PhaseRemoveUseless : public Phase {
+protected:
+ Unique_Node_List _useful; // Nodes reachable from root
+ // list is allocated from current resource area
+public:
+ PhaseRemoveUseless( PhaseGVN *gvn, Unique_Node_List *worklist );
+
+ Unique_Node_List *get_useful() { return &_useful; }
+};
+
+
+//------------------------------PhaseTransform---------------------------------
+// Phases that analyze, then transform. Constructing the Phase object does any
+// global or slow analysis. The results are cached later for a fast
+// transformation pass. When the Phase object is deleted the cached analysis
+// results are deleted.
+class PhaseTransform : public Phase {
+protected:
+ Arena* _arena;
+ Node_Array _nodes; // Map old node indices to new nodes.
+ Type_Array _types; // Map old node indices to Types.
+
+ // ConNode caches:
+ enum { _icon_min = -1 * HeapWordSize,
+ _icon_max = 16 * HeapWordSize,
+ _lcon_min = _icon_min,
+ _lcon_max = _icon_max,
+ _zcon_max = (uint)T_CONFLICT
+ };
+ ConINode* _icons[_icon_max - _icon_min + 1]; // cached jint constant nodes
+ ConLNode* _lcons[_lcon_max - _lcon_min + 1]; // cached jlong constant nodes
+ ConNode* _zcons[_zcon_max + 1]; // cached is_zero_type nodes
+ void init_con_caches();
+
+ // Support both int and long caches because either might be an intptr_t,
+ // so they show up frequently in address computations.
+
+public:
+ PhaseTransform( PhaseNumber pnum );
+ PhaseTransform( Arena *arena, PhaseNumber pnum );
+ PhaseTransform( PhaseTransform *phase, PhaseNumber pnum );
+
+ Arena* arena() { return _arena; }
+ Type_Array& types() { return _types; }
+ // _nodes is used in varying ways by subclasses, which define local accessors
+
+public:
+ // Get a previously recorded type for the node n.
+ // This type must already have been recorded.
+ // If you want the type of a very new (untransformed) node,
+ // you must use type_or_null, and test the result for NULL.
+ const Type* type(const Node* n) const {
+ const Type* t = _types.fast_lookup(n->_idx);
+ assert(t != NULL, "must set before get");
+ return t;
+ }
+ // Get a previously recorded type for the node n,
+ // or else return NULL if there is none.
+ const Type* type_or_null(const Node* n) const {
+ return _types.fast_lookup(n->_idx);
+ }
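+  // Example (sketch): for a node that may not have been transformed yet,
+  //   const Type* t = type_or_null(n);
+  //   if (t == NULL)  t = n->bottom_type();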
+ // Record a type for a node.
+ void set_type(const Node* n, const Type *t) {
+ assert(t != NULL, "type must not be null");
+ _types.map(n->_idx, t);
+ }
+ // Record an initial type for a node, the node's bottom type.
+ void set_type_bottom(const Node* n) {
+ // Use this for initialization when bottom_type() (or better) is not handy.
+    // Usually the initialization should be to n->Value(this) instead,
+ // or a hand-optimized value like Type::MEMORY or Type::CONTROL.
+ assert(_types[n->_idx] == NULL, "must set the initial type just once");
+ _types.map(n->_idx, n->bottom_type());
+ }
+ // Make sure the types array is big enough to record a size for the node n.
+ // (In product builds, we never want to do range checks on the types array!)
+ void ensure_type_or_null(const Node* n) {
+ if (n->_idx >= _types.Size())
+ _types.map(n->_idx, NULL); // Grow the types array as needed.
+ }
+
+ // Utility functions:
+ const TypeInt* find_int_type( Node* n);
+ const TypeLong* find_long_type(Node* n);
+ jint find_int_con( Node* n, jint value_if_unknown) {
+ const TypeInt* t = find_int_type(n);
+ return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
+ }
+ jlong find_long_con(Node* n, jlong value_if_unknown) {
+ const TypeLong* t = find_long_type(n);
+ return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
+ }
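+  // Example (sketch, with an illustrative node 'count'):
+  //   jint shift = find_int_con(count, -1);   // -1 here means "not a known constant"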
+
+ // Make an idealized constant, i.e., one of ConINode, ConPNode, ConFNode, etc.
+ // Same as transform(ConNode::make(t)).
+ ConNode* makecon(const Type* t);
+ virtual ConNode* uncached_makecon(const Type* t) // override in PhaseValues
+ { ShouldNotCallThis(); return NULL; }
+
+ // Fast int or long constant. Same as TypeInt::make(i) or TypeLong::make(l).
+ ConINode* intcon(jint i);
+ ConLNode* longcon(jlong l);
+
+ // Fast zero or null constant. Same as makecon(Type::get_zero_type(bt)).
+ ConNode* zerocon(BasicType bt);
+
+ // Return a node which computes the same function as this node, but
+ // in a faster or cheaper fashion.
+ virtual Node *transform( Node *n ) = 0;
+
+ // Return whether two Nodes are equivalent.
+ // Must not be recursive, since the recursive version is built from this.
+ // For pessimistic optimizations this is simply pointer equivalence.
+ bool eqv(const Node* n1, const Node* n2) const { return n1 == n2; }
+
+ // Return whether two Nodes are equivalent, after stripping casting.
+ bool eqv_uncast(const Node* n1, const Node* n2) const {
+ return eqv(n1->uncast(), n2->uncast());
+ }
+
+ // For pessimistic passes, the return type must monotonically narrow.
+ // For optimistic passes, the return type must monotonically widen.
+ // It is possible to get into a "death march" in either type of pass,
+ // where the types are continually moving but it will take 2**31 or
+ // more steps to converge. This doesn't happen on most normal loops.
+ //
+ // Here is an example of a deadly loop for an optimistic pass, along
+ // with a partial trace of inferred types:
+ // x = phi(0,x'); L: x' = x+1; if (x' >= 0) goto L;
+ // 0 1 join([0..max], 1)
+ // [0..1] [1..2] join([0..max], [1..2])
+ // [0..2] [1..3] join([0..max], [1..3])
+ // ... ... ...
+ // [0..max] [min]u[1..max] join([0..max], [min..max])
+ // [0..max] ==> fixpoint
+ // We would have proven, the hard way, that the iteration space is all
+ // non-negative ints, with the loop terminating due to 32-bit overflow.
+ //
+ // Here is the corresponding example for a pessimistic pass:
+ // x = phi(0,x'); L: x' = x-1; if (x' >= 0) goto L;
+ // int int join([0..max], int)
+ // [0..max] [-1..max-1] join([0..max], [-1..max-1])
+ // [0..max-1] [-1..max-2] join([0..max], [-1..max-2])
+ // ... ... ...
+ // [0..1] [-1..0] join([0..max], [-1..0])
+ // 0 -1 join([0..max], -1)
+ // 0 == fixpoint
+ // We would have proven, the hard way, that the iteration space is {0}.
+ // (Usually, other optimizations will make the "if (x >= 0)" fold up
+ // before we get into trouble. But not always.)
+ //
+ // It's a pleasant thing to observe that the pessimistic pass
+ // will make short work of the optimistic pass's deadly loop,
+ // and vice versa. That is a good example of the complementary
+ // purposes of the CCP (optimistic) vs. GVN (pessimistic) phases.
+ //
+ // In any case, only widen or narrow a few times before going to the
+ // correct flavor of top or bottom.
+ //
+ // This call only needs to be made once as the data flows around any
+ // given cycle. We do it at Phis, and nowhere else.
+ // The types presented are the new type of a phi (computed by PhiNode::Value)
+ // and the previously computed type, last time the phi was visited.
+ //
+  // The third argument is the upper limit for the saturated value,
+ // if the phase wishes to widen the new_type.
+ // If the phase is narrowing, the old type provides a lower limit.
+ // Caller guarantees that old_type and new_type are no higher than limit_type.
+ virtual const Type* saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const
+ { ShouldNotCallThis(); return NULL; }
+
+#ifndef PRODUCT
+ void dump_old2new_map() const;
+ void dump_new( uint new_lidx ) const;
+ void dump_types() const;
+ void dump_nodes_and_types(const Node *root, uint depth, bool only_ctrl = true);
+ void dump_nodes_and_types_recur( const Node *n, uint depth, bool only_ctrl, VectorSet &visited);
+
+ uint _count_progress; // For profiling, count transforms that make progress
+  void set_progress() { ++_count_progress; assert( allow_progress(),"No progress allowed during verification"); }
+ void clear_progress() { _count_progress = 0; }
+ uint made_progress() const { return _count_progress; }
+
+ uint _count_transforms; // For profiling, count transforms performed
+ void set_transforms() { ++_count_transforms; }
+ void clear_transforms() { _count_transforms = 0; }
+ uint made_transforms() const{ return _count_transforms; }
+
+ bool _allow_progress; // progress not allowed during verification pass
+ void set_allow_progress(bool allow) { _allow_progress = allow; }
+ bool allow_progress() { return _allow_progress; }
+#endif
+};
+
+//------------------------------PhaseValues------------------------------------
+// Phase infrastructure to support values
+class PhaseValues : public PhaseTransform {
+protected:
+ NodeHash _table; // Hash table for value-numbering
+
+public:
+ PhaseValues( Arena *arena, uint est_max_size );
+ PhaseValues( PhaseValues *pt );
+ PhaseValues( PhaseValues *ptv, const char *dummy );
+ NOT_PRODUCT( ~PhaseValues(); )
+ virtual PhaseIterGVN *is_IterGVN() { return 0; }
+
+ // Some Ideal and other transforms delete --> modify --> insert values
+ bool hash_delete(Node *n) { return _table.hash_delete(n); }
+ void hash_insert(Node *n) { _table.hash_insert(n); }
+ Node *hash_find_insert(Node *n){ return _table.hash_find_insert(n); }
+ Node *hash_find(const Node *n) { return _table.hash_find(n); }
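+  // Sketch of that pattern for a node n whose inputs are about to change:
+  //   hash_delete(n);          // take n out before its edges (and hash) change
+  //   n->set_req(1, new_in);   // modify
+  //   hash_insert(n);          // re-insert under the new hash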
+
+ // Used after parsing to eliminate values that are no longer in program
+ void remove_useless_nodes(VectorSet &useful) { _table.remove_useless_nodes(useful); }
+
+ virtual ConNode* uncached_makecon(const Type* t); // override from PhaseTransform
+
+ virtual const Type* saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const
+ { return new_type; }
+
+#ifndef PRODUCT
+ uint _count_new_values; // For profiling, count new values produced
+ void inc_new_values() { ++_count_new_values; }
+ void clear_new_values() { _count_new_values = 0; }
+ uint made_new_values() const { return _count_new_values; }
+#endif
+};
+
+
+//------------------------------PhaseGVN---------------------------------------
+// Phase for performing local, pessimistic GVN-style optimizations.
+class PhaseGVN : public PhaseValues {
+public:
+ PhaseGVN( Arena *arena, uint est_max_size ) : PhaseValues( arena, est_max_size ) {}
+ PhaseGVN( PhaseGVN *gvn ) : PhaseValues( gvn ) {}
+ PhaseGVN( PhaseGVN *gvn, const char *dummy ) : PhaseValues( gvn, dummy ) {}
+
+ // Return a node which computes the same function as this node, but
+ // in a faster or cheaper fashion.
+ Node *transform( Node *n );
+ Node *transform_no_reclaim( Node *n );
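+  // Example (sketch): a freshly constructed ideal node is normally passed
+  // through transform() before being used as an input elsewhere, e.g.
+  //   Node* sum = gvn.transform(raw_add);   // 'raw_add' is a just-built add node
+  // which idealizes, types and value-numbers it (possibly returning an
+  // existing equivalent node instead).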
+
+ // Check for a simple dead loop when a data node references itself.
+ DEBUG_ONLY(void dead_loop_check(Node *n);)
+};
+
+//------------------------------PhaseIterGVN-----------------------------------
+// Phase for iteratively performing local, pessimistic GVN-style optimizations
+// and ideal transformations on the graph.
+class PhaseIterGVN : public PhaseGVN {
+ // Idealize old Node 'n' with respect to its inputs and its value
+ virtual Node *transform_old( Node *a_node );
+protected:
+
+ // Idealize new Node 'n' with respect to its inputs and its value
+ virtual Node *transform( Node *a_node );
+
+ // Warm up hash table, type table and initial worklist
+ void init_worklist( Node *a_root );
+
+ virtual const Type* saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const;
+ // Usually returns new_type. Returns old_type if new_type is only a slight
+ // improvement, such that it would take many (>>10) steps to reach 2**32.
+
+public:
+ PhaseIterGVN( PhaseIterGVN *igvn ); // Used by CCP constructor
+ PhaseIterGVN( PhaseGVN *gvn ); // Used after Parser
+ PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ); // Used after +VerifyOpto
+
+ virtual PhaseIterGVN *is_IterGVN() { return this; }
+
+ Unique_Node_List _worklist; // Iterative worklist
+
+ // Given def-use info and an initial worklist, apply Node::Ideal,
+ // Node::Value, Node::Identity, hash-based value numbering, Node::Ideal_DU
+ // and dominator info to a fixed point.
+ void optimize();
+
+ // Register a new node with the iter GVN pass without transforming it.
+ // Used when we need to restructure a Region/Phi area and all the Regions
+ // and Phis need to complete this one big transform before any other
+ // transforms can be triggered on the region.
+ // Optional 'orig' is an earlier version of this node.
+ // It is significant only for debugging and profiling.
+ Node* register_new_node_with_optimizer(Node* n, Node* orig = NULL);
+
+ // Kill a globally dead Node. It is allowed to have uses which are
+ // assumed dead and left 'in limbo'.
+ void remove_globally_dead_node( Node *dead );
+
+ // Kill all inputs to a dead node, recursively making more dead nodes.
+ // The Node must be dead locally, i.e., have no uses.
+ void remove_dead_node( Node *dead ) {
+ assert(dead->outcnt() == 0 && !dead->is_top(), "node must be dead");
+ remove_globally_dead_node(dead);
+ }
+
+ // Subsume users of node 'old' into node 'nn'
+ // If no Def-Use info existed for 'nn' it will after call.
+ void subsume_node( Node *old, Node *nn );
+
+ // Add users of 'n' to worklist
+ void add_users_to_worklist0( Node *n );
+ void add_users_to_worklist ( Node *n );
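+  // Typical client pattern (sketch): after rewiring edges around a node n,
+  //   add_users_to_worklist(n);   // dependent nodes get revisited
+  //   _worklist.push(n);          // and so does n itself, by optimize()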
+
+#ifndef PRODUCT
+protected:
+ // Sub-quadratic implementation of VerifyIterativeGVN.
+ unsigned long _verify_counter;
+ unsigned long _verify_full_passes;
+ enum { _verify_window_size = 30 };
+ Node* _verify_window[_verify_window_size];
+ void verify_step(Node* n);
+#endif
+};
+
+//------------------------------PhaseCCP---------------------------------------
+// Phase for performing global Conditional Constant Propagation.
+// Should be replaced with combined CCP & GVN someday.
+class PhaseCCP : public PhaseIterGVN {
+ // Non-recursive. Use analysis to transform single Node.
+ virtual Node *transform_once( Node *n );
+
+public:
+ PhaseCCP( PhaseIterGVN *igvn ); // Compute conditional constants
+ NOT_PRODUCT( ~PhaseCCP(); )
+
+ // Worklist algorithm identifies constants
+ void analyze();
+  // Recursive traversal of program. Uses analysis to modify program.
+ virtual Node *transform( Node *n );
+ // Do any transformation after analysis
+ void do_transform();
+
+ virtual const Type* saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const;
+ // Returns new_type->widen(old_type), which increments the widen bits until
+ // giving up with TypeInt::INT or TypeLong::LONG.
+ // Result is clipped to limit_type if necessary.
+
+#ifndef PRODUCT
+ static uint _total_invokes; // For profiling, count invocations
+ void inc_invokes() { ++PhaseCCP::_total_invokes; }
+
+ static uint _total_constants; // For profiling, count constants found
+ uint _count_constants;
+ void clear_constants() { _count_constants = 0; }
+ void inc_constants() { ++_count_constants; }
+ uint count_constants() const { return _count_constants; }
+
+ static void print_statistics();
+#endif
+};
+
+
+//------------------------------PhasePeephole----------------------------------
+// Phase for performing peephole optimizations on register allocated basic blocks.
+class PhasePeephole : public PhaseTransform {
+ PhaseRegAlloc *_regalloc;
+ PhaseCFG &_cfg;
+  // Recursive traversal of program. The pure transform() function is unused in this phase
+ virtual Node *transform( Node *n );
+
+public:
+ PhasePeephole( PhaseRegAlloc *regalloc, PhaseCFG &cfg );
+ NOT_PRODUCT( ~PhasePeephole(); )
+
+ // Do any transformation after analysis
+ void do_transform();
+
+#ifndef PRODUCT
+ static uint _total_peepholes; // For profiling, count peephole rules applied
+ uint _count_peepholes;
+ void clear_peepholes() { _count_peepholes = 0; }
+ void inc_peepholes() { ++_count_peepholes; }
+ uint count_peepholes() const { return _count_peepholes; }
+
+ static void print_statistics();
+#endif
+};
diff --git a/src/share/vm/opto/postaloc.cpp b/src/share/vm/opto/postaloc.cpp
new file mode 100644
index 000000000..35b469bae
--- /dev/null
+++ b/src/share/vm/opto/postaloc.cpp
@@ -0,0 +1,584 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_postaloc.cpp.incl"
+
+// See if this register kind does not require two registers
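+// (e.g. Op_RegI and Op_RegF use one register, while Op_RegL, Op_RegD,
+// and on LP64 Op_RegP, occupy a register pair)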
+static bool is_single_register(uint x) {
+#ifdef _LP64
+ return (x != Op_RegD && x != Op_RegL && x != Op_RegP);
+#else
+ return (x != Op_RegD && x != Op_RegL);
+#endif
+}
+
+//------------------------------may_be_copy_of_callee-----------------------------
+// Check to see if we can possibly be a copy of a callee-save value.
+bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const {
+ // Short circuit if there are no callee save registers
+ if (_matcher.number_of_saved_registers() == 0) return false;
+
+ // Expect only a spill-down and reload on exit for callee-save spills.
+ // Chains of copies cannot be deep.
+ // 5008997 - This is wishful thinking. Register allocator seems to
+ // be splitting live ranges for callee save registers to such
+ // an extent that in large methods the chains can be very long
+ // (50+). The conservative answer is to return true if we don't
+  // know as this prevents optimizations from occurring.
+
+ const int limit = 60;
+ int i;
+ for( i=0; i < limit; i++ ) {
+ if( def->is_Proj() && def->in(0)->is_Start() &&
+ _matcher.is_save_on_entry(lrgs(n2lidx(def)).reg()) )
+ return true; // Direct use of callee-save proj
+ if( def->is_Copy() ) // Copies carry value through
+ def = def->in(def->is_Copy());
+ else if( def->is_Phi() ) // Phis can merge it from any direction
+ def = def->in(1);
+ else
+ break;
+ guarantee(def != NULL, "must not resurrect dead copy");
+ }
+ // If we reached the end and didn't find a callee save proj
+ // then this may be a callee save proj so we return true
+  // as the conservative answer. If we didn't reach the end
+ // we must have discovered that it was not a callee save
+ // else we would have returned.
+ return i == limit;
+}
+
+
+
+//------------------------------yank_if_dead-----------------------------------
+// Removed an edge from 'old'. Yank if dead. Return adjustment counts to
+// iterators in the current block.
+int PhaseChaitin::yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
+ int blk_adjust=0;
+ while (old->outcnt() == 0 && old != C->top()) {
+ Block *oldb = _cfg._bbs[old->_idx];
+ oldb->find_remove(old);
+ // Count 1 if deleting an instruction from the current block
+ if( oldb == current_block ) blk_adjust++;
+ _cfg._bbs.map(old->_idx,NULL);
+ OptoReg::Name old_reg = lrgs(n2lidx(old)).reg();
+ if( regnd && (*regnd)[old_reg]==old ) { // Instruction is currently available?
+ value->map(old_reg,NULL); // Yank from value/regnd maps
+ regnd->map(old_reg,NULL); // This register's value is now unknown
+ }
+ Node *tmp = old->req() > 1 ? old->in(1) : NULL;
+ old->disconnect_inputs(NULL);
+ if( !tmp ) break;
+ old = tmp;
+ }
+ return blk_adjust;
+}
+
+//------------------------------use_prior_register-----------------------------
+// Use the prior value instead of the current value, in an effort to make
+// the current value go dead. Return block iterator adjustment, in case
+// we yank some instructions from this block.
+int PhaseChaitin::use_prior_register( Node *n, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd ) {
+ // No effect?
+ if( def == n->in(idx) ) return 0;
+ // Def is currently dead and can be removed? Do not resurrect
+ if( def->outcnt() == 0 ) return 0;
+
+  // Not every pair of physical registers is assignment compatible,
+  // e.g. on sparc, floating point registers are not assignable to integer
+  // registers.
+ const LRG &def_lrg = lrgs(n2lidx(def));
+ OptoReg::Name def_reg = def_lrg.reg();
+ const RegMask &use_mask = n->in_RegMask(idx);
+ bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
+ : (use_mask.is_AllStack() != 0));
+ // Check for a copy to or from a misaligned pair.
+ can_use = can_use && !use_mask.is_misaligned_Pair() && !def_lrg.mask().is_misaligned_Pair();
+
+ if (!can_use)
+ return 0;
+
+ // Capture the old def in case it goes dead...
+ Node *old = n->in(idx);
+
+ // Save-on-call copies can only be elided if the entire copy chain can go
+ // away, lest we get the same callee-save value alive in 2 locations at
+ // once. We check for the obvious trivial case here. Although it can
+ // sometimes be elided with cooperation outside our scope, here we will just
+ // miss the opportunity. :-(
+ if( may_be_copy_of_callee(def) ) {
+    if( old->outcnt() > 1 ) return 0; // We're not the last user
+ int idx = old->is_Copy();
+ assert( idx, "chain of copies being removed" );
+ Node *old2 = old->in(idx); // Chain of copies
+ if( old2->outcnt() > 1 ) return 0; // old is not the last user
+ int idx2 = old2->is_Copy();
+ if( !idx2 ) return 0; // Not a chain of 2 copies
+ if( def != old2->in(idx2) ) return 0; // Chain of exactly 2 copies
+ }
+
+ // Use the new def
+ n->set_req(idx,def);
+ _post_alloc++;
+
+ // Is old def now dead? We successfully yanked a copy?
+ return yank_if_dead(old,current_block,&value,&regnd);
+}
+
+
+//------------------------------skip_copies------------------------------------
+// Skip through any number of copies (that don't mod oop-i-ness)
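+// Example (sketch): for c = SpillCopy(SpillCopy(def)), skip_copies(c) returns
+// def, stopping early only if some copy in the chain changes the live range's
+// oop-ness (a casting copy).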
+Node *PhaseChaitin::skip_copies( Node *c ) {
+ int idx = c->is_Copy();
+ uint is_oop = lrgs(n2lidx(c))._is_oop;
+ while (idx != 0) {
+ guarantee(c->in(idx) != NULL, "must not resurrect dead copy");
+ if (lrgs(n2lidx(c->in(idx)))._is_oop != is_oop)
+ break; // casting copy, not the same value
+ c = c->in(idx);
+ idx = c->is_Copy();
+ }
+ return c;
+}
+
+//------------------------------elide_copy-------------------------------------
+// Remove (bypass) copies along Node n, edge k.
+int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs ) {
+ int blk_adjust = 0;
+
+ uint nk_idx = n2lidx(n->in(k));
+ OptoReg::Name nk_reg = lrgs(nk_idx ).reg();
+
+ // Remove obvious same-register copies
+ Node *x = n->in(k);
+ int idx;
+ while( (idx=x->is_Copy()) != 0 ) {
+ Node *copy = x->in(idx);
+ guarantee(copy != NULL, "must not resurrect dead copy");
+ if( lrgs(n2lidx(copy)).reg() != nk_reg ) break;
+ blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
+ if( n->in(k) != copy ) break; // Failed for some cutout?
+ x = copy; // Progress, try again
+ }
+
+ // Phis and 2-address instructions cannot change registers so easily - their
+ // outputs must match their input.
+ if( !can_change_regs )
+ return blk_adjust; // Only check stupid copies!
+
+ // Loop backedges won't have a value-mapping yet
+ if( &value == NULL ) return blk_adjust;
+
+ // Skip through all copies to the _value_ being used. Do not change from
+ // int to pointer. This attempts to jump through a chain of copies, where
+ // intermediate copies might be illegal, i.e., value is stored down to stack
+ // then reloaded BUT survives in a register the whole way.
+ Node *val = skip_copies(n->in(k));
+
+ if( val == x ) return blk_adjust; // No progress?
+
+ bool single = is_single_register(val->ideal_reg());
+ uint val_idx = n2lidx(val);
+ OptoReg::Name val_reg = lrgs(val_idx).reg();
+
+ // See if it happens to already be in the correct register!
+  // (either the Phi's direct register, or the common case of the
+  // never-clobbered original-def register)
+ if( value[val_reg] == val &&
+ // Doubles check both halves
+ ( single || value[val_reg-1] == val ) ) {
+ blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
+ if( n->in(k) == regnd[val_reg] ) // Success! Quit trying
+ return blk_adjust;
+ }
+
+ // See if we can skip the copy by changing registers. Don't change from
+ // using a register to using the stack unless we know we can remove a
+ // copy-load. Otherwise we might end up making a pile of Intel cisc-spill
+ // ops reading from memory instead of just loading once and using the
+ // register.
+
+ // Also handle duplicate copies here.
+ const Type *t = val->is_Con() ? val->bottom_type() : NULL;
+
+ // Scan all registers to see if this value is around already
+ for( uint reg = 0; reg < (uint)_max_reg; reg++ ) {
+ Node *vv = value[reg];
+ if( !single ) { // Doubles check for aligned-adjacent pair
+ if( (reg&1)==0 ) continue; // Wrong half of a pair
+ if( vv != value[reg-1] ) continue; // Not a complete pair
+ }
+ if( vv == val || // Got a direct hit?
+ (t && vv && vv->bottom_type() == t && vv->is_Mach() &&
+ vv->as_Mach()->rule() == val->as_Mach()->rule()) ) { // Or same constant?
+ assert( !n->is_Phi(), "cannot change registers at a Phi so easily" );
+ if( OptoReg::is_stack(nk_reg) || // CISC-loading from stack OR
+ OptoReg::is_reg(reg) || // turning into a register use OR
+ regnd[reg]->outcnt()==1 ) { // last use of a spill-load turns into a CISC use
+ blk_adjust += use_prior_register(n,k,regnd[reg],current_block,value,regnd);
+ if( n->in(k) == regnd[reg] ) // Success! Quit trying
+ return blk_adjust;
+ } // End of if not degrading to a stack
+ } // End of if found value in another register
+ } // End of scan all machine registers
+ return blk_adjust;
+}
+
+
+//
+// Check if nreg already contains the constant value val. Normal copy
+// elimination doesn't work on constants because multiple
+// nodes can represent the same constant so the type and rule of the
+// MachNode must be checked to ensure equivalence.
+//
+bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Block *current_block,
+ Node_List& value, Node_List& regnd,
+ OptoReg::Name nreg, OptoReg::Name nreg2) {
+ if (value[nreg] != val && val->is_Con() &&
+ value[nreg] != NULL && value[nreg]->is_Con() &&
+ (nreg2 == OptoReg::Bad || value[nreg] == value[nreg2]) &&
+ value[nreg]->bottom_type() == val->bottom_type() &&
+ value[nreg]->as_Mach()->rule() == val->as_Mach()->rule()) {
+ // This code assumes that two MachNodes representing constants
+ // which have the same rule and the same bottom type will produce
+ // identical effects into a register. This seems like it must be
+ // objectively true unless there are hidden inputs to the nodes
+    // but if that were to change this code would need to be updated.
+    // Since they are equivalent the second one is redundant and can
+ // be removed.
+ //
+ // val will be replaced with the old value but val might have
+ // kills projections associated with it so remove them now so that
+    // yank_if_dead will be able to eliminate the copy once the uses
+    // have been transferred to the old value.
+ for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
+ Node* use = val->fast_out(i);
+ if (use->is_Proj() && use->outcnt() == 0) {
+ // Kill projections have no users and one input
+ use->set_req(0, C->top());
+ yank_if_dead(use, current_block, &value, &regnd);
+ --i; --imax;
+ }
+ }
+ _post_alloc++;
+ return true;
+ }
+ return false;
+}
+
+
+//------------------------------post_allocate_copy_removal---------------------
+// Post-Allocation peephole copy removal. We do this in 1 pass over the
+// basic blocks. We maintain a mapping of registers to Nodes (an array of
+// Nodes indexed by machine register or stack slot number). NULL means that a
+// register is not mapped to any Node. We can (and want to!) have several
+// registers map to the same Node. We walk forward over the instructions
+// updating the mapping as we go. At merge points we force a NULL if we have
+// to merge 2 different Nodes into the same register. Phi functions will give
+// us a new Node if there is a proper value merging. Since the blocks are
+// arranged in some RPO, we will visit all parent blocks before visiting any
+// successor blocks (except at loops).
+//
+// If we find a Copy we look to see if the Copy's source register is a stack
+// slot and that value has already been loaded into some machine register; if
+// so we use the machine register directly. This turns a Load into a reg-reg
+// Move. We also look for reloads of identical constants.
+//
+// When we see a use from a reg-reg Copy, we will attempt to use the copy's
+// source directly and make the copy go dead.
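+//
+// Example (sketch): suppose value v was spilled to a stack slot but is also
+// still live in register R5. A later copy that reloads v from the stack slot
+// can have its use redirected to R5, turning the load into a reg-reg move, and
+// if the reloading copy then has no remaining uses it is yanked as dead.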
+void PhaseChaitin::post_allocate_copy_removal() {
+ NOT_PRODUCT( Compile::TracePhase t3("postAllocCopyRemoval", &_t_postAllocCopyRemoval, TimeCompiler); )
+ ResourceMark rm;
+
+  // Need a mapping from basic block to Node_Lists. We need a Node_List to
+  // map from register number to value-producing Node.
+ Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
+ memset( blk2value, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
+  // Need a mapping from basic block to Node_Lists. We need a Node_List to
+  // map from register number to register-defining Node.
+ Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
+ memset( blk2regnd, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
+
+ // We keep unused Node_Lists on a free_list to avoid wasting
+ // memory.
+ GrowableArray<Node_List*> free_list = GrowableArray<Node_List*>(16);
+
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ uint j;
+ Block *b = _cfg._blocks[i];
+
+ // Count of Phis in block
+ uint phi_dex;
+ for( phi_dex = 1; phi_dex < b->_nodes.size(); phi_dex++ ) {
+ Node *phi = b->_nodes[phi_dex];
+ if( !phi->is_Phi() )
+ break;
+ }
+
+ // If any predecessor has not been visited, we do not know the state
+ // of registers at the start. Check for this, while updating copies
+ // along Phi input edges
+ bool missing_some_inputs = false;
+ Block *freed = NULL;
+ for( j = 1; j < b->num_preds(); j++ ) {
+ Block *pb = _cfg._bbs[b->pred(j)->_idx];
+ // Remove copies along phi edges
+ for( uint k=1; k<phi_dex; k++ )
+ elide_copy( b->_nodes[k], j, b, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false );
+ if( blk2value[pb->_pre_order] ) { // Have a mapping on this edge?
+ // See if this predecessor's mappings have been used by everybody
+ // who wants them. If so, free 'em.
+ uint k;
+ for( k=0; k<pb->_num_succs; k++ ) {
+ Block *pbsucc = pb->_succs[k];
+ if( !blk2value[pbsucc->_pre_order] && pbsucc != b )
+ break; // Found a future user
+ }
+ if( k >= pb->_num_succs ) { // No more uses, free!
+ freed = pb; // Record last block freed
+ free_list.push(blk2value[pb->_pre_order]);
+ free_list.push(blk2regnd[pb->_pre_order]);
+ }
+ } else { // This block has unvisited (loopback) inputs
+ missing_some_inputs = true;
+ }
+ }
+
+
+ // Extract Node_List mappings. If 'freed' is non-zero, we just popped
+ // 'freed's blocks off the list
+ Node_List &regnd = *(free_list.is_empty() ? new Node_List() : free_list.pop());
+ Node_List &value = *(free_list.is_empty() ? new Node_List() : free_list.pop());
+ assert( !freed || blk2value[freed->_pre_order] == &value, "" );
+ value.map(_max_reg,NULL);
+ regnd.map(_max_reg,NULL);
+ // Set mappings as OUR mappings
+ blk2value[b->_pre_order] = &value;
+ blk2regnd[b->_pre_order] = &regnd;
+
+ // Initialize value & regnd for this block
+ if( missing_some_inputs ) {
+ // Some predecessor has not yet been visited; zap map to empty
+ for( uint k = 0; k < (uint)_max_reg; k++ ) {
+ value.map(k,NULL);
+ regnd.map(k,NULL);
+ }
+ } else {
+ if( !freed ) { // Didn't get a freebie prior block
+ // Must clone some data
+ freed = _cfg._bbs[b->pred(1)->_idx];
+ Node_List &f_value = *blk2value[freed->_pre_order];
+ Node_List &f_regnd = *blk2regnd[freed->_pre_order];
+ for( uint k = 0; k < (uint)_max_reg; k++ ) {
+ value.map(k,f_value[k]);
+ regnd.map(k,f_regnd[k]);
+ }
+ }
+ // Merge all inputs together, setting to NULL any conflicts.
+ for( j = 1; j < b->num_preds(); j++ ) {
+ Block *pb = _cfg._bbs[b->pred(j)->_idx];
+ if( pb == freed ) continue; // Did self already via freelist
+ Node_List &p_regnd = *blk2regnd[pb->_pre_order];
+ for( uint k = 0; k < (uint)_max_reg; k++ ) {
+ if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs?
+ value.map(k,NULL); // Then no value handy
+ regnd.map(k,NULL);
+ }
+ }
+ }
+ }
+
+ // For all Phi's
+ for( j = 1; j < phi_dex; j++ ) {
+ uint k;
+ Node *phi = b->_nodes[j];
+ uint pidx = n2lidx(phi);
+ OptoReg::Name preg = lrgs(n2lidx(phi)).reg();
+
+ // Remove copies remaining on edges. Check for junk phi.
+ Node *u = NULL;
+ for( k=1; k<phi->req(); k++ ) {
+ Node *x = phi->in(k);
+ if( phi != x && u != x ) // Found a different input
+ u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input
+ }
+ if( u != NodeSentinel ) { // Junk Phi. Remove
+ b->_nodes.remove(j--); phi_dex--;
+ _cfg._bbs.map(phi->_idx,NULL);
+ phi->replace_by(u);
+ phi->disconnect_inputs(NULL);
+ continue;
+ }
+ // Note that if value[pidx] exists, then we merged no new values here
+ // and the phi is useless. This can happen even with the above phi
+ // removal for complex flows. I cannot keep the better known value here
+ // because locally the phi appears to define a new merged value. If I
+ // keep the better value then a copy of the phi, being unable to use the
+ // global flow analysis, can't "peek through" the phi to the original
+ // reaching value and so will act like it's defining a new value. This
+ // can lead to situations where some uses are from the old and some from
+ // the new values. Not illegal by itself but throws the over-strong
+ // assert in scheduling.
+ if( pidx ) {
+ value.map(preg,phi);
+ regnd.map(preg,phi);
+ OptoReg::Name preg_lo = OptoReg::add(preg,-1);
+ if( !is_single_register(phi->ideal_reg()) ) {
+ value.map(preg_lo,phi);
+ regnd.map(preg_lo,phi);
+ }
+ }
+ }
+
+ // For all remaining instructions
+ for( j = phi_dex; j < b->_nodes.size(); j++ ) {
+ Node *n = b->_nodes[j];
+
+ if( n->outcnt() == 0 && // Dead?
+ n != C->top() && // (ignore TOP, it has no du info)
+ !n->is_Proj() ) { // fat-proj kills
+ j -= yank_if_dead(n,b,&value,&regnd);
+ continue;
+ }
+
+ // Improve reaching-def info. Occasionally post-alloc's liveness gives
+ // up (at loop backedges, because we aren't doing a full flow pass).
+ // The presence of a live use essentially asserts that the use's def is
+ // alive and well at the use (or else the allocator fubar'd). Take
+ // advantage of this info to set a reaching def for the use-reg.
+ uint k;
+ for( k = 1; k < n->req(); k++ ) {
+ Node *def = n->in(k); // n->in(k) is a USE; def is the DEF for this USE
+ guarantee(def != NULL, "no disconnected nodes at this point");
+ uint useidx = n2lidx(def); // useidx is the live range index for this USE
+
+ if( useidx ) {
+ OptoReg::Name ureg = lrgs(useidx).reg();
+ if( !value[ureg] ) {
+ int idx; // Skip occasional useless copy
+ while( (idx=def->is_Copy()) != 0 &&
+ def->in(idx) != NULL && // NULL should not happen
+ ureg == lrgs(n2lidx(def->in(idx))).reg() )
+ def = def->in(idx);
+ Node *valdef = skip_copies(def); // tighten up val through non-useless copies
+ value.map(ureg,valdef); // record improved reaching-def info
+ regnd.map(ureg, def);
+ // Record other half of doubles
+ OptoReg::Name ureg_lo = OptoReg::add(ureg,-1);
+ if( !is_single_register(def->ideal_reg()) &&
+ ( !RegMask::can_represent(ureg_lo) ||
+ lrgs(useidx).mask().Member(ureg_lo) ) && // Nearly always adjacent
+ !value[ureg_lo] ) {
+ value.map(ureg_lo,valdef); // record improved reaching-def info
+ regnd.map(ureg_lo, def);
+ }
+ }
+ }
+ }
+
+ const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0;
+
+ // Remove copies along input edges
+ for( k = 1; k < n->req(); k++ )
+ j -= elide_copy( n, k, b, value, regnd, two_adr!=k );
+
+ // Unallocated Nodes define no registers
+ uint lidx = n2lidx(n);
+ if( !lidx ) continue;
+
+ // Update the register defined by this instruction
+ OptoReg::Name nreg = lrgs(lidx).reg();
+ // Skip through all copies to the _value_ being defined.
+ // Do not change from int to pointer
+ Node *val = skip_copies(n);
+
+ uint n_ideal_reg = n->ideal_reg();
+ if( is_single_register(n_ideal_reg) ) {
+ // If Node 'n' does not change the value mapped by the register,
+ // then 'n' is a useless copy. Do not update the register->node
+ // mapping so 'n' will go dead.
+ if( value[nreg] != val ) {
+ if (eliminate_copy_of_constant(val, b, value, regnd, nreg, OptoReg::Bad)) {
+ n->replace_by(regnd[nreg]);
+ j -= yank_if_dead(n,b,&value,&regnd);
+ } else {
+ // Update the mapping: record new Node defined by the register
+ regnd.map(nreg,n);
+ // Update mapping for defined *value*, which is the defined
+ // Node after skipping all copies.
+ value.map(nreg,val);
+ }
+ } else if( !may_be_copy_of_callee(n) && regnd[nreg]->outcnt() != 0 ) {
+ assert( n->is_Copy(), "" );
+ n->replace_by(regnd[nreg]);
+ j -= yank_if_dead(n,b,&value,&regnd);
+ }
+ } else {
+ // If the value occupies a register pair, record same info
+ // in both registers.
+ OptoReg::Name nreg_lo = OptoReg::add(nreg,-1);
+ if( RegMask::can_represent(nreg_lo) && // Either a spill slot, or
+ !lrgs(lidx).mask().Member(nreg_lo) ) { // Nearly always adjacent
+ // Sparc occasionally has non-adjacent pairs.
+ // Find the actual other value
+ RegMask tmp = lrgs(lidx).mask();
+ tmp.Remove(nreg);
+ nreg_lo = tmp.find_first_elem();
+ }
+ if( value[nreg] != val || value[nreg_lo] != val ) {
+ if (eliminate_copy_of_constant(n, b, value, regnd, nreg, nreg_lo)) {
+ n->replace_by(regnd[nreg]);
+ j -= yank_if_dead(n,b,&value,&regnd);
+ } else {
+ regnd.map(nreg , n );
+ regnd.map(nreg_lo, n );
+ value.map(nreg ,val);
+ value.map(nreg_lo,val);
+ }
+ } else if( !may_be_copy_of_callee(n) && regnd[nreg]->outcnt() != 0 ) {
+ assert( n->is_Copy(), "" );
+ n->replace_by(regnd[nreg]);
+ j -= yank_if_dead(n,b,&value,&regnd);
+ }
+ }
+
+ // Fat projections kill many registers
+ if( n_ideal_reg == MachProjNode::fat_proj ) {
+ RegMask rm = n->out_RegMask();
+ // wow, what an expensive iterator...
+ nreg = rm.find_first_elem();
+ while( OptoReg::is_valid(nreg)) {
+ rm.Remove(nreg);
+ value.map(nreg,n);
+ regnd.map(nreg,n);
+ nreg = rm.find_first_elem();
+ }
+ }
+
+ } // End of for all instructions in the block
+
+ } // End for all blocks
+}
diff --git a/src/share/vm/opto/reg_split.cpp b/src/share/vm/opto/reg_split.cpp
new file mode 100644
index 000000000..5101eb2e7
--- /dev/null
+++ b/src/share/vm/opto/reg_split.cpp
@@ -0,0 +1,1300 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_reg_split.cpp.incl"
+
+//------------------------------Split--------------------------------------
+// Walk the graph in RPO and for each lrg which spills, propagate reaching
+// definitions. During propagation, split the live range around regions of
+// High Register Pressure (HRP). If a Def is in a region of Low Register
+// Pressure (LRP), it will not get spilled until we encounter a region of
+// HRP between it and one of its uses. We will spill at the transition
+// point between LRP and HRP. Uses in the HRP region will use the spilled
+// Def. The first Use outside the HRP region will generate a SpillCopy to
+// hoist the live range back up into a register, and all subsequent uses
+// will use that new Def until another HRP region is encountered. Defs in
+// HRP regions will get trailing SpillCopies to push the LRG down into the
+// stack immediately.
+//
+// As a side effect, unlink from (hence make dead) coalesced copies.
+//
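+// Schematic (sketch):
+//   DEF (in LRP) ... spill inserted at the LRP->HRP transition ... uses inside
+//   the HRP region read the stacked copy ... the first use past the HRP region
+//   gets a SpillCopy hoisting the value back UP into a register, which then
+//   serves the remaining uses until the next HRP region.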
+
+static const char out_of_nodes[] = "out of nodes during split";
+
+//------------------------------get_spillcopy_wide-----------------------------
+// Get a SpillCopy node with wide-enough masks. Use the 'wide-mask', the
+// wide ideal-register spill-mask if possible. If the 'wide-mask' does
+// not cover the input (or output), use the input (or output) mask instead.
+Node *PhaseChaitin::get_spillcopy_wide( Node *def, Node *use, uint uidx ) {
+ // If ideal reg doesn't exist we've got a bad schedule happening
+ // that is forcing us to spill something that isn't spillable.
+ // Bail rather than abort
+ int ireg = def->ideal_reg();
+ if( ireg == 0 || ireg == Op_RegFlags ) {
+ C->record_method_not_compilable("attempted to spill a non-spillable item");
+ return NULL;
+ }
+ if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
+ return NULL;
+ }
+ const RegMask *i_mask = &def->out_RegMask();
+ const RegMask *w_mask = C->matcher()->idealreg2spillmask[ireg];
+ const RegMask *o_mask = use ? &use->in_RegMask(uidx) : w_mask;
+ const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask;
+ const RegMask *w_o_mask;
+
+ if( w_mask->overlap( *o_mask ) && // Overlap AND
+ ((ireg != Op_RegL && ireg != Op_RegD // Single use or aligned
+#ifdef _LP64
+ && ireg != Op_RegP
+#endif
+ ) || o_mask->is_aligned_Pairs()) ) {
+ // Don't come here for mis-aligned doubles
+ w_o_mask = w_mask;
+ } else { // wide ideal mask does not overlap with o_mask
+ // Mis-aligned doubles come here and XMM->FPR moves on x86.
+ w_o_mask = o_mask; // Must target desired registers
+ // Does the ideal-reg-mask overlap with o_mask? I.e., can I use
+ // a reg-reg move or do I need a trip across register classes
+ // (and thus through memory)?
+ if( !C->matcher()->idealreg2regmask[ireg]->overlap( *o_mask) && o_mask->is_UP() )
+ // Here we assume a trip through memory is required.
+ w_i_mask = &C->FIRST_STACK_mask();
+ }
+ return new (C) MachSpillCopyNode( def, *w_i_mask, *w_o_mask );
+}
+
+//------------------------------insert_proj------------------------------------
+// Insert the spill at the chosen location. Skip over any intervening Proj's or
+// Phis. Skip over a CatchNode and projs, inserting in the fall-through block
+// instead. Update high-pressure indices. Create a new live range.
+void PhaseChaitin::insert_proj( Block *b, uint i, Node *spill, uint maxlrg ) {
+ // Skip intervening ProjNodes. Do not insert between a ProjNode and
+ // its definer.
+ while( i < b->_nodes.size() &&
+ (b->_nodes[i]->is_Proj() ||
+ b->_nodes[i]->is_Phi() ) )
+ i++;
+
+ // Do not insert between a call and his Catch
+ if( b->_nodes[i]->is_Catch() ) {
+ // Put the instruction at the top of the fall-thru block.
+ // Find the fall-thru projection
+ while( 1 ) {
+ const CatchProjNode *cp = b->_nodes[++i]->as_CatchProj();
+ if( cp->_con == CatchProjNode::fall_through_index )
+ break;
+ }
+ int sidx = i - b->end_idx()-1;
+ b = b->_succs[sidx]; // Switch to successor block
+ i = 1; // Right at start of block
+ }
+
+ b->_nodes.insert(i,spill); // Insert node in block
+ _cfg._bbs.map(spill->_idx,b); // Update node->block mapping to reflect
+ // Adjust the point where we go hi-pressure
+ if( i <= b->_ihrp_index ) b->_ihrp_index++;
+ if( i <= b->_fhrp_index ) b->_fhrp_index++;
+
+ // Assign a new Live Range Number to the SpillCopy and grow
+ // the node->live range mapping.
+ new_lrg(spill,maxlrg);
+}
+
+//------------------------------split_DEF--------------------------------------
+// There are four categories of Split; UP/DOWN x DEF/USE
+// Only three of these really occur as DOWN/USE will always color
+// Any Split with a DEF cannot CISC-Spill now. Thus we need
+// two helper routines, one for Split DEFS (insert after instruction),
+// one for Split USES (insert before instruction). DEF insertion
+// happens inside Split, where the Leaveblock array is updated.
+uint PhaseChaitin::split_DEF( Node *def, Block *b, int loc, uint maxlrg, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx ) {
+#ifdef ASSERT
+ // Increment the counter for this lrg
+ splits.at_put(slidx, splits.at(slidx)+1);
+#endif
+ // If we are spilling the memory op for an implicit null check, at the
+ // null check location (ie - null check is in HRP block) we need to do
+ // the null-check first, then spill-down in the following block.
+ // (The implicit_null_check function ensures the use is also dominated
+ // by the branch-not-taken block.)
+ Node *be = b->end();
+ if( be->is_MachNullCheck() && be->in(1) == def && def == b->_nodes[loc] ) {
+ // Spill goes in the branch-not-taken block
+ b = b->_succs[b->_nodes[b->end_idx()+1]->Opcode() == Op_IfTrue];
+ loc = 0; // Just past the Region
+ }
+ assert( loc >= 0, "must insert past block head" );
+
+ // Get a def-side SpillCopy
+ Node *spill = get_spillcopy_wide(def,NULL,0);
+  // Did we fail to split? Then bail
+ if (!spill) {
+ return 0;
+ }
+
+ // Insert the spill at chosen location
+ insert_proj( b, loc+1, spill, maxlrg++);
+
+ // Insert new node into Reaches array
+ Reachblock[slidx] = spill;
+ // Update debug list of reaching down definitions by adding this one
+ debug_defs[slidx] = spill;
+
+ // return updated count of live ranges
+ return maxlrg;
+}
+
+//------------------------------split_USE--------------------------------------
+// Splits at uses can involve redefining the LRG, so no CISC Spilling there.
+// Debug uses want to know if def is already stack enabled.
+uint PhaseChaitin::split_USE( Node *def, Block *b, Node *use, uint useidx, uint maxlrg, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx ) {
+#ifdef ASSERT
+ // Increment the counter for this lrg
+ splits.at_put(slidx, splits.at(slidx)+1);
+#endif
+
+ // Some setup stuff for handling debug node uses
+ JVMState* jvms = use->jvms();
+ uint debug_start = jvms ? jvms->debug_start() : 999999;
+ uint debug_end = jvms ? jvms->debug_end() : 999999;
+
+ //-------------------------------------------
+ // Check for use of debug info
+ if (useidx >= debug_start && useidx < debug_end) {
+    // Actually it's perfectly legal for constant debug info to appear,
+ // just unlikely. In this case the optimizer left a ConI of a 4
+ // as both inputs to a Phi with only a debug use. It's a single-def
+ // live range of a rematerializable value. The live range spills,
+ // rematerializes and now the ConI directly feeds into the debug info.
+ // assert(!def->is_Con(), "constant debug info already constructed directly");
+
+ // Special split handling for Debug Info
+ // If DEF is DOWN, just hook the edge and return
+ // If DEF is UP, Split it DOWN for this USE.
+ if( def->is_Mach() ) {
+ if( def_down ) {
+ // DEF is DOWN, so connect USE directly to the DEF
+ use->set_req(useidx, def);
+ } else {
+ // Block and index where the use occurs.
+ Block *b = _cfg._bbs[use->_idx];
+ // Put the clone just prior to use
+ int bindex = b->find_node(use);
+ // DEF is UP, so must copy it DOWN and hook in USE
+ // Insert SpillCopy before the USE, which uses DEF as its input,
+ // and defs a new live range, which is used by this node.
+ Node *spill = get_spillcopy_wide(def,use,useidx);
+ // did we fail to split?
+ if (!spill) {
+ // Bail
+ return 0;
+ }
+ // insert into basic block
+ insert_proj( b, bindex, spill, maxlrg++ );
+ // Use the new split
+ use->set_req(useidx,spill);
+ }
+ // No further split handling needed for this use
+ return maxlrg;
+ } // End special splitting for debug info live range
+ } // If debug info
+
+ // CISC-SPILLING
+ // Finally, check to see if USE is CISC-Spillable, and if so,
+ // gather_lrg_masks will add the flags bit to its mask, and
+ // no use side copy is needed. This frees up the live range
+ // register choices without causing copy coalescing, etc.
+ if( UseCISCSpill && cisc_sp ) {
+ int inp = use->cisc_operand();
+ if( inp != AdlcVMDeps::Not_cisc_spillable )
+ // Convert operand number to edge index number
+ inp = use->as_Mach()->operand_index(inp);
+ if( inp == (int)useidx ) {
+ use->set_req(useidx, def);
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" set_split: ");
+ use->dump();
+ }
+#endif
+ return maxlrg;
+ }
+ }
+
+ //-------------------------------------------
+ // Insert a Copy before the use
+
+ // Block and index where the use occurs.
+ int bindex;
+  // Phi input spill-copies belong at the end of the prior block
+ if( use->is_Phi() ) {
+ b = _cfg._bbs[b->pred(useidx)->_idx];
+ bindex = b->end_idx();
+ } else {
+ // Put the clone just prior to use
+ bindex = b->find_node(use);
+ }
+
+ Node *spill = get_spillcopy_wide( def, use, useidx );
+ if( !spill ) return 0; // Bailed out
+ // Insert SpillCopy before the USE, which uses the reaching DEF as
+ // its input, and defs a new live range, which is used by this node.
+ insert_proj( b, bindex, spill, maxlrg++ );
+ // Use the spill/clone
+ use->set_req(useidx,spill);
+
+ // return updated live range count
+ return maxlrg;
+}
+
+//------------------------------split_Rematerialize----------------------------
+// Clone a local copy of the def.
+Node *PhaseChaitin::split_Rematerialize( Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru ) {
+ // The input live ranges will be stretched to the site of the new
+ // instruction. They might be stretched past a def and will thus
+ // have the old and new values of the same live range alive at the
+ // same time - a definite no-no. Split out private copies of
+ // the inputs.
+ if( def->req() > 1 ) {
+ for( uint i = 1; i < def->req(); i++ ) {
+ Node *in = def->in(i);
+ // Check for single-def (LRG cannot be redefined)
+ uint lidx = n2lidx(in);
+ if( lidx >= _maxlrg ) continue; // Value is a recent spill-copy
+ if( lrgs(lidx)._def != NodeSentinel ) continue;
+
+ Block *b_def = _cfg._bbs[def->_idx];
+ int idx_def = b_def->find_node(def);
+ Node *in_spill = get_spillcopy_wide( in, def, i );
+ if( !in_spill ) return 0; // Bailed out
+ insert_proj(b_def,idx_def,in_spill,maxlrg++);
+ if( b_def == b )
+ insidx++;
+ def->set_req(i,in_spill);
+ }
+ }
+
+ Node *spill = def->clone();
+ if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
+ // Check when generating nodes
+ return 0;
+ }
+
+ // See if any inputs are currently being spilled, and take the
+ // latest copy of spilled inputs.
+ if( spill->req() > 1 ) {
+ for( uint i = 1; i < spill->req(); i++ ) {
+ Node *in = spill->in(i);
+ uint lidx = Find_id(in);
+
+ // Walk backwards thru spill copy node intermediates
+ if( walkThru )
+ while ( in->is_SpillCopy() && lidx >= _maxlrg ) {
+ in = in->in(1);
+ lidx = Find_id(in);
+ }
+
+ if( lidx < _maxlrg && lrgs(lidx).reg() >= LRG::SPILL_REG ) {
+ Node *rdef = Reachblock[lrg2reach[lidx]];
+ if( rdef ) spill->set_req(i,rdef);
+ }
+ }
+ }
+
+
+ assert( spill->out_RegMask().is_UP(), "rematerialize to a reg" );
+ // Rematerialized op is def->spilled+1
+ set_was_spilled(spill);
+ if( _spilled_once.test(def->_idx) )
+ set_was_spilled(spill);
+
+ insert_proj( b, insidx, spill, maxlrg++ );
+#ifdef ASSERT
+ // Increment the counter for this lrg
+ splits.at_put(slidx, splits.at(slidx)+1);
+#endif
+ // See if the cloned def kills any flags, and copy those kills as well
+ uint i = insidx+1;
+ if( clone_projs( b, i, def, spill, maxlrg ) ) {
+ // Adjust the point where we go hi-pressure
+ if( i <= b->_ihrp_index ) b->_ihrp_index++;
+ if( i <= b->_fhrp_index ) b->_fhrp_index++;
+ }
+
+ return spill;
+}
+
+//------------------------------is_high_pressure-------------------------------
+// Function to compute whether or not this live range is "high pressure"
+// in this block - whether it spills eagerly or not.
+bool PhaseChaitin::is_high_pressure( Block *b, LRG *lrg, uint insidx ) {
+ if( lrg->_was_spilled1 ) return true;
+ // Forced spilling due to conflict? Then split only at binding uses
+ // or defs, not for supposed capacity problems.
+ // CNC - Turned off 7/8/99, causes too much spilling
+ // if( lrg->_is_bound ) return false;
+
+ // Not yet reached the high-pressure cutoff point, so low pressure
+ uint hrp_idx = lrg->_is_float ? b->_fhrp_index : b->_ihrp_index;
+ if( insidx < hrp_idx ) return false;
+ // Register pressure for the block as a whole depends on reg class
+ int block_pres = lrg->_is_float ? b->_freg_pressure : b->_reg_pressure;
+ // Bound live ranges will split at the binding points first;
+ // Intermediate splits should assume the live range's register set
+ // got "freed up" and that num_regs will become INT_PRESSURE.
+ int bound_pres = lrg->_is_float ? FLOATPRESSURE : INTPRESSURE;
+ // Effective register pressure limit.
+ int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs())
+ ? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres;
+ // High pressure if block pressure requires more register freedom
+ // than live range has.
+ return block_pres >= lrg_pres;
+}
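
A minimal standalone paraphrase of the final comparison above (the _was_spilled1 and HRP-index early-outs are omitted, and every name here is invented for illustration rather than taken from the HotSpot API):

    #include <cassert>

    // Paraphrase of the closing test in PhaseChaitin::is_high_pressure():
    // a live range counts as "high pressure" once the block's pressure for
    // its register class reaches the live range's effective limit.
    static bool is_high_pressure_sketch(int block_pressure,    // reg or freg pressure of the block
                                        int invalid_mask_size, // lrg->get_invalid_mask_size()
                                        int num_regs,          // lrg->num_regs()
                                        int bound_pressure) {  // INTPRESSURE or FLOATPRESSURE
      int lrg_pres = (invalid_mask_size > num_regs)
          ? (invalid_mask_size >> (num_regs - 1))
          : bound_pressure;
      return block_pressure >= lrg_pres;
    }

    int main() {
      assert(!is_high_pressure_sketch(5, 1, 1, 6)); // below the class-wide limit
      assert( is_high_pressure_sketch(6, 1, 1, 6)); // at the limit
      assert( is_high_pressure_sketch(4, 8, 2, 6)); // many forbidden regs lower the limit to 8>>1 = 4
      return 0;
    }
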
+
+
+//------------------------------prompt_use---------------------------------
+// True if lidx is used before any real register is def'd in the block
+bool PhaseChaitin::prompt_use( Block *b, uint lidx ) {
+ if( lrgs(lidx)._was_spilled2 ) return false;
+
+ // Scan block for 1st use.
+ for( uint i = 1; i <= b->end_idx(); i++ ) {
+ Node *n = b->_nodes[i];
+ // Ignore PHI use, these can be up or down
+ if( n->is_Phi() ) continue;
+ for( uint j = 1; j < n->req(); j++ )
+ if( Find_id(n->in(j)) == lidx )
+ return true; // Found 1st use!
+ if( n->out_RegMask().is_NotEmpty() ) return false;
+ }
+ return false;
+}
+
+//------------------------------Split--------------------------------------
+//----------Split Routine----------
+// ***** NEW SPLITTING HEURISTIC *****
+// DEFS: If the DEF is in a High Register Pressure(HRP) Block, split there.
+// Else, no split unless there is a HRP block between a DEF and
+// one of its uses, and then split at the HRP block.
+//
+// USES: If USE is in HRP, split at use to leave main LRG on stack.
+// Else, hoist LRG back up to register only (ie - split is also DEF)
+// We will compute a new maxlrg as we go
+uint PhaseChaitin::Split( uint maxlrg ) {
+ NOT_PRODUCT( Compile::TracePhase t3("regAllocSplit", &_t_regAllocSplit, TimeCompiler); )
+
+ uint bidx, pidx, slidx, insidx, inpidx, twoidx;
+ uint non_phi = 1, spill_cnt = 0;
+ Node **Reachblock;
+ Node *n1, *n2, *n3;
+ Node_List *defs,*phis;
+ bool *UPblock;
+ bool u1, u2, u3;
+ Block *b, *pred;
+ PhiNode *phi;
+ GrowableArray<uint> lidxs;
+
+ // Array of counters to count splits per live range
+ GrowableArray<uint> splits;
+
+ //----------Setup Code----------
+ // Create a convenient mapping from lrg numbers to reaches/leaves indices
+ uint *lrg2reach = NEW_RESOURCE_ARRAY( uint, _maxlrg );
+ // Keep track of DEFS & Phis for later passes
+ defs = new Node_List();
+ phis = new Node_List();
+ // Gather info on which LRG's are spilling, and build maps
+ for( bidx = 1; bidx < _maxlrg; bidx++ ) {
+ if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {
+ assert(!lrgs(bidx).mask().is_AllStack(),"AllStack should color");
+ lrg2reach[bidx] = spill_cnt;
+ spill_cnt++;
+ lidxs.append(bidx);
+#ifdef ASSERT
+ // Initialize the split counts to zero
+ splits.append(0);
+#endif
+#ifndef PRODUCT
+ if( PrintOpto && WizardMode && lrgs(bidx)._was_spilled1 )
+ tty->print_cr("Warning, 2nd spill of L%d",bidx);
+#endif
+ }
+ }
+
+ // Create side arrays for propagating reaching defs info.
+ // Each block needs a node pointer for each spilling live range for the
+ // Def which is live into the block. Phi nodes handle multiple input
+ // Defs by querying the output of their predecessor blocks and resolving
+ // them to a single Def at the phi. The pointer is updated for each
+ // Def in the block, and then becomes the output for the block when
+ // processing of the block is complete. We also need to track whether
+ // a Def is UP or DOWN. UP means that it should get a register (ie -
+ // it is always in LRP regions), and DOWN means that it is probably
+ // on the stack (ie - it crosses HRP regions).
+ Node ***Reaches = NEW_RESOURCE_ARRAY( Node**, _cfg._num_blocks+1 );
+ bool **UP = NEW_RESOURCE_ARRAY( bool*, _cfg._num_blocks+1 );
+ Node **debug_defs = NEW_RESOURCE_ARRAY( Node*, spill_cnt );
+ VectorSet **UP_entry= NEW_RESOURCE_ARRAY( VectorSet*, spill_cnt );
+
+ // Initialize Reaches & UP
+ for( bidx = 0; bidx < _cfg._num_blocks+1; bidx++ ) {
+ Reaches[bidx] = NEW_RESOURCE_ARRAY( Node*, spill_cnt );
+ UP[bidx] = NEW_RESOURCE_ARRAY( bool, spill_cnt );
+ Node **Reachblock = Reaches[bidx];
+ bool *UPblock = UP[bidx];
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ UPblock[slidx] = true; // Assume they start in registers
+ Reachblock[slidx] = NULL; // Assume that no def is present
+ }
+ }
+
+ // Initialize to array of empty vectorsets
+ for( slidx = 0; slidx < spill_cnt; slidx++ )
+ UP_entry[slidx] = new VectorSet(Thread::current()->resource_area());
+
+ //----------PASS 1----------
+ //----------Propagation & Node Insertion Code----------
+ // Walk the Blocks in RPO for DEF & USE info
+ for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) {
+
+ if (C->check_node_count(spill_cnt, out_of_nodes)) {
+ return 0;
+ }
+
+ b = _cfg._blocks[bidx];
+ // Reaches & UP arrays for this block
+ Reachblock = Reaches[b->_pre_order];
+ UPblock = UP[b->_pre_order];
+ // Reset counter of start of non-Phi nodes in block
+ non_phi = 1;
+ //----------Block Entry Handling----------
+ // Check for need to insert a new phi
+ // Cycle through this block's predecessors, collecting Reaches
+ // info for each spilled LRG. If they are identical, no phi is
+ // needed. If they differ, check for a phi, and insert if missing,
+ // or update edges if present. Set current block's Reaches set to
+ // be either the phi's or the reaching def, as appropriate.
+ // If no Phi is needed, check if the LRG needs to spill on entry
+ // to the block due to HRP.
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ // Grab the live range number
+ uint lidx = lidxs.at(slidx);
+ // Do not bother splitting or putting in Phis for single-def
+ // rematerialized live ranges. This happens a lot to constants
+ // with long live ranges.
+ if( lrgs(lidx)._def != NodeSentinel &&
+ lrgs(lidx)._def->rematerialize() ) {
+ // reset the Reaches & UP entries
+ Reachblock[slidx] = lrgs(lidx)._def;
+ UPblock[slidx] = true;
+ // Record following instruction in case 'n' rematerializes and
+ // kills flags
+ Block *pred1 = _cfg._bbs[b->pred(1)->_idx];
+ continue;
+ }
+
+ // Initialize needs_phi and needs_split
+ bool needs_phi = false;
+ bool needs_split = false;
+ // Walk the predecessor blocks to check inputs for that live range
+ // Grab predecessor block header
+ n1 = b->pred(1);
+ // Grab the appropriate reaching def info for inpidx
+ pred = _cfg._bbs[n1->_idx];
+ pidx = pred->_pre_order;
+ Node **Ltmp = Reaches[pidx];
+ bool *Utmp = UP[pidx];
+ n1 = Ltmp[slidx];
+ u1 = Utmp[slidx];
+ // Initialize node for saving type info
+ n3 = n1;
+ u3 = u1;
+
+ // Compare inputs to see if a Phi is needed
+ for( inpidx = 2; inpidx < b->num_preds(); inpidx++ ) {
+ // Grab predecessor block headers
+ n2 = b->pred(inpidx);
+ // Grab the appropriate reaching def info for inpidx
+ pred = _cfg._bbs[n2->_idx];
+ pidx = pred->_pre_order;
+ Ltmp = Reaches[pidx];
+ Utmp = UP[pidx];
+ n2 = Ltmp[slidx];
+ u2 = Utmp[slidx];
+ // For each LRG, decide if a phi is necessary
+ if( n1 != n2 ) {
+ needs_phi = true;
+ }
+ // See if the phi has mismatched inputs, UP vs. DOWN
+ if( n1 && n2 && (u1 != u2) ) {
+ needs_split = true;
+ }
+ // Move n2/u2 to n1/u1 for next iteration
+ n1 = n2;
+ u1 = u2;
+ // Preserve a non-NULL predecessor for later type referencing
+ if( (n3 == NULL) && (n2 != NULL) ){
+ n3 = n2;
+ u3 = u2;
+ }
+ } // End for all potential Phi inputs
+
+ // If a phi is needed, check for it
+ if( needs_phi ) {
+ // check block for appropriate phinode & update edges
+ for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
+ n1 = b->_nodes[insidx];
+ // bail if this is not a phi
+ phi = n1->is_Phi() ? n1->as_Phi() : NULL;
+ if( phi == NULL ) {
+ // Keep track of index of first non-PhiNode instruction in block
+ non_phi = insidx;
+ // break out of the for loop as we have handled all phi nodes
+ break;
+ }
+ // must be looking at a phi
+ if( Find_id(n1) == lidxs.at(slidx) ) {
+ // found the necessary phi
+ needs_phi = false;
+ // initialize the Reaches entry for this LRG
+ Reachblock[slidx] = phi;
+ break;
+ } // end if found correct phi
+ } // end for all phi's
+ // add new phinode if one not already found
+ if( needs_phi ) {
+ // create a new phi node and insert it into the block
+ // type is taken from left over pointer to a predecessor
+ assert(n3,"No non-NULL reaching DEF for a Phi");
+ phi = new (C, b->num_preds()) PhiNode(b->head(), n3->bottom_type());
+ // initialize the Reaches entry for this LRG
+ Reachblock[slidx] = phi;
+
+ // add node to block & node_to_block mapping
+ insert_proj( b, insidx++, phi, maxlrg++ );
+ non_phi++;
+ // Reset new phi's mapping to be the spilling live range
+ _names.map(phi->_idx, lidx);
+ assert(Find_id(phi) == lidx,"Bad update on Union-Find mapping");
+ } // end if not found correct phi
+ // Here you have either found or created the Phi, so record it
+ assert(phi != NULL,"Must have a Phi Node here");
+ phis->push(phi);
+ // PhiNodes should either force the LRG UP or DOWN depending
+ // on its inputs and the register pressure in the Phi's block.
+ UPblock[slidx] = true; // Assume new DEF is UP
+ // If entering a high-pressure area with no immediate use,
+ // assume Phi is DOWN
+ if( is_high_pressure( b, &lrgs(lidx), b->end_idx()) && !prompt_use(b,lidx) )
+ UPblock[slidx] = false;
+ // If we are not split up/down and all inputs are down, then we
+ // are down
+ if( !needs_split && !u3 )
+ UPblock[slidx] = false;
+ } // end if phi is needed
+
+ // Do not need a phi, so grab the reaching DEF
+ else {
+ // Grab predecessor block header
+ n1 = b->pred(1);
+ // Grab the appropriate reaching def info for k
+ pred = _cfg._bbs[n1->_idx];
+ pidx = pred->_pre_order;
+ Node **Ltmp = Reaches[pidx];
+ bool *Utmp = UP[pidx];
+ // reset the Reaches & UP entries
+ Reachblock[slidx] = Ltmp[slidx];
+ UPblock[slidx] = Utmp[slidx];
+ } // end else no Phi is needed
+ } // end for all spilling live ranges
+ // DEBUG
+#ifndef PRODUCT
+ if(trace_spilling()) {
+ tty->print("/`\nBlock %d: ", b->_pre_order);
+ tty->print("Reaching Definitions after Phi handling\n");
+ for( uint x = 0; x < spill_cnt; x++ ) {
+ tty->print("Spill Idx %d: UP %d: Node\n",x,UPblock[x]);
+ if( Reachblock[x] )
+ Reachblock[x]->dump();
+ else
+ tty->print("Undefined\n");
+ }
+ }
+#endif
+
+ //----------Non-Phi Node Splitting----------
+ // Since phi-nodes have now been handled, the Reachblock array for this
+ // block is initialized with the correct starting value for the defs which
+ // reach non-phi instructions in this block. Thus, process non-phi
+ // instructions normally, inserting SpillCopy nodes for all spill
+ // locations.
+
+ // Memoize any DOWN reaching definitions for use as DEBUG info
+ for( insidx = 0; insidx < spill_cnt; insidx++ ) {
+ debug_defs[insidx] = (UPblock[insidx]) ? NULL : Reachblock[insidx];
+ if( UPblock[insidx] ) // Memoize UP decision at block start
+ UP_entry[insidx]->set( b->_pre_order );
+ }
+
+ //----------Walk Instructions in the Block and Split----------
+ // For all non-phi instructions in the block
+ for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
+ Node *n = b->_nodes[insidx];
+ // Find the defining Node's live range index
+ uint defidx = Find_id(n);
+ uint cnt = n->req();
+
+ if( n->is_Phi() ) {
+ // Skip phi nodes after removing dead copies.
+ if( defidx < _maxlrg ) {
+ // Check for useless Phis. These appear if we spill, then
+ // coalesce away copies. Don't touch Phis in spilling live
+ // ranges; they are busy getting modified in this pass.
+ if( lrgs(defidx).reg() < LRG::SPILL_REG ) {
+ uint i;
+ Node *u = NULL;
+ // Look for the Phi merging 2 unique inputs
+ for( i = 1; i < cnt; i++ ) {
+ // Ignore repeats and self
+ if( n->in(i) != u && n->in(i) != n ) {
+ // Found a unique input
+ if( u != NULL ) // If it's the 2nd, bail out
+ break;
+ u = n->in(i); // Else record it
+ }
+ }
+ assert( u, "at least 1 valid input expected" );
+ if( i >= cnt ) { // Didn't find 2+ unique inputs?
+ n->replace_by(u); // Then replace with unique input
+ n->disconnect_inputs(NULL);
+ b->_nodes.remove(insidx);
+ insidx--;
+ b->_ihrp_index--;
+ b->_fhrp_index--;
+ }
+ }
+ }
+ continue;
+ }
+ assert( insidx > b->_ihrp_index ||
+ (b->_reg_pressure < (uint)INTPRESSURE) ||
+ b->_ihrp_index > 4000000 ||
+ b->_ihrp_index >= b->end_idx() ||
+ !b->_nodes[b->_ihrp_index]->is_Proj(), "" );
+ assert( insidx > b->_fhrp_index ||
+ (b->_freg_pressure < (uint)FLOATPRESSURE) ||
+ b->_fhrp_index > 4000000 ||
+ b->_fhrp_index >= b->end_idx() ||
+ !b->_nodes[b->_fhrp_index]->is_Proj(), "" );
+
+ // ********** Handle Crossing HRP Boundary **********
+ if( (insidx == b->_ihrp_index) || (insidx == b->_fhrp_index) ) {
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ // Check for need to split at HRP boundary - split if UP
+ n1 = Reachblock[slidx];
+ // bail out if no reaching DEF
+ if( n1 == NULL ) continue;
+ // bail out if live range is 'isolated' around inner loop
+ uint lidx = lidxs.at(slidx);
+ // If live range is currently UP
+ if( UPblock[slidx] ) {
+ // set location to insert spills at
+ // SPLIT DOWN HERE - NO CISC SPILL
+ if( is_high_pressure( b, &lrgs(lidx), insidx ) &&
+ !n1->rematerialize() ) {
+ // If there is already a valid stack definition available, use it
+ if( debug_defs[slidx] != NULL ) {
+ Reachblock[slidx] = debug_defs[slidx];
+ }
+ else {
+ // Insert point is just past last use or def in the block
+ int insert_point = insidx-1;
+ while( insert_point > 0 ) {
+ Node *n = b->_nodes[insert_point];
+ // Hit top of block? Quit going backwards
+ if( n->is_Phi() ) break;
+ // Found a def? Better split after it.
+ if( n2lidx(n) == lidx ) break;
+ // Look for a use
+ uint i;
+ for( i = 1; i < n->req(); i++ )
+ if( n2lidx(n->in(i)) == lidx )
+ break;
+ // Found a use? Better split after it.
+ if( i < n->req() ) break;
+ insert_point--;
+ }
+ maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++;
+ }
+ // This is a new DEF, so update UP
+ UPblock[slidx] = false;
+#ifndef PRODUCT
+ // DEBUG
+ if( trace_spilling() ) {
+ tty->print("\nNew Split DOWN DEF of Spill Idx ");
+ tty->print("%d, UP %d:\n",slidx,false);
+ n1->dump();
+ }
+#endif
+ }
+ } // end if LRG is UP
+ } // end for all spilling live ranges
+ assert( b->_nodes[insidx] == n, "got insidx set incorrectly" );
+ } // end if crossing HRP Boundary
+
+ // If the LRG index is oob, then this is a new spillcopy, skip it.
+ if( defidx >= _maxlrg ) {
+ continue;
+ }
+ LRG &deflrg = lrgs(defidx);
+ uint copyidx = n->is_Copy();
+ // Remove coalesced copy from CFG
+ if( copyidx && defidx == n2lidx(n->in(copyidx)) ) {
+ n->replace_by( n->in(copyidx) );
+ n->set_req( copyidx, NULL );
+ b->_nodes.remove(insidx--);
+ b->_ihrp_index--; // Adjust the point where we go hi-pressure
+ b->_fhrp_index--;
+ continue;
+ }
+
+#define DERIVED 0
+
+ // ********** Handle USES **********
+ bool nullcheck = false;
+ // Implicit null checks never use the spilled value
+ if( n->is_MachNullCheck() )
+ nullcheck = true;
+ if( !nullcheck ) {
+ // Search all inputs for a Spill-USE
+ JVMState* jvms = n->jvms();
+ uint oopoff = jvms ? jvms->oopoff() : cnt;
+ uint old_last = cnt - 1;
+ for( inpidx = 1; inpidx < cnt; inpidx++ ) {
+ // Derived/base pairs may be added to our inputs during this loop.
+ // If inpidx > old_last, then one of these new inputs is being
+ // handled. Skip the derived part of the pair, but process
+ // the base like any other input.
+ if( inpidx > old_last && ((inpidx - oopoff) & 1) == DERIVED ) {
+ continue; // skip derived_debug added below
+ }
+ // Get lidx of input
+ uint useidx = Find_id(n->in(inpidx));
+ // Not a brand-new split, and it is a spill use
+ if( useidx < _maxlrg && lrgs(useidx).reg() >= LRG::SPILL_REG ) {
+ // Check for valid reaching DEF
+ slidx = lrg2reach[useidx];
+ Node *def = Reachblock[slidx];
+ assert( def != NULL, "Using Undefined Value in Split()\n");
+
+ // (+++) %%%% remove this in favor of pre-pass in matcher.cpp
+ // monitor references do not care where they live, so just hook
+ if ( jvms && jvms->is_monitor_use(inpidx) ) {
+ // The effect of this clone is to drop the node out of the block,
+ // so that the allocator does not see it anymore, and therefore
+ // does not attempt to assign it a register.
+ def = def->clone();
+ _names.extend(def->_idx,0);
+ _cfg._bbs.map(def->_idx,b);
+ n->set_req(inpidx, def);
+ if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
+ return 0;
+ }
+ continue;
+ }
+
+ // Rematerializable? Then clone def at use site instead
+ // of store/load
+ if( def->rematerialize() ) {
+ int old_size = b->_nodes.size();
+ def = split_Rematerialize( def, b, insidx, maxlrg, splits, slidx, lrg2reach, Reachblock, true );
+ if( !def ) return 0; // Bail out
+ insidx += b->_nodes.size()-old_size;
+ }
+
+ MachNode *mach = n->is_Mach() ? n->as_Mach() : NULL;
+ // Base pointers and oopmap references do not care where they live.
+ if ((inpidx >= oopoff) ||
+ (mach && mach->ideal_Opcode() == Op_AddP && inpidx == AddPNode::Base)) {
+ if (def->rematerialize() && lrgs(useidx)._was_spilled2) {
+ // This def has been rematerialized a couple of times without
+ // progress. It doesn't care if it lives UP or DOWN, so
+ // spill it down now.
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false,splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ } else {
+ // Just hook the def edge
+ n->set_req(inpidx, def);
+ }
+
+ if (inpidx >= oopoff) {
+ // After oopoff, we have derived/base pairs. We must mention all
+ // derived pointers here as derived/base pairs for GC. If the
+ // derived value is spilling and we have a copy both in Reachblock
+ // (called here 'def') and debug_defs[slidx] we need to mention
+ // both in derived/base pairs or kill one.
+ Node *derived_debug = debug_defs[slidx];
+ if( ((inpidx - oopoff) & 1) == DERIVED && // derived vs base?
+ mach && mach->ideal_Opcode() != Op_Halt &&
+ derived_debug != NULL &&
+ derived_debug != def ) { // Actual 2nd value appears
+ // We have already set 'def' as a derived value.
+ // Also set debug_defs[slidx] as a derived value.
+ uint k;
+ for( k = oopoff; k < cnt; k += 2 )
+ if( n->in(k) == derived_debug )
+ break; // Found an instance of debug derived
+ if( k == cnt ) {// No instance of debug_defs[slidx]
+ // Add a derived/base pair to cover the debug info.
+ // We have to process the added base later since it is not
+ // handled yet at this point but skip derived part.
+ assert(((n->req() - oopoff) & 1) == DERIVED,
+ "must match skip condition above");
+ n->add_req( derived_debug ); // this will be skipped above
+ n->add_req( n->in(inpidx+1) ); // this will be processed
+ // Increment cnt to handle added input edges on
+ // subsequent iterations.
+ cnt += 2;
+ }
+ }
+ }
+ continue;
+ }
+ // Special logic for DEBUG info
+ if( jvms && b->_freq > BLOCK_FREQUENCY(0.5) ) {
+ uint debug_start = jvms->debug_start();
+ // If this is debug info use & there is a reaching DOWN def
+ if ((debug_start <= inpidx) && (debug_defs[slidx] != NULL)) {
+ assert(inpidx < oopoff, "handle only debug info here");
+ // Just hook it in & move on
+ n->set_req(inpidx, debug_defs[slidx]);
+ // (Note that this can make two sides of a split live at the
+ // same time: The debug def on stack, and another def in a
+ // register. The GC needs to know about both of them, but any
+ // derived pointers after oopoff will refer to only one of the
+ // two defs and the GC would therefore miss the other. Thus
+ // this hack is only allowed for debug info which is Java state
+ // and therefore never a derived pointer.)
+ continue;
+ }
+ }
+ // Grab register mask info
+ const RegMask &dmask = def->out_RegMask();
+ const RegMask &umask = n->in_RegMask(inpidx);
+
+ assert(inpidx < oopoff, "cannot use-split oop map info");
+
+ bool dup = UPblock[slidx];
+ bool uup = umask.is_UP();
+
+ // Need special logic to handle bound USES. Insert a split at this
+ // bound use if we can't rematerialize the def, or if we need the
+ // split to form a misaligned pair.
+ if( !umask.is_AllStack() &&
+ (int)umask.Size() <= lrgs(useidx).num_regs() &&
+ (!def->rematerialize() ||
+ umask.is_misaligned_Pair())) {
+ // These need a Split regardless of overlap or pressure
+ // SPLIT - NO DEF - NO CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ continue;
+ }
+ // Here is the logic chart which describes USE Splitting:
+ // 0 = false or DOWN, 1 = true or UP
+ //
+ // Overlap | DEF | USE | Action
+ //-------------------------------------------------------
+ // 0 | 0 | 0 | Copy - mem -> mem
+ // 0 | 0 | 1 | Split-UP - Check HRP
+ // 0 | 1 | 0 | Split-DOWN - Debug Info?
+ // 0 | 1 | 1 | Copy - reg -> reg
+ // 1 | 0 | 0 | Reset Input Edge (no Split)
+ // 1 | 0 | 1 | Split-UP - Check HRP
+ // 1 | 1 | 0 | Split-DOWN - Debug Info?
+ // 1 | 1 | 1 | Reset Input Edge (no Split)
+ //
+ // So, if (dup == uup), then overlap test determines action,
+ // with true being no split, and false being copy. Else,
+ // if DEF is DOWN, Split-UP, and check HRP to decide on
+ // resetting DEF. Finally if DEF is UP, Split-DOWN, with
+ // special handling for Debug Info.
+ if( dup == uup ) {
+ if( dmask.overlap(umask) ) {
+ // Both are either up or down, and there is overlap, No Split
+ n->set_req(inpidx, def);
+ }
+ else { // Both are either up or down, and there is no overlap
+ if( dup ) { // If UP, reg->reg copy
+ // COPY ACROSS HERE - NO DEF - NO CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ }
+ else { // DOWN, mem->mem copy
+ // COPY UP & DOWN HERE - NO DEF - NO CISC SPILL
+ // First Split-UP to move value into Register
+ uint def_ideal = def->ideal_reg();
+ const RegMask* tmp_rm = Matcher::idealreg2regmask[def_ideal];
+ Node *spill = new (C) MachSpillCopyNode(def, dmask, *tmp_rm);
+ insert_proj( b, insidx, spill, maxlrg );
+ // Then Split-DOWN as if previous Split was DEF
+ maxlrg = split_USE(spill,b,n,inpidx,maxlrg,false,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx += 2; // Reset iterator to skip USE side splits
+ }
+ } // End else no overlap
+ } // End if dup == uup
+ // dup != uup, so check dup for direction of Split
+ else {
+ if( dup ) { // If UP, Split-DOWN and check Debug Info
+ // If this node is already a SpillCopy, just patch the edge
+ // except the case of spilling to stack.
+ if( n->is_SpillCopy() ) {
+ RegMask tmp_rm(umask);
+ tmp_rm.SUBTRACT(Matcher::STACK_ONLY_mask);
+ if( dmask.overlap(tmp_rm) ) {
+ if( def != n->in(inpidx) ) {
+ n->set_req(inpidx, def);
+ }
+ continue;
+ }
+ }
+ // COPY DOWN HERE - NO DEF - NO CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ // Check for debug-info split. Capture it for later
+ // debug splits of the same value
+ if (jvms && jvms->debug_start() <= inpidx && inpidx < oopoff)
+ debug_defs[slidx] = n->in(inpidx);
+
+ }
+ else { // DOWN, Split-UP and check register pressure
+ if( is_high_pressure( b, &lrgs(useidx), insidx ) ) {
+ // COPY UP HERE - NO DEF - CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,true, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ } else { // LRP
+ // COPY UP HERE - WITH DEF - NO CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ // Flag this lift-up in a low-pressure block as
+ // already-spilled, so if it spills again it will
+ // spill hard (instead of not spilling hard and
+ // coalescing away).
+ set_was_spilled(n->in(inpidx));
+ // Since this is a new DEF, update Reachblock & UP
+ Reachblock[slidx] = n->in(inpidx);
+ UPblock[slidx] = true;
+ insidx++; // Reset iterator to skip USE side split
+ }
+ } // End else DOWN
+ } // End dup != uup
+ } // End if Spill USE
+ } // End For All Inputs
+ } // End If not nullcheck
+
+ // ********** Handle DEFS **********
+ // DEFS either Split DOWN in HRP regions or when the LRG is bound, or
+ // just reset the Reaches info in LRP regions. DEFS must always update
+ // UP info.
+ if( deflrg.reg() >= LRG::SPILL_REG ) { // Spilled?
+ uint slidx = lrg2reach[defidx];
+ // Add to defs list for later assignment of new live range number
+ defs->push(n);
+ // Set a flag on the Node indicating it has already spilled.
+ // Only do it for capacity spills not conflict spills.
+ if( !deflrg._direct_conflict )
+ set_was_spilled(n);
+ assert(!n->is_Phi(),"Cannot insert Phi into DEFS list");
+ // Grab UP info for DEF
+ const RegMask &dmask = n->out_RegMask();
+ bool defup = dmask.is_UP();
+ // Only split at Def if this is a HRP block or bound (and spilled once)
+ if( !n->rematerialize() &&
+ (((dmask.is_bound1() || dmask.is_bound2() || dmask.is_misaligned_Pair()) &&
+ (deflrg._direct_conflict || deflrg._must_spill)) ||
+ // Check for LRG being up in a register and we are inside a high
+ // pressure area. Spill it down immediately.
+ (defup && is_high_pressure(b,&deflrg,insidx))) ) {
+ assert( !n->rematerialize(), "" );
+ assert( !n->is_SpillCopy(), "" );
+ // Do a split at the def site.
+ maxlrg = split_DEF( n, b, insidx, maxlrg, Reachblock, debug_defs, splits, slidx );
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ // Split DEF's Down
+ UPblock[slidx] = 0;
+#ifndef PRODUCT
+ // DEBUG
+ if( trace_spilling() ) {
+ tty->print("\nNew Split DOWN DEF of Spill Idx ");
+ tty->print("%d, UP %d:\n",slidx,false);
+ n->dump();
+ }
+#endif
+ }
+ else { // Neither bound nor HRP, must be LRP
+ // otherwise, just record the def
+ Reachblock[slidx] = n;
+ // UP should come from the outRegmask() of the DEF
+ UPblock[slidx] = defup;
+ // Update debug list of reaching down definitions, kill if DEF is UP
+ debug_defs[slidx] = defup ? NULL : n;
+#ifndef PRODUCT
+ // DEBUG
+ if( trace_spilling() ) {
+ tty->print("\nNew DEF of Spill Idx ");
+ tty->print("%d, UP %d:\n",slidx,defup);
+ n->dump();
+ }
+#endif
+ } // End else LRP
+ } // End if spill def
+
+ // ********** Split Left Over Mem-Mem Moves **********
+ // Check for mem-mem copies and split them now. Do not do this
+ // to copies about to be spilled; they will be Split shortly.
+ if( copyidx ) {
+ Node *use = n->in(copyidx);
+ uint useidx = Find_id(use);
+ if( useidx < _maxlrg && // This is not a new split
+ OptoReg::is_stack(deflrg.reg()) &&
+ deflrg.reg() < LRG::SPILL_REG ) { // And DEF is from stack
+ LRG &uselrg = lrgs(useidx);
+ if( OptoReg::is_stack(uselrg.reg()) &&
+ uselrg.reg() < LRG::SPILL_REG && // USE is from stack
+ deflrg.reg() != uselrg.reg() ) { // Not trivially removed
+ uint def_ideal_reg = Matcher::base2reg[n->bottom_type()->base()];
+ const RegMask &def_rm = *Matcher::idealreg2regmask[def_ideal_reg];
+ const RegMask &use_rm = n->in_RegMask(copyidx);
+ if( def_rm.overlap(use_rm) && n->is_SpillCopy() ) { // Bug 4707800, 'n' may be a storeSSL
+ if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) { // Check when generating nodes
+ return 0;
+ }
+ Node *spill = new (C) MachSpillCopyNode(use,use_rm,def_rm);
+ n->set_req(copyidx,spill);
+ n->as_MachSpillCopy()->set_in_RegMask(def_rm);
+ // Put the spill just before the copy
+ insert_proj( b, insidx++, spill, maxlrg++ );
+ }
+ }
+ }
+ }
+ } // End For All Instructions in Block - Non-PHI Pass
+
+ // Check if each LRG is live out of this block so as not to propagate
+ // beyond the last use of a LRG.
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ uint defidx = lidxs.at(slidx);
+ IndexSet *liveout = _live->live(b);
+ if( !liveout->member(defidx) ) {
+#ifdef ASSERT
+ // The index defidx is not live. Check the liveout array to ensure that
+ // it contains no members which compress to defidx. Finding such an
+ // instance may be a case to add liveout adjustment in compress_uf_map().
+ // See 5063219.
+ uint member;
+ IndexSetIterator isi(liveout);
+ while ((member = isi.next()) != 0) {
+ assert(defidx != Find_const(member), "Live out member has not been compressed");
+ }
+#endif
+ Reachblock[slidx] = NULL;
+ } else {
+ assert(Reachblock[slidx] != NULL,"No reaching definition for liveout value");
+ }
+ }
+#ifndef PRODUCT
+ if( trace_spilling() )
+ b->dump();
+#endif
+ } // End For All Blocks
+
+ //----------PASS 2----------
+ // Reset all DEF live range numbers here
+ for( insidx = 0; insidx < defs->size(); insidx++ ) {
+ // Grab the def
+ n1 = defs->at(insidx);
+ // Set new lidx for DEF
+ new_lrg(n1, maxlrg++);
+ }
+ //----------Phi Node Splitting----------
+ // Clean up a phi here, and assign a new live range number
+ // Cycle through this block's predecessors, collecting Reaches
+ // info for each spilled LRG and update edges.
+ // Walk the phis list to patch inputs, split phis, and name phis
+ for( insidx = 0; insidx < phis->size(); insidx++ ) {
+ Node *phi = phis->at(insidx);
+ assert(phi->is_Phi(),"This list must only contain Phi Nodes");
+ Block *b = _cfg._bbs[phi->_idx];
+ // Grab the live range number
+ uint lidx = Find_id(phi);
+ uint slidx = lrg2reach[lidx];
+ // Update node to lidx map
+ new_lrg(phi, maxlrg++);
+ // Get PASS1's up/down decision for the block.
+ int phi_up = !!UP_entry[slidx]->test(b->_pre_order);
+
+ // Force down if double-spilling live range
+ if( lrgs(lidx)._was_spilled1 )
+ phi_up = false;
+
+ // When splitting a Phi we can split it normally or "inverted".
+ // An inverted split makes the splits target the Phi's UP/DOWN
+ // sense inverted; then the Phi is followed by a final def-side
+ // split to invert back. It changes which blocks the spill code
+ // goes in.
+
+ // Walk the predecessor blocks and assign the reaching def to the Phi.
+ // Split Phi nodes by placing USE side splits wherever the reaching
+ // DEF has the wrong UP/DOWN value.
+ for( uint i = 1; i < b->num_preds(); i++ ) {
+ // Get predecessor block pre-order number
+ Block *pred = _cfg._bbs[b->pred(i)->_idx];
+ pidx = pred->_pre_order;
+ // Grab reaching def
+ Node *def = Reaches[pidx][slidx];
+ assert( def, "must have reaching def" );
+ // If input up/down sense and reg-pressure DISagree
+ if( def->rematerialize() ) {
+ def = split_Rematerialize( def, pred, pred->end_idx(), maxlrg, splits, slidx, lrg2reach, Reachblock, false );
+ if( !def ) return 0; // Bail out
+ }
+ // Update the Phi's input edge array
+ phi->set_req(i,def);
+ // Grab the UP/DOWN sense for the input
+ u1 = UP[pidx][slidx];
+ if( u1 != (phi_up != 0)) {
+ maxlrg = split_USE(def, b, phi, i, maxlrg, !u1, false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ }
+ } // End for all inputs to the Phi
+ } // End for all Phi Nodes
+ // Update _maxlrg to save Union asserts
+ _maxlrg = maxlrg;
+
+
+ //----------PASS 3----------
+ // Pass over all Phi's to union the live ranges
+ for( insidx = 0; insidx < phis->size(); insidx++ ) {
+ Node *phi = phis->at(insidx);
+ assert(phi->is_Phi(),"This list must only contain Phi Nodes");
+ // Walk all inputs to Phi and Union input live range with Phi live range
+ for( uint i = 1; i < phi->req(); i++ ) {
+ // Grab the input node
+ Node *n = phi->in(i);
+ assert( n, "" );
+ uint lidx = Find(n);
+ uint pidx = Find(phi);
+ if( lidx < pidx )
+ Union(n, phi);
+ else if( lidx > pidx )
+ Union(phi, n);
+ } // End for all inputs to the Phi Node
+ } // End for all Phi Nodes
+ // Now union all two address instructions
+ for( insidx = 0; insidx < defs->size(); insidx++ ) {
+ // Grab the def
+ n1 = defs->at(insidx);
+ // Set new lidx for DEF & handle 2-addr instructions
+ if( n1->is_Mach() && ((twoidx = n1->as_Mach()->two_adr()) != 0) ) {
+ assert( Find(n1->in(twoidx)) < maxlrg,"Assigning bad live range index");
+ // Union the input and output live ranges
+ uint lr1 = Find(n1);
+ uint lr2 = Find(n1->in(twoidx));
+ if( lr1 < lr2 )
+ Union(n1, n1->in(twoidx));
+ else if( lr1 > lr2 )
+ Union(n1->in(twoidx), n1);
+ } // End if two address
+ } // End for all defs
+ // DEBUG
+#ifdef ASSERT
+ // Validate all live range index assignments
+ for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) {
+ b = _cfg._blocks[bidx];
+ for( insidx = 0; insidx <= b->end_idx(); insidx++ ) {
+ Node *n = b->_nodes[insidx];
+ uint defidx = Find(n);
+ assert(defidx < _maxlrg,"Bad live range index in Split");
+ assert(defidx < maxlrg,"Bad live range index in Split");
+ }
+ }
+ // Issue a warning if splitting made no progress
+ int noprogress = 0;
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ if( PrintOpto && WizardMode && splits.at(slidx) == 0 ) {
+ tty->print_cr("Failed to split live range %d", lidxs.at(slidx));
+ //BREAKPOINT;
+ }
+ else {
+ noprogress++;
+ }
+ }
+ if(!noprogress) {
+ tty->print_cr("Failed to make progress in Split");
+ //BREAKPOINT;
+ }
+#endif
+ // Return updated count of live ranges
+ return maxlrg;
+}
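
For reference, the USE-splitting table embedded in Split() condenses to a small pure function: when DEF and USE agree on UP/DOWN, register-mask overlap decides between reusing the edge and inserting a copy; when they disagree, the split goes toward the USE's side. A standalone restatement (names invented; the Split-UP/Split-DOWN outcomes remain subject to the HRP and debug-info handling in the real code):

    #include <cassert>

    enum UseSplitAction { ResetEdge, CopyMemMem, CopyRegReg, SplitUp, SplitDown };

    // Restates the 8-row decision table from PhaseChaitin::Split().
    static UseSplitAction use_split_action(bool overlap, bool def_up, bool use_up) {
      if (def_up == use_up)
        return overlap ? ResetEdge : (def_up ? CopyRegReg : CopyMemMem);
      return def_up ? SplitDown : SplitUp;
    }

    int main() {
      assert(use_split_action(false, false, false) == CopyMemMem);
      assert(use_split_action(false, true,  true ) == CopyRegReg);
      assert(use_split_action(true,  false, false) == ResetEdge);
      assert(use_split_action(true,  true,  true ) == ResetEdge);
      assert(use_split_action(true,  false, true ) == SplitUp);
      assert(use_split_action(false, true,  false) == SplitDown);
      return 0;
    }
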
diff --git a/src/share/vm/opto/regalloc.cpp b/src/share/vm/opto/regalloc.cpp
new file mode 100644
index 000000000..7c4d02797
--- /dev/null
+++ b/src/share/vm/opto/regalloc.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_regalloc.cpp.incl"
+
+static const int NodeRegsOverflowSize = 200;
+
+void (*PhaseRegAlloc::_alloc_statistics[MAX_REG_ALLOCATORS])();
+int PhaseRegAlloc::_num_allocators = 0;
+#ifndef PRODUCT
+int PhaseRegAlloc::_total_framesize = 0;
+int PhaseRegAlloc::_max_framesize = 0;
+#endif
+
+PhaseRegAlloc::PhaseRegAlloc( uint unique, PhaseCFG &cfg,
+ Matcher &matcher,
+ void (*pr_stats)() ):
+ Phase(Register_Allocation), _cfg(cfg), _matcher(matcher),
+ _node_oops(Thread::current()->resource_area()),
+ _node_regs(0),
+ _framesize(0xdeadbeef)
+{
+ int i;
+
+ for (i=0; i < _num_allocators; i++) {
+ if (_alloc_statistics[i] == pr_stats)
+ return;
+ }
+ assert((_num_allocators + 1) < MAX_REG_ALLOCATORS, "too many register allocators");
+ _alloc_statistics[_num_allocators++] = pr_stats;
+}
+
+
+//------------------------------reg2offset-------------------------------------
+int PhaseRegAlloc::reg2offset_unchecked( OptoReg::Name reg ) const {
+ // Slots below _max_in_arg_stack_reg are offset by the entire frame.
+ // Slots above _max_in_arg_stack_reg are frame_slots and are not offset.
+ int slot = (reg < _matcher._new_SP)
+ ? reg - OptoReg::stack0() + _framesize
+ : reg - _matcher._new_SP;
+ // Note: We use the direct formula (reg - SharedInfo::stack0) instead of
+ // OptoReg::reg2stack(reg), in order to avoid asserts in the latter
+ // function. This routine must remain unchecked, so that dump_frame()
+ // can do its work undisturbed.
+ // %%% not really clear why reg2stack would assert here
+
+ return slot*VMRegImpl::stack_slot_size;
+}
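
The slot arithmetic above splits the abstract stack-register space at _new_SP: names below it (incoming arguments) are biased by the whole frame, names at or above it are ordinary frame slots. A standalone sketch of the same formula with made-up numbers (stack0 = 100, _new_SP = 110, an 8-slot frame, 4-byte slots):

    #include <cassert>

    // Made-up layout constants, chosen only to exercise the formula.
    static const int kStack0        = 100; // first abstract stack register
    static const int kNewSP         = 110; // register name of the new SP
    static const int kFramesize     = 8;   // frame depth in slots
    static const int kSlotSizeBytes = 4;

    // Mirrors PhaseRegAlloc::reg2offset_unchecked(): registers below the new
    // SP are incoming-argument slots offset by the entire frame, the rest
    // are plain frame slots.
    static int reg2offset_sketch(int reg) {
      int slot = (reg < kNewSP) ? reg - kStack0 + kFramesize
                                : reg - kNewSP;
      return slot * kSlotSizeBytes;
    }

    int main() {
      assert(reg2offset_sketch(kNewSP)     == 0);              // first frame slot
      assert(reg2offset_sketch(kNewSP + 3) == 3 * 4);          // fourth frame slot
      assert(reg2offset_sketch(kStack0)    == kFramesize * 4); // first incoming arg, past the frame
      return 0;
    }
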
+
+int PhaseRegAlloc::reg2offset( OptoReg::Name reg ) const {
+
+ // Not allowed in the out-preserve area.
+ // In-preserve area is allowed so Intel can fetch the return pc out.
+ assert( reg < _matcher._old_SP ||
+ (reg >= OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots()) &&
+ reg < _matcher._in_arg_limit) ||
+ reg >= OptoReg::add(_matcher._new_SP,C->out_preserve_stack_slots()),
+ "register allocated in a preserve area" );
+ return reg2offset_unchecked( reg );
+}
+
+//------------------------------offset2reg-------------------------------------
+OptoReg::Name PhaseRegAlloc::offset2reg(int stk_offset) const {
+ int slot = stk_offset / jintSize;
+ int reg = (slot < (int) _framesize)
+ ? slot + _matcher._new_SP
+ : OptoReg::stack2reg(slot) - _framesize;
+ assert(stk_offset == reg2offset((OptoReg::Name) reg),
+ "offset2reg does not invert properly");
+ return (OptoReg::Name) reg;
+}
+
+//------------------------------set_oop----------------------------------------
+void PhaseRegAlloc::set_oop( const Node *n, bool is_an_oop ) {
+ if( is_an_oop ) {
+ _node_oops.set(n->_idx);
+ }
+}
+
+//------------------------------is_oop-----------------------------------------
+bool PhaseRegAlloc::is_oop( const Node *n ) const {
+ return _node_oops.test(n->_idx) != 0;
+}
+
+// Allocate _node_regs table with at least "size" elements
+void PhaseRegAlloc::alloc_node_regs(int size) {
+ _node_regs_max_index = size + (size >> 1) + NodeRegsOverflowSize;
+ _node_regs = NEW_RESOURCE_ARRAY( OptoRegPair, _node_regs_max_index );
+ // We assume our caller will fill in all elements up to size-1, so
+ // only the extra space we allocate is initialized here.
+ for( uint i = size; i < _node_regs_max_index; ++i )
+ _node_regs[i].set_bad();
+}
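
alloc_node_regs() sizes the table at roughly 1.5x the requested length plus a fixed overflow pad, and pre-marks only the pad as bad, trusting the caller to fill the first "size" entries. The same policy in a standalone sketch (struct and names invented):

    #include <cassert>
    #include <cstddef>

    struct PairSketch { int first, second; void set_bad() { first = second = -1; } };

    static const size_t kOverflowPad = 200; // stands in for NodeRegsOverflowSize

    // Mirrors the sizing in PhaseRegAlloc::alloc_node_regs(): room for "size"
    // caller-filled entries, ~50% slack, and a pad whose entries start out bad.
    static PairSketch* alloc_node_regs_sketch(size_t size, size_t* max_index_out) {
      size_t max_index = size + (size >> 1) + kOverflowPad;
      PairSketch* regs = new PairSketch[max_index];
      for (size_t i = size; i < max_index; ++i) // only the slack is pre-initialized
        regs[i].set_bad();
      *max_index_out = max_index;
      return regs;
    }

    int main() {
      size_t max_index = 0;
      PairSketch* regs = alloc_node_regs_sketch(100, &max_index);
      assert(max_index == 100 + 50 + kOverflowPad);
      assert(regs[100].first == -1); // first pad slot is already marked bad
      delete[] regs;
      return 0;
    }
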
+
+#ifndef PRODUCT
+void
+PhaseRegAlloc::print_statistics() {
+ tty->print_cr("Total frameslots = %d, Max frameslots = %d", _total_framesize, _max_framesize);
+ int i;
+
+ for (i=0; i < _num_allocators; i++) {
+ _alloc_statistics[i]();
+ }
+}
+#endif
diff --git a/src/share/vm/opto/regalloc.hpp b/src/share/vm/opto/regalloc.hpp
new file mode 100644
index 000000000..37f7ba518
--- /dev/null
+++ b/src/share/vm/opto/regalloc.hpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Node;
+class Matcher;
+class PhaseCFG;
+
+#define MAX_REG_ALLOCATORS 10
+
+//------------------------------PhaseRegAlloc------------------------------------
+// Abstract register allocator
+class PhaseRegAlloc : public Phase {
+ static void (*_alloc_statistics[MAX_REG_ALLOCATORS])();
+ static int _num_allocators;
+
+protected:
+ OptoRegPair *_node_regs;
+ uint _node_regs_max_index;
+ VectorSet _node_oops; // Mapping from node indices to oopiness
+
+ void alloc_node_regs(int size); // allocate _node_regs table with at least "size" elements
+
+ PhaseRegAlloc( uint unique, PhaseCFG &cfg, Matcher &matcher,
+ void (*pr_stats)());
+public:
+ PhaseCFG &_cfg; // Control flow graph
+ uint _framesize; // Size of frame in stack-slots, not counting the preserve area
+ OptoReg::Name _max_reg; // Past largest register seen
+ Matcher &_matcher; // Convert Ideal to MachNodes
+ uint node_regs_max_index() const { return _node_regs_max_index; }
+
+ // Get the register associated with the Node
+ OptoReg::Name get_reg_first( const Node *n ) const {
+ debug_only( if( n->_idx >= _node_regs_max_index ) n->dump(); );
+ assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
+ return _node_regs[n->_idx].first();
+ }
+ OptoReg::Name get_reg_second( const Node *n ) const {
+ debug_only( if( n->_idx >= _node_regs_max_index ) n->dump(); );
+ assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
+ return _node_regs[n->_idx].second();
+ }
+
+ // Do all the real work of allocate
+ virtual void Register_Allocate() = 0;
+
+
+ // notify the register allocator that "node" is a new reference
+ // to the value produced by "old_node"
+ virtual void add_reference( const Node *node, const Node *old_node) = 0;
+
+
+ // Set the register associated with a new Node
+ void set_bad( uint idx ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set_bad();
+ }
+ void set1( uint idx, OptoReg::Name reg ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set1(reg);
+ }
+ void set2( uint idx, OptoReg::Name reg ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set2(reg);
+ }
+ void set_pair( uint idx, OptoReg::Name hi, OptoReg::Name lo ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set_pair(hi, lo);
+ }
+ void set_ptr( uint idx, OptoReg::Name reg ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set_ptr(reg);
+ }
+ // Set and query if a node produces an oop
+ void set_oop( const Node *n, bool );
+ bool is_oop( const Node *n ) const;
+
+ // Convert a register number to a stack offset
+ int reg2offset ( OptoReg::Name reg ) const;
+ int reg2offset_unchecked( OptoReg::Name reg ) const;
+
+ // Convert a stack offset to a register number
+ OptoReg::Name offset2reg( int stk_offset ) const;
+
+ // Get the register encoding associated with the Node
+ int get_encode( const Node *n ) const {
+ assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
+ OptoReg::Name first = _node_regs[n->_idx].first();
+ OptoReg::Name second = _node_regs[n->_idx].second();
+ assert( !OptoReg::is_valid(second) || second == first+1, "" );
+ assert(OptoReg::is_reg(first), "out of range");
+ return Matcher::_regEncode[first];
+ }
+
+ // Platform dependent hook for actions prior to allocation
+ void pd_preallocate_hook();
+
+#ifdef ASSERT
+ // Platform dependent hook for verification after allocation. Will
+ // only get called when compiling with asserts.
+ void pd_postallocate_verify_hook();
+#endif
+
+#ifndef PRODUCT
+ static int _total_framesize;
+ static int _max_framesize;
+
+ virtual void dump_frame() const = 0;
+ virtual char *dump_register( const Node *n, char *buf ) const = 0;
+ static void print_statistics();
+#endif
+};
diff --git a/src/share/vm/opto/regmask.cpp b/src/share/vm/opto/regmask.cpp
new file mode 100644
index 000000000..782d1fa99
--- /dev/null
+++ b/src/share/vm/opto/regmask.cpp
@@ -0,0 +1,288 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_regmask.cpp.incl"
+
+#define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */
+
+//-------------Non-zero bit search methods used by RegMask---------------------
+// Find lowest 1, or return 32 if empty
+int find_lowest_bit( uint32 mask ) {
+ int n = 0;
+ if( (mask & 0xffff) == 0 ) {
+ mask >>= 16;
+ n += 16;
+ }
+ if( (mask & 0xff) == 0 ) {
+ mask >>= 8;
+ n += 8;
+ }
+ if( (mask & 0xf) == 0 ) {
+ mask >>= 4;
+ n += 4;
+ }
+ if( (mask & 0x3) == 0 ) {
+ mask >>= 2;
+ n += 2;
+ }
+ if( (mask & 0x1) == 0 ) {
+ mask >>= 1;
+ n += 1;
+ }
+ if( mask == 0 ) {
+ n = 32;
+ }
+ return n;
+}
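
The contract of find_lowest_bit() is "index of the lowest set bit, or 32 for an empty mask"; the unrolled halving steps above just avoid a loop and any platform intrinsic. A standalone check of the same contract using a plain loop:

    #include <cassert>
    #include <cstdint>

    // Same contract as find_lowest_bit(), written as a simple scan.
    static int lowest_bit_sketch(uint32_t mask) {
      if (mask == 0) return 32;
      int n = 0;
      while ((mask & 1u) == 0) { mask >>= 1; ++n; }
      return n;
    }

    int main() {
      assert(lowest_bit_sketch(0x00000001u) == 0);
      assert(lowest_bit_sketch(0x00080000u) == 19);
      assert(lowest_bit_sketch(0u)          == 32);
      return 0;
    }
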
+
+// Find highest 1, or return 32 if empty
+int find_hihghest_bit( uint32 mask ) {
+ int n = 0;
+ if( mask > 0xffff ) {
+ mask >>= 16;
+ n += 16;
+ }
+ if( mask > 0xff ) {
+ mask >>= 8;
+ n += 8;
+ }
+ if( mask > 0xf ) {
+ mask >>= 4;
+ n += 4;
+ }
+ if( mask > 0x3 ) {
+ mask >>= 2;
+ n += 2;
+ }
+ if( mask > 0x1 ) {
+ mask >>= 1;
+ n += 1;
+ }
+ if( mask == 0 ) {
+ n = 32;
+ }
+ return n;
+}
+
+//------------------------------dump-------------------------------------------
+
+#ifndef PRODUCT
+void OptoReg::dump( int r ) {
+ switch( r ) {
+ case Special: tty->print("r---"); break;
+ case Bad: tty->print("rBAD"); break;
+ default:
+ if( r < _last_Mach_Reg ) tty->print(Matcher::regName[r]);
+ else tty->print("rS%d",r);
+ break;
+ }
+}
+#endif
+
+
+//=============================================================================
+const RegMask RegMask::Empty(
+# define BODY(I) 0,
+ FORALL_BODY
+# undef BODY
+ 0
+);
+
+//------------------------------find_first_pair--------------------------------
+// Find the lowest-numbered register pair in the mask. Return the
+// HIGHEST register number in the pair, or BAD if no pairs.
+OptoReg::Name RegMask::find_first_pair() const {
+ VerifyPairs();
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ if( _A[i] ) { // Found some bits
+ int bit = _A[i] & -_A[i]; // Extract low bit
+ // Convert to bit number, return hi bit in pair
+ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+1);
+ }
+ }
+ return OptoReg::Bad;
+}
+
+//------------------------------ClearToPairs-----------------------------------
+// Clear out partial bits; leave only bit pairs
+void RegMask::ClearToPairs() {
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ int bits = _A[i];
+ bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
+ bits |= (bits>>1); // Smear 1 hi-bit into a pair
+ _A[i] = bits;
+ }
+ VerifyPairs();
+}
+
+//------------------------------SmearToPairs-----------------------------------
+// Smear out partial bits; leave only bit pairs
+void RegMask::SmearToPairs() {
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ int bits = _A[i];
+ bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
+ bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
+ _A[i] = bits;
+ }
+ VerifyPairs();
+}
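
ClearToPairs() and SmearToPairs() are per-word bit tricks over the even/odd pair structure: the first keeps only complete aligned pairs, the second widens any lone bit into its full pair. A single-word standalone check (uint32_t standing in for one mask word):

    #include <cassert>
    #include <cstdint>

    // Single-word versions of the loop bodies above.
    static uint32_t clear_to_pairs(uint32_t bits) {
      bits &= (bits & 0x55555555u) << 1; // one hi-bit survives per complete pair
      bits |= bits >> 1;                 // smear it back over the pair
      return bits;
    }

    static uint32_t smear_to_pairs(uint32_t bits) {
      bits |= (bits & 0x55555555u) << 1; // a lone even bit gains its odd partner
      bits |= (bits & 0xAAAAAAAAu) >> 1; // a lone odd bit gains its even partner
      return bits;
    }

    int main() {
      // 0b1101: pair {2,3} is complete, bit 0 is a lone half-pair.
      assert(clear_to_pairs(0xDu) == 0xCu);
      // A lone bit in either half of a pair smears out to the full pair.
      assert(smear_to_pairs(0x4u) == 0xCu); // bit 2 -> bits {2,3}
      assert(smear_to_pairs(0x2u) == 0x3u); // bit 1 -> bits {0,1}
      return 0;
    }
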
+
+//------------------------------is_aligned_pairs-------------------------------
+bool RegMask::is_aligned_Pairs() const {
+ // Assert that the register mask contains only bit pairs.
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ int bits = _A[i];
+ while( bits ) { // Check bits for pairing
+ int bit = bits & -bits; // Extract low bit
+ // A low bit at an odd position means the pair is mis-aligned.
+ if( (bit & 0x55555555) == 0 ) return false;
+ bits -= bit; // Remove bit from mask
+ // Check for aligned adjacent bit
+ if( (bits & (bit<<1)) == 0 ) return false;
+ bits -= (bit<<1); // Remove the other half of the pair
+ }
+ }
+ return true;
+}
+
+//------------------------------is_bound1--------------------------------------
+// Return TRUE if the mask contains a single bit
+int RegMask::is_bound1() const {
+ if( is_AllStack() ) return false;
+ int bit = -1; // Set to hold the one bit allowed
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ if( _A[i] ) { // Found some bits
+ if( bit != -1 ) return false; // Already had bits, so fail
+ bit = _A[i] & -_A[i]; // Extract 1 bit from mask
+ if( bit != _A[i] ) return false; // Found many bits, so fail
+ }
+ }
+ // True for both the empty mask and for a single bit
+ return true;
+}
+
+//------------------------------is_bound2--------------------------------------
+// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
+int RegMask::is_bound2() const {
+ if( is_AllStack() ) return false;
+
+ int bit = -1; // Set to hold the one bit allowed
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ if( _A[i] ) { // Found some bits
+ if( bit != -1 ) return false; // Already had bits, so fail
+ bit = _A[i] & -(_A[i]); // Extract 1 bit from mask
+ if( (bit << 1) != 0 ) { // Bit pair stays in same word?
+ if( (bit | (bit<<1)) != _A[i] )
+ return false; // Require adjacent bit pair and no more bits
+ } else { // Else it's a split-pair case
+ if( bit != _A[i] ) return false; // Found many bits, so fail
+ i++; // Skip iteration forward
+ if( _A[i] != 1 ) return false; // Require 1 lo bit in next word
+ }
+ }
+ }
+ // True for both the empty mask and for a bit pair
+ return true;
+}
+
+//------------------------------is_UP------------------------------------------
+// UP means register-only; register-plus-stack or stack-only is DOWN
+bool RegMask::is_UP() const {
+ // Quick common case check for DOWN (any stack slot is legal)
+ if( is_AllStack() )
+ return false;
+ // Slower check for any stack bits set (also DOWN)
+ if( overlap(Matcher::STACK_ONLY_mask) )
+ return false;
+ // Not DOWN, so must be UP
+ return true;
+}
+
+//------------------------------Size-------------------------------------------
+// Compute size of register mask in bits
+uint RegMask::Size() const {
+ extern uint8 bitsInByte[256];
+ uint sum = 0;
+ for( int i = 0; i < RM_SIZE; i++ )
+ sum +=
+ bitsInByte[(_A[i]>>24) & 0xff] +
+ bitsInByte[(_A[i]>>16) & 0xff] +
+ bitsInByte[(_A[i]>> 8) & 0xff] +
+ bitsInByte[ _A[i] & 0xff];
+ return sum;
+}
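
Size() is a table-driven population count over the mask words, one byte at a time. The same technique in miniature, with the 256-entry table built locally instead of using the VM's global bitsInByte[]:

    #include <cassert>
    #include <cstdint>

    static uint8_t bits_in_byte[256]; // bit count of every possible byte value

    static void init_table() {
      for (int v = 0; v < 256; ++v)
        for (int b = 0; b < 8; ++b)
          bits_in_byte[v] += (v >> b) & 1;
    }

    // Table-driven popcount of one 32-bit word, as in RegMask::Size().
    static unsigned popcount32(uint32_t w) {
      return bits_in_byte[(w >> 24) & 0xff] + bits_in_byte[(w >> 16) & 0xff] +
             bits_in_byte[(w >>  8) & 0xff] + bits_in_byte[ w        & 0xff];
    }

    int main() {
      init_table();
      assert(popcount32(0x00000000u) ==  0);
      assert(popcount32(0xF000000Fu) ==  8);
      assert(popcount32(0xFFFFFFFFu) == 32);
      return 0;
    }
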
+
+#ifndef PRODUCT
+//------------------------------print------------------------------------------
+void RegMask::dump( ) const {
+ tty->print("[");
+ RegMask rm = *this; // Structure copy into local temp
+
+ OptoReg::Name start = rm.find_first_elem(); // Get a register
+ if( OptoReg::is_valid(start) ) { // Check for empty mask
+ rm.Remove(start); // Yank from mask
+ OptoReg::dump(start); // Print register
+ OptoReg::Name last = start;
+
+ // Now I have printed an initial register.
+ // Print adjacent registers as "rX-rZ" instead of "rX,rY,rZ".
+ // Begin looping over the remaining registers.
+ while( 1 ) { //
+ OptoReg::Name reg = rm.find_first_elem(); // Get a register
+ if( !OptoReg::is_valid(reg) )
+ break; // Empty mask, end loop
+ rm.Remove(reg); // Yank from mask
+
+ if( last+1 == reg ) { // See if they are adjacent
+ // Adjacent registers just collect into long runs, no printing.
+ last = reg;
+ } else { // Ending some kind of run
+ if( start == last ) { // 1-register run; no special printing
+ } else if( start+1 == last ) {
+ tty->print(","); // 2-register run; print as "rX,rY"
+ OptoReg::dump(last);
+ } else { // Multi-register run; print as "rX-rZ"
+ tty->print("-");
+ OptoReg::dump(last);
+ }
+ tty->print(","); // Seperate start of new run
+ start = last = reg; // Start a new register run
+ OptoReg::dump(start); // Print register
+ } // End of if ending a register run or not
+ } // End of while regmask not empty
+
+ if( start == last ) { // 1-register run; no special printing
+ } else if( start+1 == last ) {
+ tty->print(","); // 2-register run; print as "rX,rY"
+ OptoReg::dump(last);
+ } else { // Multi-register run; print as "rX-rZ"
+ tty->print("-");
+ OptoReg::dump(last);
+ }
+ if( rm.is_AllStack() ) tty->print("...");
+ }
+ tty->print("]");
+}
+#endif
diff --git a/src/share/vm/opto/regmask.hpp b/src/share/vm/opto/regmask.hpp
new file mode 100644
index 000000000..e34c8354f
--- /dev/null
+++ b/src/share/vm/opto/regmask.hpp
@@ -0,0 +1,264 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Some fun naming (textual) substitutions:
+//
+// RegMask::get_low_elem() ==> RegMask::find_first_elem()
+// RegMask::Special ==> RegMask::Empty
+// RegMask::_flags ==> RegMask::is_AllStack()
+// RegMask::operator<<=() ==> RegMask::Insert()
+// RegMask::operator>>=() ==> RegMask::Remove()
+// RegMask::Union() ==> RegMask::OR
+// RegMask::Inter() ==> RegMask::AND
+//
+// OptoRegister::RegName ==> OptoReg::Name
+//
+// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version
+//
+// numregs in chaitin ==> proper degree in chaitin
+
+//-------------Non-zero bit search methods used by RegMask---------------------
+// Find lowest 1, or return 32 if empty
+int find_lowest_bit( uint32 mask );
+// Find highest 1, or return 32 if empty
+int find_hihghest_bit( uint32 mask );
+
+//------------------------------RegMask----------------------------------------
+// The ADL file describes how to print the machine-specific registers, as well
+// as any notion of register classes. We provide a register mask, which is
+// just a collection of Register numbers.
+
+// The ADLC defines 2 macros, RM_SIZE and FORALL_BODY.
+// RM_SIZE is the size of a register mask in words.
+// FORALL_BODY replicates a BODY macro once per word in the register mask.
+// The usage is somewhat clumsy and limited to the regmask.[h,c]pp files.
+// However, it means the ADLC can redefine the unroll macro and all loops
+// over register masks will be unrolled by the correct amount.
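+// For example, if the ADLC emitted RM_SIZE == 4, FORALL_BODY would expand to
+// BODY(0) BODY(1) BODY(2) BODY(3), so a body written once per word (as in
+// Clear() and OR() below) is unrolled four times with constant indices.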
+
+class RegMask VALUE_OBJ_CLASS_SPEC {
+ union {
+ double _dummy_force_double_alignment[RM_SIZE>>1];
+ // Array of Register Mask bits. This array is large enough to cover
+ // all the machine registers and all parameters that need to be passed
+ // on the stack (stack registers) up to some interesting limit. Methods
+ // that need more parameters will NOT be compiled. On Intel, the limit
+ // is something like 90+ parameters.
+ int _A[RM_SIZE];
+ };
+
+ enum {
+ _WordBits = BitsPerInt,
+ _LogWordBits = LogBitsPerInt,
+ _RM_SIZE = RM_SIZE // local constant, imported, then hidden by #undef
+ };
+
+public:
+ enum { CHUNK_SIZE = RM_SIZE*_WordBits };
+
+ // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
+ // Also, consider the maximum alignment size for a normally allocated
+ // value. Since we allocate register pairs but not register quads (at
+ // present), this alignment is SlotsPerLong (== 2). A normally
+ // aligned allocated register is either a single register, or a pair
+ // of adjacent registers, the lower-numbered being even.
+ // See also is_aligned_Pairs() below, and the padding added before
+ // Matcher::_new_SP to keep allocated pairs aligned properly.
+ // If we ever go to quad-word allocations, SlotsPerQuad will become
+ // the controlling alignment constraint. Note that this alignment
+ // requirement is internal to the allocator, and independent of any
+ // particular platform.
+ enum { SlotsPerLong = 2 };
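+  // For example, registers 6 and 7 form an aligned pair suitable for a long
+  // or double, while the misaligned pair 7 and 8 would fail is_aligned_Pairs().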
+
+ // A constructor only used by the ADLC output. All mask fields are filled
+ // in directly. Calls to this look something like RM(1,2,3,4);
+ RegMask(
+# define BODY(I) int a##I,
+ FORALL_BODY
+# undef BODY
+ int dummy = 0 ) {
+# define BODY(I) _A[I] = a##I;
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Handy copying constructor
+ RegMask( RegMask *rm ) {
+# define BODY(I) _A[I] = rm->_A[I];
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Construct an empty mask
+ RegMask( ) { Clear(); }
+
+ // Construct a mask with a single bit
+ RegMask( OptoReg::Name reg ) { Clear(); Insert(reg); }
+
+ // Check for register being in mask
+ int Member( OptoReg::Name reg ) const {
+ assert( reg < CHUNK_SIZE, "" );
+ return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1)));
+ }
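+  // Example: with 32-bit words, reg 37 maps to bit 5 of _A[1]; Insert() and
+  // Remove() below set and clear that same bit.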
+
+ // The last bit in the register mask indicates that the mask should repeat
+ // indefinitely with ONE bits. Returns TRUE if mask is infinite or
+ // unbounded in size. Returns FALSE if mask is finite size.
+ int is_AllStack() const { return _A[RM_SIZE-1] >> (_WordBits-1); }
+
+  // Work around an -xO3 optimization problem in WS6U1. The old way:
+ // void set_AllStack() { _A[RM_SIZE-1] |= (1<<(_WordBits-1)); }
+ // will cause _A[RM_SIZE-1] to be clobbered, not updated when set_AllStack()
+ // follows an Insert() loop, like the one found in init_spill_mask(). Using
+  // Insert() instead works because the index into _A is computed instead of
+  // being constant. See bug 4665841.
+ void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); }
+
+ // Test for being a not-empty mask.
+ int is_NotEmpty( ) const {
+ int tmp = 0;
+# define BODY(I) tmp |= _A[I];
+ FORALL_BODY
+# undef BODY
+ return tmp;
+ }
+
+ // Find lowest-numbered register from mask, or BAD if mask is empty.
+ OptoReg::Name find_first_elem() const {
+ int base, bits;
+# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
+ FORALL_BODY
+# undef BODY
+ { base = OptoReg::Bad; bits = 1<<0; }
+ return OptoReg::Name(base + find_lowest_bit(bits));
+ }
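+  // Example: if only bit 33 is set, _A[0] == 0 and _A[1] == 2, so base becomes
+  // 1<<_LogWordBits == 32 and find_lowest_bit(2) == 1, yielding reg 33.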
+ // Get highest-numbered register from mask, or BAD if mask is empty.
+ OptoReg::Name find_last_elem() const {
+ int base, bits;
+# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
+ FORALL_BODY
+# undef BODY
+ { base = OptoReg::Bad; bits = 1<<0; }
+ return OptoReg::Name(base + find_hihghest_bit(bits));
+ }
+
+ // Find the lowest-numbered register pair in the mask. Return the
+ // HIGHEST register number in the pair, or BAD if no pairs.
+ // Assert that the mask contains only bit pairs.
+ OptoReg::Name find_first_pair() const;
+
+ // Clear out partial bits; leave only aligned adjacent bit pairs.
+ void ClearToPairs();
+ // Smear out partial bits; leave only aligned adjacent bit pairs.
+ void SmearToPairs();
+ // Verify that the mask contains only aligned adjacent bit pairs
+ void VerifyPairs() const { assert( is_aligned_Pairs(), "mask is not aligned, adjacent pairs" ); }
+ // Test that the mask contains only aligned adjacent bit pairs
+ bool is_aligned_Pairs() const;
+
+ // mask is a pair of misaligned registers
+ bool is_misaligned_Pair() const { return Size()==2 && !is_aligned_Pairs();}
+ // Test for single register
+ int is_bound1() const;
+ // Test for a single adjacent pair
+ int is_bound2() const;
+
+ // Fast overlap test. Non-zero if any registers in common.
+ int overlap( const RegMask &rm ) const {
+ return
+# define BODY(I) (_A[I] & rm._A[I]) |
+ FORALL_BODY
+# undef BODY
+ 0 ;
+ }
+
+ // Special test for register pressure based splitting
+  // UP means register-only; register-plus-stack or stack-only is DOWN
+ bool is_UP() const;
+
+ // Clear a register mask
+ void Clear( ) {
+# define BODY(I) _A[I] = 0;
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Fill a register mask with 1's
+ void Set_All( ) {
+# define BODY(I) _A[I] = -1;
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Insert register into mask
+ void Insert( OptoReg::Name reg ) {
+ assert( reg < CHUNK_SIZE, "" );
+ _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
+ }
+
+ // Remove register from mask
+ void Remove( OptoReg::Name reg ) {
+ assert( reg < CHUNK_SIZE, "" );
+ _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1)));
+ }
+
+ // OR 'rm' into 'this'
+ void OR( const RegMask &rm ) {
+# define BODY(I) this->_A[I] |= rm._A[I];
+ FORALL_BODY
+# undef BODY
+ }
+
+ // AND 'rm' into 'this'
+ void AND( const RegMask &rm ) {
+# define BODY(I) this->_A[I] &= rm._A[I];
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Subtract 'rm' from 'this'
+ void SUBTRACT( const RegMask &rm ) {
+# define BODY(I) _A[I] &= ~rm._A[I];
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Compute size of register mask: number of bits
+ uint Size() const;
+
+#ifndef PRODUCT
+ void print() const { dump(); }
+ void dump() const; // Print a mask
+#endif
+
+ static const RegMask Empty; // Common empty mask
+
+ static bool can_represent(OptoReg::Name reg) {
+ // NOTE: -1 in computation reflects the usage of the last
+ // bit of the regmask as an infinite stack flag.
+ return (int)reg < (int)(CHUNK_SIZE-1);
+ }
+};
+
+// Do not use this constant directly in client code!
+#undef RM_SIZE
diff --git a/src/share/vm/opto/rootnode.cpp b/src/share/vm/opto/rootnode.cpp
new file mode 100644
index 000000000..44e0118ba
--- /dev/null
+++ b/src/share/vm/opto/rootnode.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_rootnode.cpp.incl"
+
+//------------------------------Ideal------------------------------------------
+// Remove dead inputs
+Node *RootNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ for( uint i = 1; i < req(); i++ ) { // For all inputs
+ // Check for and remove dead inputs
+ if( phase->type(in(i)) == Type::TOP ) {
+ del_req(i--); // Delete TOP inputs
+ }
+ }
+
+ // I used to do tail-splitting in the Ideal graph here, but it does not
+ // work. The tail-splitting forces values live into the Return to be
+ // ready at a point which dominates the split returns. This forces Stores
+ // to be hoisted high. The "proper" fix would be to split Stores down
+ // each path, but this makes the split unprofitable. If we want to do this
+ // optimization, it needs to be done after allocation so we can count all
+ // the instructions needing to be cloned in the cost metric.
+
+ // There used to be a spoof here for caffeine marks which completely
+  // eliminated very simple self-recursions, but it's not worth it.
+ // Deep inlining of self-calls gets nearly all of the same benefits.
+ // If we want to get the rest of the win later, we should pattern match
+ // simple recursive call trees to closed-form solutions.
+
+ return NULL; // No further opportunities exposed
+}
+
+//=============================================================================
+HaltNode::HaltNode( Node *ctrl, Node *frameptr ) : Node(TypeFunc::Parms) {
+ Node* top = Compile::current()->top();
+ init_req(TypeFunc::Control, ctrl );
+ init_req(TypeFunc::I_O, top);
+ init_req(TypeFunc::Memory, top);
+ init_req(TypeFunc::FramePtr, frameptr );
+ init_req(TypeFunc::ReturnAdr,top);
+}
+
+const Type *HaltNode::bottom_type() const { return Type::BOTTOM; }
+
+//------------------------------Ideal------------------------------------------
+Node *HaltNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+//------------------------------Value------------------------------------------
+const Type *HaltNode::Value( PhaseTransform *phase ) const {
+ return ( phase->type(in(TypeFunc::Control)) == Type::TOP)
+ ? Type::TOP
+ : Type::BOTTOM;
+}
+
+const RegMask &HaltNode::out_RegMask() const {
+ return RegMask::Empty;
+}
diff --git a/src/share/vm/opto/rootnode.hpp b/src/share/vm/opto/rootnode.hpp
new file mode 100644
index 000000000..369bd9d6d
--- /dev/null
+++ b/src/share/vm/opto/rootnode.hpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//------------------------------RootNode---------------------------------------
+// The one-and-only before-all-else and after-all-else RootNode. The RootNode
+// represents what happens if the user runs the whole program repeatedly. The
+// RootNode produces the initial values of I/O and memory for the program or
+// procedure start.
+class RootNode : public LoopNode {
+public:
+ RootNode( ) : LoopNode(0,0) {
+ init_class_id(Class_Root);
+ del_req(2);
+ del_req(1);
+ }
+ virtual int Opcode() const;
+ virtual const Node *is_block_proj() const { return this; }
+ virtual const Type *bottom_type() const { return Type::BOTTOM; }
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const { return Type::BOTTOM; }
+};
+
+//------------------------------HaltNode---------------------------------------
+// Throw an exception & die
+class HaltNode : public Node {
+public:
+ HaltNode( Node *ctrl, Node *frameptr );
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; };
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Node *is_block_proj() const { return this; }
+ virtual const RegMask &out_RegMask() const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const { return 0; }
+};
diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp
new file mode 100644
index 000000000..aedfceb51
--- /dev/null
+++ b/src/share/vm/opto/runtime.cpp
@@ -0,0 +1,1177 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_runtime.cpp.incl"
+
+
+// For debugging purposes:
+// To force FullGCALot inside a runtime function, add the following two lines
+//
+// Universe::release_fullgc_alot_dummy();
+// MarkSweep::invoke(0, "Debugging");
+//
+// At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000
+
+
+
+
+// Compiled code entry points
+address OptoRuntime::_new_instance_Java = NULL;
+address OptoRuntime::_new_array_Java = NULL;
+address OptoRuntime::_multianewarray2_Java = NULL;
+address OptoRuntime::_multianewarray3_Java = NULL;
+address OptoRuntime::_multianewarray4_Java = NULL;
+address OptoRuntime::_multianewarray5_Java = NULL;
+address OptoRuntime::_vtable_must_compile_Java = NULL;
+address OptoRuntime::_complete_monitor_locking_Java = NULL;
+address OptoRuntime::_rethrow_Java = NULL;
+
+address OptoRuntime::_slow_arraycopy_Java = NULL;
+address OptoRuntime::_register_finalizer_Java = NULL;
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+address OptoRuntime::_zap_dead_Java_locals_Java = NULL;
+address OptoRuntime::_zap_dead_native_locals_Java = NULL;
+# endif
+
+
+// This should be called in an assertion at the start of OptoRuntime routines
+// which are entered from compiled code (all of them)
+#ifndef PRODUCT
+static bool check_compiled_frame(JavaThread* thread) {
+ assert(thread->last_frame().is_runtime_frame(), "cannot call runtime directly from compiled code");
+#ifdef ASSERT
+ RegisterMap map(thread, false);
+ frame caller = thread->last_frame().sender(&map);
+  assert(caller.is_compiled_frame(), "not being called from compiled-like code");
+#endif /* ASSERT */
+ return true;
+}
+#endif
+
+
+#define gen(env, var, type_func_gen, c_func, fancy_jump, pass_tls, save_arg_regs, return_pc) \
+ var = generate_stub(env, type_func_gen, CAST_FROM_FN_PTR(address, c_func), #var, fancy_jump, pass_tls, save_arg_regs, return_pc)
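+// For example, the first gen() call below expands roughly to:
+//   _new_instance_Java = generate_stub(env, new_instance_Type,
+//                                      CAST_FROM_FN_PTR(address, new_instance_C),
+//                                      "_new_instance_Java", 0, true, false, false);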
+
+void OptoRuntime::generate(ciEnv* env) {
+
+ generate_exception_blob();
+
+  // Note: tls means fetching the return oop out of the thread-local storage
+ //
+ // variable/name type-function-gen , runtime method ,fncy_jp, tls,save_args,retpc
+ // -------------------------------------------------------------------------------------------------------------------------------
+ gen(env, _new_instance_Java , new_instance_Type , new_instance_C , 0 , true , false, false);
+ gen(env, _new_array_Java , new_array_Type , new_array_C , 0 , true , false, false);
+ gen(env, _multianewarray2_Java , multianewarray2_Type , multianewarray2_C , 0 , true , false, false);
+ gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false);
+ gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false);
+ gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false);
+ gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C , 0 , false, false, false);
+ gen(env, _rethrow_Java , rethrow_Type , rethrow_C , 2 , true , false, true );
+
+ gen(env, _slow_arraycopy_Java , slow_arraycopy_Type , SharedRuntime::slow_arraycopy_C , 0 , false, false, false);
+ gen(env, _register_finalizer_Java , register_finalizer_Type , register_finalizer , 0 , false, false, false);
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ gen(env, _zap_dead_Java_locals_Java , zap_dead_locals_Type , zap_dead_Java_locals_C , 0 , false, true , false );
+ gen(env, _zap_dead_native_locals_Java , zap_dead_locals_Type , zap_dead_native_locals_C , 0 , false, true , false );
+# endif
+
+}
+
+#undef gen
+
+
+// Helper method to do generation of RunTimeStub's
+address OptoRuntime::generate_stub( ciEnv* env,
+ TypeFunc_generator gen, address C_function,
+ const char *name, int is_fancy_jump,
+ bool pass_tls,
+ bool save_argument_registers,
+ bool return_pc ) {
+ ResourceMark rm;
+ Compile C( env, gen, C_function, name, is_fancy_jump, pass_tls, save_argument_registers, return_pc );
+ return C.stub_entry_point();
+}
+
+const char* OptoRuntime::stub_name(address entry) {
+#ifndef PRODUCT
+ CodeBlob* cb = CodeCache::find_blob(entry);
+ RuntimeStub* rs =(RuntimeStub *)cb;
+ assert(rs != NULL && rs->is_runtime_stub(), "not a runtime stub");
+ return rs->name();
+#else
+ // Fast implementation for product mode (maybe it should be inlined too)
+ return "runtime stub";
+#endif
+}
+
+
+//=============================================================================
+// Opto compiler runtime routines
+//=============================================================================
+
+
+//=============================allocation======================================
+// We failed the fast-path allocation. Now we need to do a scavenge or GC
+// and try allocation again.
+
+void OptoRuntime::do_eager_card_mark(JavaThread* thread) {
+ // After any safepoint, just before going back to compiled code,
+ // we perform a card mark. This lets the compiled code omit
+ // card marks for initialization of new objects.
+ // Keep this code consistent with GraphKit::store_barrier.
+
+ oop new_obj = thread->vm_result();
+ if (new_obj == NULL) return;
+
+ assert(Universe::heap()->can_elide_tlab_store_barriers(),
+ "compiler must check this first");
+ new_obj = Universe::heap()->new_store_barrier(new_obj);
+ thread->set_vm_result(new_obj);
+}
+
+// object allocation
+JRT_BLOCK_ENTRY(void, OptoRuntime::new_instance_C(klassOopDesc* klass, JavaThread* thread))
+ JRT_BLOCK;
+#ifndef PRODUCT
+ SharedRuntime::_new_instance_ctr++; // new instance requires GC
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+
+ // These checks are cheap to make and support reflective allocation.
+ int lh = Klass::cast(klass)->layout_helper();
+ if (Klass::layout_helper_needs_slow_path(lh)
+ || !instanceKlass::cast(klass)->is_initialized()) {
+ KlassHandle kh(THREAD, klass);
+ kh->check_valid_for_instantiation(false, THREAD);
+ if (!HAS_PENDING_EXCEPTION) {
+ instanceKlass::cast(kh())->initialize(THREAD);
+ }
+ if (!HAS_PENDING_EXCEPTION) {
+ klass = kh();
+ } else {
+ klass = NULL;
+ }
+ }
+
+ if (klass != NULL) {
+ // Scavenge and allocate an instance.
+ oop result = instanceKlass::cast(klass)->allocate_instance(THREAD);
+ thread->set_vm_result(result);
+
+ // Pass oops back through thread local storage. Our apparent type to Java
+ // is that we return an oop, but we can block on exit from this routine and
+ // a GC can trash the oop in C's return register. The generated stub will
+ // fetch the oop from TLS after any possible GC.
+ }
+
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ JRT_BLOCK_END;
+
+ if (GraphKit::use_ReduceInitialCardMarks()) {
+ // do them now so we don't have to do them on the fast path
+ do_eager_card_mark(thread);
+ }
+JRT_END
+
+
+// array allocation
+JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(klassOopDesc* array_type, int len, JavaThread *thread))
+ JRT_BLOCK;
+#ifndef PRODUCT
+ SharedRuntime::_new_array_ctr++; // new array requires GC
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+
+ // Scavenge and allocate an instance.
+ oop result;
+
+ if (Klass::cast(array_type)->oop_is_typeArray()) {
+ // The oopFactory likes to work with the element type.
+ // (We could bypass the oopFactory, since it doesn't add much value.)
+ BasicType elem_type = typeArrayKlass::cast(array_type)->element_type();
+ result = oopFactory::new_typeArray(elem_type, len, THREAD);
+ } else {
+ // Although the oopFactory likes to work with the elem_type,
+ // the compiler prefers the array_type, since it must already have
+ // that latter value in hand for the fast path.
+ klassOopDesc* elem_type = objArrayKlass::cast(array_type)->element_klass();
+ result = oopFactory::new_objArray(elem_type, len, THREAD);
+ }
+
+ // Pass oops back through thread local storage. Our apparent type to Java
+ // is that we return an oop, but we can block on exit from this routine and
+ // a GC can trash the oop in C's return register. The generated stub will
+ // fetch the oop from TLS after any possible GC.
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(result);
+ JRT_BLOCK_END;
+
+ if (GraphKit::use_ReduceInitialCardMarks()) {
+ // do them now so we don't have to do them on the fast path
+ do_eager_card_mark(thread);
+ }
+JRT_END
+
+// Note: multianewarray for one dimension is handled inline by GraphKit::new_array.
+
+// multianewarray for 2 dimensions
+JRT_ENTRY(void, OptoRuntime::multianewarray2_C(klassOopDesc* elem_type, int len1, int len2, JavaThread *thread))
+#ifndef PRODUCT
+  SharedRuntime::_multi2_ctr++;                // multianewarray for 2 dimensions
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+ assert(oop(elem_type)->is_klass(), "not a class");
+ jint dims[2];
+ dims[0] = len1;
+ dims[1] = len2;
+ oop obj = arrayKlass::cast(elem_type)->multi_allocate(2, dims, THREAD);
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(obj);
+JRT_END
+
+// multianewarray for 3 dimensions
+JRT_ENTRY(void, OptoRuntime::multianewarray3_C(klassOopDesc* elem_type, int len1, int len2, int len3, JavaThread *thread))
+#ifndef PRODUCT
+  SharedRuntime::_multi3_ctr++;                // multianewarray for 3 dimensions
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+ assert(oop(elem_type)->is_klass(), "not a class");
+ jint dims[3];
+ dims[0] = len1;
+ dims[1] = len2;
+ dims[2] = len3;
+ oop obj = arrayKlass::cast(elem_type)->multi_allocate(3, dims, THREAD);
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(obj);
+JRT_END
+
+// multianewarray for 4 dimensions
+JRT_ENTRY(void, OptoRuntime::multianewarray4_C(klassOopDesc* elem_type, int len1, int len2, int len3, int len4, JavaThread *thread))
+#ifndef PRODUCT
+  SharedRuntime::_multi4_ctr++;                // multianewarray for 4 dimensions
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+ assert(oop(elem_type)->is_klass(), "not a class");
+ jint dims[4];
+ dims[0] = len1;
+ dims[1] = len2;
+ dims[2] = len3;
+ dims[3] = len4;
+ oop obj = arrayKlass::cast(elem_type)->multi_allocate(4, dims, THREAD);
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(obj);
+JRT_END
+
+// multianewarray for 5 dimensions
+JRT_ENTRY(void, OptoRuntime::multianewarray5_C(klassOopDesc* elem_type, int len1, int len2, int len3, int len4, int len5, JavaThread *thread))
+#ifndef PRODUCT
+  SharedRuntime::_multi5_ctr++;                // multianewarray for 5 dimensions
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+ assert(oop(elem_type)->is_klass(), "not a class");
+ jint dims[5];
+ dims[0] = len1;
+ dims[1] = len2;
+ dims[2] = len3;
+ dims[3] = len4;
+ dims[4] = len5;
+ oop obj = arrayKlass::cast(elem_type)->multi_allocate(5, dims, THREAD);
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(obj);
+JRT_END
+
+const TypeFunc *OptoRuntime::new_instance_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Klass to be allocated
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+
+const TypeFunc *OptoRuntime::athrow_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Klass to be allocated
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+
+const TypeFunc *OptoRuntime::new_array_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
+ fields[TypeFunc::Parms+1] = TypeInt::INT; // array size
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc *OptoRuntime::multianewarray_Type(int ndim) {
+ // create input type (domain)
+ const int nargs = ndim + 1;
+ const Type **fields = TypeTuple::fields(nargs);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
+ for( int i = 1; i < nargs; i++ )
+ fields[TypeFunc::Parms + i] = TypeInt::INT; // array size
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+nargs, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc *OptoRuntime::multianewarray2_Type() {
+ return multianewarray_Type(2);
+}
+
+const TypeFunc *OptoRuntime::multianewarray3_Type() {
+ return multianewarray_Type(3);
+}
+
+const TypeFunc *OptoRuntime::multianewarray4_Type() {
+ return multianewarray_Type(4);
+}
+
+const TypeFunc *OptoRuntime::multianewarray5_Type() {
+ return multianewarray_Type(5);
+}
+
+const TypeFunc *OptoRuntime::uncommon_trap_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+  // trap request: an int encoding the deoptimization reason and action
+  fields[TypeFunc::Parms+0] = TypeInt::INT;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+// Type used for stub generation for zap_dead_locals.
+// No inputs or outputs
+const TypeFunc *OptoRuntime::zap_dead_locals_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(0);
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms,fields);
+
+ return TypeFunc::make(domain,range);
+}
+# endif
+
+
+//-----------------------------------------------------------------------------
+// Monitor Handling
+const TypeFunc *OptoRuntime::complete_monitor_enter_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
+ fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+
+//-----------------------------------------------------------------------------
+const TypeFunc *OptoRuntime::complete_monitor_exit_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
+ fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+const TypeFunc* OptoRuntime::flush_windows_Type() {
+ // create input type (domain)
+ const Type** fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms, fields);
+
+ // create result type
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc* OptoRuntime::l2f_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeLong::LONG;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = Type::FLOAT;
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc* OptoRuntime::modf_Type() {
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = Type::FLOAT;
+ fields[TypeFunc::Parms+1] = Type::FLOAT;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = Type::FLOAT;
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc *OptoRuntime::Math_D_D_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+  // the double argument (its second HALF slot follows)
+ fields[TypeFunc::Parms+0] = Type::DOUBLE;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = Type::DOUBLE;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc* OptoRuntime::Math_DD_D_Type() {
+ const Type **fields = TypeTuple::fields(4);
+ fields[TypeFunc::Parms+0] = Type::DOUBLE;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ fields[TypeFunc::Parms+2] = Type::DOUBLE;
+ fields[TypeFunc::Parms+3] = Type::HALF;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+4, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = Type::DOUBLE;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+//-------------- currentTimeMillis
+
+const TypeFunc* OptoRuntime::current_time_millis_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(0);
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeLong::LONG;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+// arraycopy stub variations:
+enum ArrayCopyType {
+ ac_fast, // void(ptr, ptr, size_t)
+ ac_checkcast, // int(ptr, ptr, size_t, size_t, ptr)
+ ac_slow, // void(ptr, int, ptr, int, int)
+ ac_generic // int(ptr, int, ptr, int, int)
+};
+
+static const TypeFunc* make_arraycopy_Type(ArrayCopyType act) {
+ // create input type (domain)
+ int num_args = (act == ac_fast ? 3 : 5);
+ int num_size_args = (act == ac_fast ? 1 : act == ac_checkcast ? 2 : 0);
+ int argcnt = num_args;
+ LP64_ONLY(argcnt += num_size_args); // halfwords for lengths
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+ fields[argp++] = TypePtr::NOTNULL; // src
+ if (num_size_args == 0) {
+ fields[argp++] = TypeInt::INT; // src_pos
+ }
+ fields[argp++] = TypePtr::NOTNULL; // dest
+ if (num_size_args == 0) {
+ fields[argp++] = TypeInt::INT; // dest_pos
+ fields[argp++] = TypeInt::INT; // length
+ }
+ while (num_size_args-- > 0) {
+ fields[argp++] = TypeX_X; // size in whatevers (size_t)
+ LP64_ONLY(fields[argp++] = Type::HALF); // other half of long length
+ }
+ if (act == ac_checkcast) {
+ fields[argp++] = TypePtr::NOTNULL; // super_klass
+ }
+ assert(argp == TypeFunc::Parms+argcnt, "correct decoding of act");
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+ // create result type if needed
+ int retcnt = (act == ac_checkcast || act == ac_generic ? 1 : 0);
+ fields = TypeTuple::fields(1);
+ if (retcnt == 0)
+ fields[TypeFunc::Parms+0] = NULL; // void
+ else
+ fields[TypeFunc::Parms+0] = TypeInt::INT; // status result, if needed
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+retcnt, fields);
+ return TypeFunc::make(domain, range);
+}
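+// For example, ac_slow and ac_generic get the System.arraycopy-shaped domain
+// (src, src_pos, dest, dest_pos, length); ac_fast gets (src, dest, size) and
+// ac_checkcast gets (src, dest, size, size, super_klass), each size_t length
+// carrying an extra HALF slot on LP64.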
+
+const TypeFunc* OptoRuntime::fast_arraycopy_Type() {
+ // This signature is simple: Two base pointers and a size_t.
+ return make_arraycopy_Type(ac_fast);
+}
+
+const TypeFunc* OptoRuntime::checkcast_arraycopy_Type() {
+ // An extension of fast_arraycopy_Type which adds type checking.
+ return make_arraycopy_Type(ac_checkcast);
+}
+
+const TypeFunc* OptoRuntime::slow_arraycopy_Type() {
+ // This signature is exactly the same as System.arraycopy.
+ // There are no intptr_t (int/long) arguments.
+ return make_arraycopy_Type(ac_slow);
+}
+
+const TypeFunc* OptoRuntime::generic_arraycopy_Type() {
+ // This signature is like System.arraycopy, except that it returns status.
+ return make_arraycopy_Type(ac_generic);
+}
+
+
+//------------- Interpreter state access for on stack replacement
+const TypeFunc* OptoRuntime::osr_end_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // OSR temp buf
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ // create result type
+ fields = TypeTuple::fields(1);
+ // fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // locked oop
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+ return TypeFunc::make(domain, range);
+}
+
+//-------------- methodData update helpers
+
+const TypeFunc* OptoRuntime::profile_receiver_type_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeAryPtr::NOTNULL; // methodData pointer
+ fields[TypeFunc::Parms+1] = TypeInstPtr::BOTTOM; // receiver oop
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+ return TypeFunc::make(domain,range);
+}
+
+JRT_LEAF(void, OptoRuntime::profile_receiver_type_C(DataLayout* data, oopDesc* receiver))
+ if (receiver == NULL) return;
+ klassOop receiver_klass = receiver->klass();
+
+ intptr_t* mdp = ((intptr_t*)(data)) + DataLayout::header_size_in_cells();
+ int empty_row = -1; // free row, if any is encountered
+
+ // ReceiverTypeData* vc = new ReceiverTypeData(mdp);
+ for (uint row = 0; row < ReceiverTypeData::row_limit(); row++) {
+ // if (vc->receiver(row) == receiver_klass)
+ int receiver_off = ReceiverTypeData::receiver_cell_index(row);
+ intptr_t row_recv = *(mdp + receiver_off);
+ if (row_recv == (intptr_t) receiver_klass) {
+ // vc->set_receiver_count(row, vc->receiver_count(row) + DataLayout::counter_increment);
+ int count_off = ReceiverTypeData::receiver_count_cell_index(row);
+ *(mdp + count_off) += DataLayout::counter_increment;
+ return;
+ } else if (row_recv == 0) {
+ // else if (vc->receiver(row) == NULL)
+ empty_row = (int) row;
+ }
+ }
+
+ if (empty_row != -1) {
+ int receiver_off = ReceiverTypeData::receiver_cell_index(empty_row);
+ // vc->set_receiver(empty_row, receiver_klass);
+ *(mdp + receiver_off) = (intptr_t) receiver_klass;
+ // vc->set_receiver_count(empty_row, DataLayout::counter_increment);
+ int count_off = ReceiverTypeData::receiver_count_cell_index(empty_row);
+ *(mdp + count_off) = DataLayout::counter_increment;
+ }
+JRT_END
+
+//-----------------------------------------------------------------------------
+// implicit exception support.
+
+static void report_null_exception_in_code_cache(address exception_pc) {
+ ResourceMark rm;
+ CodeBlob* n = CodeCache::find_blob(exception_pc);
+ if (n != NULL) {
+ tty->print_cr("#");
+ tty->print_cr("# HotSpot Runtime Error, null exception in generated code");
+ tty->print_cr("#");
+ tty->print_cr("# pc where exception happened = " INTPTR_FORMAT, exception_pc);
+
+ if (n->is_nmethod()) {
+ methodOop method = ((nmethod*)n)->method();
+ tty->print_cr("# Method where it happened %s.%s ", Klass::cast(method->method_holder())->name()->as_C_string(), method->name()->as_C_string());
+ tty->print_cr("#");
+ if (ShowMessageBoxOnError && UpdateHotSpotCompilerFileOnError) {
+ const char* title = "HotSpot Runtime Error";
+ const char* question = "Do you want to exclude compilation of this method in future runs?";
+ if (os::message_box(title, question)) {
+ CompilerOracle::append_comment_to_file("");
+ CompilerOracle::append_comment_to_file("Null exception in compiled code resulted in the following exclude");
+ CompilerOracle::append_comment_to_file("");
+ CompilerOracle::append_exclude_to_file(method);
+ tty->print_cr("#");
+ tty->print_cr("# %s has been updated to exclude the specified method", CompileCommandFile);
+ tty->print_cr("#");
+ }
+ }
+ fatal("Implicit null exception happened in compiled method");
+ } else {
+ n->print();
+ fatal("Implicit null exception happened in generated stub");
+ }
+ }
+ fatal("Implicit null exception at wrong place");
+}
+
+
+//-------------------------------------------------------------------------------------
+// register policy
+
+bool OptoRuntime::is_callee_saved_register(MachRegisterNumbers reg) {
+ assert(reg >= 0 && reg < _last_Mach_Reg, "must be a machine register");
+ switch (register_save_policy[reg]) {
+ case 'C': return false; //SOC
+ case 'E': return true ; //SOE
+ case 'N': return false; //NS
+ case 'A': return false; //AS
+ }
+ ShouldNotReachHere();
+ return false;
+}
+
+//-----------------------------------------------------------------------
+// Exceptions
+//
+
+static void trace_exception(oop exception_oop, address exception_pc, const char* msg) PRODUCT_RETURN;
+
+// This method is an entry that is always called by a C++ method, not directly
+// from compiled code; compiled code calls the C++ method that follows it.
+// We can't allow an async exception to be installed during exception processing.
+JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* thread, nmethod* &nm))
+
+ // Do not confuse exception_oop with pending_exception. The exception_oop
+  // is only used to pass arguments into the method, not for general
+  // exception handling.  DO NOT CHANGE IT to use pending_exception, since
+  // the runtime stubs check this on exit.
+ assert(thread->exception_oop() != NULL, "exception oop is found");
+ address handler_address = NULL;
+
+ Handle exception(thread, thread->exception_oop());
+
+ if (TraceExceptions) {
+ trace_exception(exception(), thread->exception_pc(), "");
+ }
+ // for AbortVMOnException flag
+ NOT_PRODUCT(Exceptions::debug_check_abort(exception));
+
+ #ifdef ASSERT
+ if (!(exception->is_a(SystemDictionary::throwable_klass()))) {
+ // should throw an exception here
+ ShouldNotReachHere();
+ }
+ #endif
+
+
+ // new exception handling: this method is entered only from adapters
+ // exceptions from compiled java methods are handled in compiled code
+ // using rethrow node
+
+ address pc = thread->exception_pc();
+ nm = CodeCache::find_nmethod(pc);
+ assert(nm != NULL, "No NMethod found");
+ if (nm->is_native_method()) {
+    fatal("Native method should not have path to exception handling");
+ } else {
+    // we are switching to the old paradigm: search for the exception handler in
+    // caller_frame instead of in the exception handler of caller_frame.sender()
+
+ if (JvmtiExport::can_post_exceptions()) {
+ // "Full-speed catching" is not necessary here,
+ // since we're notifying the VM on every catch.
+ // Force deoptimization and the rest of the lookup
+ // will be fine.
+ deoptimize_caller_frame(thread, true);
+ }
+
+ // Check the stack guard pages. If enabled, look for handler in this frame;
+ // otherwise, forcibly unwind the frame.
+ //
+ // 4826555: use default current sp for reguard_stack instead of &nm: it's more accurate.
+ bool force_unwind = !thread->reguard_stack();
+ bool deopting = false;
+ if (nm->is_deopt_pc(pc)) {
+ deopting = true;
+ RegisterMap map(thread, false);
+ frame deoptee = thread->last_frame().sender(&map);
+ assert(deoptee.is_deoptimized_frame(), "must be deopted");
+ // Adjust the pc back to the original throwing pc
+ pc = deoptee.pc();
+ }
+
+ // If we are forcing an unwind because of stack overflow then deopt is
+    // irrelevant since we are throwing the frame away anyway.
+
+ if (deopting && !force_unwind) {
+ handler_address = SharedRuntime::deopt_blob()->unpack_with_exception();
+ } else {
+
+ handler_address =
+ force_unwind ? NULL : nm->handler_for_exception_and_pc(exception, pc);
+
+ if (handler_address == NULL) {
+ handler_address = SharedRuntime::compute_compiled_exc_handler(nm, pc, exception, force_unwind, true);
+ assert (handler_address != NULL, "must have compiled handler");
+ // Update the exception cache only when the unwind was not forced.
+ if (!force_unwind) {
+ nm->add_handler_for_exception_and_pc(exception,pc,handler_address);
+ }
+ } else {
+ assert(handler_address == SharedRuntime::compute_compiled_exc_handler(nm, pc, exception, force_unwind, true), "Must be the same");
+ }
+ }
+
+ thread->set_exception_pc(pc);
+ thread->set_exception_handler_pc(handler_address);
+ thread->set_exception_stack_size(0);
+ }
+
+ // Restore correct return pc. Was saved above.
+ thread->set_exception_oop(exception());
+ return handler_address;
+
+JRT_END
+
+// We are entering here from exception_blob
+// If there is a compiled exception handler in this method, we will continue there;
+// otherwise we will unwind the stack and continue at the caller of top frame method
+// Note we enter without the usual JRT wrapper. We will call a helper routine that
+// will do the normal VM entry. We do it this way so that we can see if the nmethod
+// we looked up the handler for has been deoptimized in the meantime. If it has been
+// we must not use the handler and instead return the deopt blob.
+address OptoRuntime::handle_exception_C(JavaThread* thread) {
+//
+// We are in Java not VM and in debug mode we have a NoHandleMark
+//
+#ifndef PRODUCT
+ SharedRuntime::_find_handler_ctr++; // find exception handler
+#endif
+ debug_only(NoHandleMark __hm;)
+ nmethod* nm = NULL;
+ address handler_address = NULL;
+ {
+ // Enter the VM
+
+ ResetNoHandleMark rnhm;
+ handler_address = handle_exception_C_helper(thread, nm);
+ }
+
+ // Back in java: Use no oops, DON'T safepoint
+
+ // Now check to see if the handler we are returning is in a now
+ // deoptimized frame
+
+ if (nm != NULL) {
+ RegisterMap map(thread, false);
+ frame caller = thread->last_frame().sender(&map);
+#ifdef ASSERT
+ assert(caller.is_compiled_frame(), "must be");
+#endif // ASSERT
+ if (caller.is_deoptimized_frame()) {
+ handler_address = SharedRuntime::deopt_blob()->unpack_with_exception();
+ }
+ }
+ return handler_address;
+}
+
+//------------------------------rethrow----------------------------------------
+// We get here after compiled code has executed a 'RethrowNode'. The callee
+// is either throwing or rethrowing an exception. The callee-save registers
+// have been restored, synchronized objects have been unlocked and the callee
+// stack frame has been removed. The return address was passed in.
+// Exception oop is passed as the 1st argument. This routine is then called
+// from the stub. On exit, we know where to jump in the caller's code.
+// After this C code exits, the stub will pop his frame and end in a jump
+// (instead of a return). We enter the caller's default handler.
+//
+// This must be JRT_LEAF:
+// - caller will not change its state as we cannot block on exit,
+// therefore raw_exception_handler_for_return_address is all it takes
+// to handle deoptimized blobs
+//
+// However, there needs to be a safepoint check in the middle! So compiled
+// safepoints are completely watertight.
+//
+// Thus, it cannot be a leaf since it contains the No_GC_Verifier.
+//
+// *THIS IS NOT RECOMMENDED PROGRAMMING STYLE*
+//
+address OptoRuntime::rethrow_C(oopDesc* exception, JavaThread* thread, address ret_pc) {
+#ifndef PRODUCT
+ SharedRuntime::_rethrow_ctr++; // count rethrows
+#endif
+  assert (exception != NULL, "should have thrown a NullPointerException");
+#ifdef ASSERT
+ if (!(exception->is_a(SystemDictionary::throwable_klass()))) {
+ // should throw an exception here
+ ShouldNotReachHere();
+ }
+#endif
+
+ thread->set_vm_result(exception);
+ // Frame not compiled (handles deoptimization blob)
+ return SharedRuntime::raw_exception_handler_for_return_address(ret_pc);
+}
+
+
+const TypeFunc *OptoRuntime::rethrow_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+
+void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
+ // Deoptimize frame
+ if (doit) {
+ // Called from within the owner thread, so no need for safepoint
+ RegisterMap reg_map(thread);
+ frame stub_frame = thread->last_frame();
+ assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
+ frame caller_frame = stub_frame.sender(&reg_map);
+
+ VM_DeoptimizeFrame deopt(thread, caller_frame.id());
+ VMThread::execute(&deopt);
+ }
+}
+
+
+const TypeFunc *OptoRuntime::register_finalizer_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // oop; Receiver
+ // // The JavaThread* is passed to each routine as the last argument
+ // fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // JavaThread *; Executing thread
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+
+//-----------------------------------------------------------------------------
+// Dtrace support. entry and exit probes have the same signature
+const TypeFunc *OptoRuntime::dtrace_method_entry_exit_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // Thread-local storage
+ fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // methodOop; Method we are entering
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+const TypeFunc *OptoRuntime::dtrace_object_alloc_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // Thread-local storage
+ fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // oop; newly allocated object
+
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+
+JRT_ENTRY_NO_ASYNC(void, OptoRuntime::register_finalizer(oopDesc* obj, JavaThread* thread))
+ assert(obj->is_oop(), "must be a valid oop");
+ assert(obj->klass()->klass_part()->has_finalizer(), "shouldn't be here otherwise");
+ instanceKlass::register_finalizer(instanceOop(obj), CHECK);
+JRT_END
+
+//-----------------------------------------------------------------------------
+
+NamedCounter * volatile OptoRuntime::_named_counters = NULL;
+
+//
+// dump the collected NamedCounters.
+//
+void OptoRuntime::print_named_counters() {
+ int total_lock_count = 0;
+ int eliminated_lock_count = 0;
+
+ NamedCounter* c = _named_counters;
+ while (c) {
+ if (c->tag() == NamedCounter::LockCounter || c->tag() == NamedCounter::EliminatedLockCounter) {
+ int count = c->count();
+ if (count > 0) {
+ bool eliminated = c->tag() == NamedCounter::EliminatedLockCounter;
+ if (Verbose) {
+ tty->print_cr("%d %s%s", count, c->name(), eliminated ? " (eliminated)" : "");
+ }
+ total_lock_count += count;
+ if (eliminated) {
+ eliminated_lock_count += count;
+ }
+ }
+ } else if (c->tag() == NamedCounter::BiasedLockingCounter) {
+ BiasedLockingCounters* blc = ((BiasedLockingNamedCounter*)c)->counters();
+ if (blc->nonzero()) {
+ tty->print_cr("%s", c->name());
+ blc->print_on(tty);
+ }
+ }
+ c = c->next();
+ }
+ if (total_lock_count > 0) {
+ tty->print_cr("dynamic locks: %d", total_lock_count);
+ if (eliminated_lock_count) {
+ tty->print_cr("eliminated locks: %d (%d%%)", eliminated_lock_count,
+ (int)(eliminated_lock_count * 100.0 / total_lock_count));
+ }
+ }
+}
+
+//
+// Allocate a new NamedCounter. The JVMState is used to generate the
+// name, which consists of method@bci entries for the inlining tree.
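+// For example, a counter for a lock one level deep in the inlining tree gets a
+// name like "Inner.callee@3 Outer.caller@12" -- one holder.method@bci entry per
+// inlined scope, youngest first.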
+//
+
+NamedCounter* OptoRuntime::new_named_counter(JVMState* youngest_jvms, NamedCounter::CounterTag tag) {
+ int max_depth = youngest_jvms->depth();
+
+ // Visit scopes from youngest to oldest.
+ bool first = true;
+ stringStream st;
+ for (int depth = max_depth; depth >= 1; depth--) {
+ JVMState* jvms = youngest_jvms->of_depth(depth);
+ ciMethod* m = jvms->has_method() ? jvms->method() : NULL;
+ if (!first) {
+ st.print(" ");
+ } else {
+ first = false;
+ }
+ int bci = jvms->bci();
+ if (bci < 0) bci = 0;
+ st.print("%s.%s@%d", m->holder()->name()->as_utf8(), m->name()->as_utf8(), bci);
+ // To print linenumbers instead of bci use: m->line_number_from_bci(bci)
+ }
+ NamedCounter* c;
+ if (tag == NamedCounter::BiasedLockingCounter) {
+ c = new BiasedLockingNamedCounter(strdup(st.as_string()));
+ } else {
+ c = new NamedCounter(strdup(st.as_string()), tag);
+ }
+
+ // atomically add the new counter to the head of the list. We only
+ // add counters so this is safe.
+ NamedCounter* head;
+ do {
+ head = _named_counters;
+ c->set_next(head);
+ } while (Atomic::cmpxchg_ptr(c, &_named_counters, head) != head);
+ return c;
+}
+
+//-----------------------------------------------------------------------------
+// Non-product code
+#ifndef PRODUCT
+
+int trace_exception_counter = 0;
+static void trace_exception(oop exception_oop, address exception_pc, const char* msg) {
+ ttyLocker ttyl;
+ trace_exception_counter++;
+ tty->print("%d [Exception (%s): ", trace_exception_counter, msg);
+ exception_oop->print_value();
+ tty->print(" in ");
+ CodeBlob* blob = CodeCache::find_blob(exception_pc);
+ if (blob->is_nmethod()) {
+ ((nmethod*)blob)->method()->print_value();
+ } else if (blob->is_runtime_stub()) {
+ tty->print("<runtime-stub>");
+ } else {
+ tty->print("<unknown>");
+ }
+ tty->print(" at " INTPTR_FORMAT, exception_pc);
+ tty->print_cr("]");
+}
+
+#endif // PRODUCT
+
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+// Called from call sites in compiled code with oop maps (actually safepoints)
+// Zaps dead locals in first java frame.
+// Is entry because may need to lock to generate oop maps
+// Currently, only used for compiler frames, but someday may be used
+// for interpreter frames, too.
+
+int OptoRuntime::ZapDeadCompiledLocals_count = 0;
+
+// avoid pointers to member funcs with these helpers
+static bool is_java_frame( frame* f) { return f->is_java_frame(); }
+static bool is_native_frame(frame* f) { return f->is_native_frame(); }
+
+
+void OptoRuntime::zap_dead_java_or_native_locals(JavaThread* thread,
+ bool (*is_this_the_right_frame_to_zap)(frame*)) {
+ assert(JavaThread::current() == thread, "is this needed?");
+
+ if ( !ZapDeadCompiledLocals ) return;
+
+ bool skip = false;
+
+ if ( ZapDeadCompiledLocalsFirst == 0 ) ; // nothing special
+ else if ( ZapDeadCompiledLocalsFirst > ZapDeadCompiledLocals_count ) skip = true;
+ else if ( ZapDeadCompiledLocalsFirst == ZapDeadCompiledLocals_count )
+ warning("starting zapping after skipping");
+
+ if ( ZapDeadCompiledLocalsLast == -1 ) ; // nothing special
+ else if ( ZapDeadCompiledLocalsLast < ZapDeadCompiledLocals_count ) skip = true;
+ else if ( ZapDeadCompiledLocalsLast == ZapDeadCompiledLocals_count )
+ warning("about to zap last zap");
+
+ ++ZapDeadCompiledLocals_count; // counts skipped zaps, too
+
+ if ( skip ) return;
+
+ // find java frame and zap it
+
+ for (StackFrameStream sfs(thread); !sfs.is_done(); sfs.next()) {
+ if (is_this_the_right_frame_to_zap(sfs.current()) ) {
+ sfs.current()->zap_dead_locals(thread, sfs.register_map());
+ return;
+ }
+ }
+ warning("no frame found to zap in zap_dead_Java_locals_C");
+}
+
+JRT_LEAF(void, OptoRuntime::zap_dead_Java_locals_C(JavaThread* thread))
+ zap_dead_java_or_native_locals(thread, is_java_frame);
+JRT_END
+
+// The following does not work because, for one thing, the
+// thread state is wrong; it expects java, but it is native.
+// Also, the invariants in a native stub are different and
+// I'm not sure it is safe to have a MachCallRuntimeDirectNode
+// in there.
+// So for now, we do not zap in native stubs.
+
+JRT_LEAF(void, OptoRuntime::zap_dead_native_locals_C(JavaThread* thread))
+ zap_dead_java_or_native_locals(thread, is_native_frame);
+JRT_END
+
+# endif
diff --git a/src/share/vm/opto/runtime.hpp b/src/share/vm/opto/runtime.hpp
new file mode 100644
index 000000000..50f117120
--- /dev/null
+++ b/src/share/vm/opto/runtime.hpp
@@ -0,0 +1,289 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//------------------------------OptoRuntime------------------------------------
+// Opto compiler runtime routines
+//
+// These are all generated from Ideal graphs. They are called with the
+// Java calling convention. Internally they call C++. They are made once at
+// startup time and Opto compiles calls to them later.
+// Things are broken up into quads: the signature they will be called with,
+// the address of the generated code, the corresponding C++ code and an
+// nmethod.
+
+// The signature (returned by "xxx_Type()") is used at startup time by the
+// Generator to make the generated code "xxx_Java". Opto compiles calls
+// to the generated code "xxx_Java". When the compiled code gets executed,
+// it calls the C++ code "xxx_C". The generated nmethod is saved in the
+// CodeCache. Exception handlers use the nmethod to get the callee-save
+// register OopMaps.
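+//
+// For example, for the slow-path allocation entry declared below:
+// new_instance_Type() gives the signature, the generated stub is kept in
+// _new_instance_Java (returned by new_instance_Java()), and the stub calls
+// the C++ slow path new_instance_C().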
+class CallInfo;
+
+//
+// NamedCounters are tagged counters which can be used for profiling
+// code in various ways. Currently they are used by the lock coarsening code
+//
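+// Rough flow (pieced together from the declarations below): new_named_counter()
+// allocates a counter and links it onto _named_counters, code can bump the int
+// exposed via addr(), and print_named_counters() dumps the totals.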
+
+class NamedCounter : public CHeapObj {
+public:
+ enum CounterTag {
+ NoTag,
+ LockCounter,
+ EliminatedLockCounter,
+ BiasedLockingCounter
+ };
+
+private:
+ const char * _name;
+ int _count;
+ CounterTag _tag;
+ NamedCounter* _next;
+
+ public:
+ NamedCounter(const char *n, CounterTag tag = NoTag):
+ _name(n),
+ _count(0),
+ _next(NULL),
+ _tag(tag) {}
+
+ const char * name() const { return _name; }
+ int count() const { return _count; }
+ address addr() { return (address)&_count; }
+ CounterTag tag() const { return _tag; }
+ void set_tag(CounterTag tag) { _tag = tag; }
+
+ NamedCounter* next() const { return _next; }
+ void set_next(NamedCounter* next) {
+ assert(_next == NULL, "already set");
+ _next = next;
+ }
+
+};
+
+class BiasedLockingNamedCounter : public NamedCounter {
+ private:
+ BiasedLockingCounters _counters;
+
+ public:
+ BiasedLockingNamedCounter(const char *n) :
+ NamedCounter(n, BiasedLockingCounter), _counters() {}
+
+ BiasedLockingCounters* counters() { return &_counters; }
+};
+
+typedef const TypeFunc*(*TypeFunc_generator)();
+
+class OptoRuntime : public AllStatic {
+ friend class Matcher; // allow access to stub names
+
+ private:
+ // define stubs
+ static address generate_stub(ciEnv* ci_env, TypeFunc_generator gen, address C_function, const char *name, int is_fancy_jump, bool pass_tls, bool save_arguments, bool return_pc);
+
+ // References to generated stubs
+ static address _new_instance_Java;
+ static address _new_array_Java;
+ static address _multianewarray2_Java;
+ static address _multianewarray3_Java;
+ static address _multianewarray4_Java;
+ static address _multianewarray5_Java;
+ static address _vtable_must_compile_Java;
+ static address _complete_monitor_locking_Java;
+ static address _rethrow_Java;
+
+ static address _slow_arraycopy_Java;
+ static address _register_finalizer_Java;
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ static address _zap_dead_Java_locals_Java;
+ static address _zap_dead_native_locals_Java;
+# endif
+
+
+ //
+ // Implementation of runtime methods
+ // =================================
+
+ // Allocate storage for a Java instance.
+ static void new_instance_C(klassOopDesc* instance_klass, JavaThread *thread);
+
+  // Allocate storage for an objArray or typeArray
+ static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
+
+ // Post-allocation step for implementing ReduceInitialCardMarks:
+ static void do_eager_card_mark(JavaThread* thread);
+
+  // Allocate storage for a multi-dimensional array
+ // Note: needs to be fixed for arbitrary number of dimensions
+ static void multianewarray2_C(klassOopDesc* klass, int len1, int len2, JavaThread *thread);
+ static void multianewarray3_C(klassOopDesc* klass, int len1, int len2, int len3, JavaThread *thread);
+ static void multianewarray4_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, JavaThread *thread);
+ static void multianewarray5_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread);
+
+public:
+ // Slow-path Locking and Unlocking
+ static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
+ static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock);
+
+private:
+
+ // Implicit exception support
+ static void throw_null_exception_C(JavaThread* thread);
+
+ // Exception handling
+ static address handle_exception_C (JavaThread* thread);
+ static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
+ static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc );
+ static void deoptimize_caller_frame (JavaThread *thread, bool doit);
+
+ // CodeBlob support
+ // ===================================================================
+
+ static ExceptionBlob* _exception_blob;
+ static void generate_exception_blob();
+
+ static void register_finalizer(oopDesc* obj, JavaThread* thread);
+
+  // zapping dead locals, either from Java frames or from native frames
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ static void zap_dead_Java_locals_C( JavaThread* thread);
+ static void zap_dead_native_locals_C( JavaThread* thread);
+
+ static void zap_dead_java_or_native_locals( JavaThread*, bool (*)(frame*));
+
+ public:
+ static int ZapDeadCompiledLocals_count;
+
+# endif
+
+
+ public:
+
+ static bool is_callee_saved_register(MachRegisterNumbers reg);
+
+ // One time only generate runtime code stubs
+ static void generate(ciEnv* env);
+
+ // Returns the name of a stub
+ static const char* stub_name(address entry);
+
+ // access to runtime stubs entry points for java code
+ static address new_instance_Java() { return _new_instance_Java; }
+ static address new_array_Java() { return _new_array_Java; }
+ static address multianewarray2_Java() { return _multianewarray2_Java; }
+ static address multianewarray3_Java() { return _multianewarray3_Java; }
+ static address multianewarray4_Java() { return _multianewarray4_Java; }
+ static address multianewarray5_Java() { return _multianewarray5_Java; }
+ static address vtable_must_compile_stub() { return _vtable_must_compile_Java; }
+ static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; }
+
+ static address slow_arraycopy_Java() { return _slow_arraycopy_Java; }
+ static address register_finalizer_Java() { return _register_finalizer_Java; }
+
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ static address zap_dead_locals_stub(bool is_native) { return is_native
+ ? _zap_dead_native_locals_Java
+ : _zap_dead_Java_locals_Java; }
+ static MachNode* node_to_call_zap_dead_locals(Node* n, int block_num, bool is_native);
+# endif
+
+ static ExceptionBlob* exception_blob() { return _exception_blob; }
+
+ // Leaf routines helping with method data update
+ static void profile_receiver_type_C(DataLayout* data, oopDesc* receiver);
+
+ // Implicit exception support
+ static void throw_div0_exception_C (JavaThread* thread);
+ static void throw_stack_overflow_error_C(JavaThread* thread);
+
+ // Exception handling
+ static address rethrow_stub() { return _rethrow_Java; }
+
+
+ // Type functions
+ // ======================================================
+
+ static const TypeFunc* new_instance_Type(); // object allocation (slow case)
+ static const TypeFunc* new_array_Type (); // [a]newarray (slow case)
+ static const TypeFunc* multianewarray_Type(int ndim); // multianewarray
+ static const TypeFunc* multianewarray2_Type(); // multianewarray
+ static const TypeFunc* multianewarray3_Type(); // multianewarray
+ static const TypeFunc* multianewarray4_Type(); // multianewarray
+ static const TypeFunc* multianewarray5_Type(); // multianewarray
+ static const TypeFunc* complete_monitor_enter_Type();
+ static const TypeFunc* complete_monitor_exit_Type();
+ static const TypeFunc* uncommon_trap_Type();
+ static const TypeFunc* athrow_Type();
+ static const TypeFunc* rethrow_Type();
+ static const TypeFunc* Math_D_D_Type(); // sin,cos & friends
+ static const TypeFunc* Math_DD_D_Type(); // mod,pow & friends
+ static const TypeFunc* modf_Type();
+ static const TypeFunc* l2f_Type();
+ static const TypeFunc* current_time_millis_Type();
+
+ static const TypeFunc* flush_windows_Type();
+
+ // arraycopy routine types
+ static const TypeFunc* fast_arraycopy_Type(); // bit-blasters
+ static const TypeFunc* checkcast_arraycopy_Type();
+ static const TypeFunc* generic_arraycopy_Type();
+ static const TypeFunc* slow_arraycopy_Type(); // the full routine
+
+ // leaf on stack replacement interpreter accessor types
+ static const TypeFunc* osr_end_Type();
+
+ // leaf methodData routine types
+ static const TypeFunc* profile_receiver_type_Type();
+
+ // leaf on stack replacement interpreter accessor types
+ static const TypeFunc* fetch_int_Type();
+ static const TypeFunc* fetch_long_Type();
+ static const TypeFunc* fetch_float_Type();
+ static const TypeFunc* fetch_double_Type();
+ static const TypeFunc* fetch_oop_Type();
+ static const TypeFunc* fetch_monitor_Type();
+
+ static const TypeFunc* register_finalizer_Type();
+
+ // Dtrace support
+ static const TypeFunc* dtrace_method_entry_exit_Type();
+ static const TypeFunc* dtrace_object_alloc_Type();
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ static const TypeFunc* zap_dead_locals_Type();
+# endif
+
+ private:
+ static NamedCounter * volatile _named_counters;
+
+ public:
+  // helper function which creates a named counter labeled with the
+  // method and bci of each calling scope, if they are available
+ static NamedCounter* new_named_counter(JVMState* jvms, NamedCounter::CounterTag tag);
+
+ // dumps all the named counters
+ static void print_named_counters();
+
+};
diff --git a/src/share/vm/opto/split_if.cpp b/src/share/vm/opto/split_if.cpp
new file mode 100644
index 000000000..130b26675
--- /dev/null
+++ b/src/share/vm/opto/split_if.cpp
@@ -0,0 +1,536 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_split_if.cpp.incl"
+
+
+//------------------------------split_thru_region------------------------------
+// Split Node 'n' through merge point.
+Node *PhaseIdealLoop::split_thru_region( Node *n, Node *region ) {
+ uint wins = 0;
+ assert( n->is_CFG(), "" );
+ assert( region->is_Region(), "" );
+ Node *r = new (C, region->req()) RegionNode( region->req() );
+ IdealLoopTree *loop = get_loop( n );
+ for( uint i = 1; i < region->req(); i++ ) {
+ Node *x = n->clone();
+ Node *in0 = n->in(0);
+ if( in0->in(0) == region ) x->set_req( 0, in0->in(i) );
+ for( uint j = 1; j < n->req(); j++ ) {
+ Node *in = n->in(j);
+ if( get_ctrl(in) == region )
+ x->set_req( j, in->in(i) );
+ }
+ _igvn.register_new_node_with_optimizer(x);
+ set_loop(x, loop);
+ set_idom(x, x->in(0), dom_depth(x->in(0))+1);
+ r->init_req(i, x);
+ }
+
+ // Record region
+ r->set_req(0,region); // Not a TRUE RegionNode
+ _igvn.register_new_node_with_optimizer(r);
+ set_loop(r, loop);
+ if( !loop->_child )
+ loop->_body.push(r);
+ return r;
+}
+
+//------------------------------split_up---------------------------------------
+// Split block-local op up through the phis to empty the current block
+bool PhaseIdealLoop::split_up( Node *n, Node *blk1, Node *blk2 ) {
+ if( n->is_CFG() ) {
+ assert( n->in(0) != blk1, "Lousy candidate for split-if" );
+ return false;
+ }
+ if( get_ctrl(n) != blk1 && get_ctrl(n) != blk2 )
+ return false; // Not block local
+ if( n->is_Phi() ) return false; // Local PHIs are expected
+
+ // Recursively split-up inputs
+ for (uint i = 1; i < n->req(); i++) {
+ if( split_up( n->in(i), blk1, blk2 ) ) {
+ // Got split recursively and self went dead?
+ if (n->outcnt() == 0)
+ _igvn.remove_dead_node(n);
+ return true;
+ }
+ }
+
+ // Check for needing to clone-up a compare. Can't do that, it forces
+ // another (nested) split-if transform. Instead, clone it "down".
+ if( n->is_Cmp() ) {
+ assert(get_ctrl(n) == blk2 || get_ctrl(n) == blk1, "must be in block with IF");
+ // Check for simple Cmp/Bool/CMove which we can clone-up. Cmp/Bool/CMove
+ // sequence can have no other users and it must all reside in the split-if
+ // block. Non-simple Cmp/Bool/CMove sequences are 'cloned-down' below -
+ // private, per-use versions of the Cmp and Bool are made. These sink to
+ // the CMove block. If the CMove is in the split-if block, then in the
+ // next iteration this will become a simple Cmp/Bool/CMove set to clone-up.
+ Node *bol, *cmov;
+ if( !(n->outcnt() == 1 && n->unique_out()->is_Bool() &&
+ (bol = n->unique_out()->as_Bool()) &&
+ (get_ctrl(bol) == blk1 ||
+ get_ctrl(bol) == blk2) &&
+ bol->outcnt() == 1 &&
+ bol->unique_out()->is_CMove() &&
+ (cmov = bol->unique_out()->as_CMove()) &&
+ (get_ctrl(cmov) == blk1 ||
+ get_ctrl(cmov) == blk2) ) ) {
+
+ // Must clone down
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Cloning down: ");
+ n->dump();
+ }
+#endif
+ // Clone down any block-local BoolNode uses of this CmpNode
+ for (DUIterator i = n->outs(); n->has_out(i); i++) {
+ Node* bol = n->out(i);
+ assert( bol->is_Bool(), "" );
+ if (bol->outcnt() == 1) {
+ Node* use = bol->unique_out();
+ Node *use_c = use->is_If() ? use->in(0) : get_ctrl(use);
+ if (use_c == blk1 || use_c == blk2) {
+ continue;
+ }
+ }
+ if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) {
+ // Recursively sink any BoolNode
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Cloning down: ");
+ bol->dump();
+ }
+#endif
+ for (DUIterator_Last jmin, j = bol->last_outs(jmin); j >= jmin; --j) {
+ // Uses are either IfNodes or CMoves
+ Node* iff = bol->last_out(j);
+ assert( iff->in(1) == bol, "" );
+ // Get control block of either the CMove or the If input
+ Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
+ Node *x = bol->clone();
+ register_new_node(x, iff_ctrl);
+ _igvn.hash_delete(iff);
+ iff->set_req(1, x);
+ _igvn._worklist.push(iff);
+ }
+ _igvn.remove_dead_node( bol );
+ --i;
+ }
+ }
+ // Clone down this CmpNode
+ for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; --j) {
+ Node* bol = n->last_out(j);
+ assert( bol->in(1) == n, "" );
+ Node *x = n->clone();
+ register_new_node(x, get_ctrl(bol));
+ _igvn.hash_delete(bol);
+ bol->set_req(1, x);
+ _igvn._worklist.push(bol);
+ }
+ _igvn.remove_dead_node( n );
+
+ return true;
+ }
+ }
+
+  // See if we are splitting up a Store. Any anti-dep loads must go up as
+  // well. An anti-dep load might be in the wrong block, because in
+  // this particular layout/schedule we ignored anti-deps and allow
+  // memory to be alive twice. This only works if we do the same
+  // operations on anti-dep loads as we do on their killing stores.
+ if( n->is_Store() && n->in(MemNode::Memory)->in(0) == n->in(0) ) {
+ // Get store's memory slice
+ int alias_idx = C->get_alias_index(_igvn.type(n->in(MemNode::Address))->is_ptr());
+
+ // Get memory-phi anti-dep loads will be using
+ Node *memphi = n->in(MemNode::Memory);
+ assert( memphi->is_Phi(), "" );
+ // Hoist any anti-dep load to the splitting block;
+ // it will then "split-up".
+ for (DUIterator_Fast imax,i = memphi->fast_outs(imax); i < imax; i++) {
+ Node *load = memphi->fast_out(i);
+ if( load->is_Load() && alias_idx == C->get_alias_index(_igvn.type(load->in(MemNode::Address))->is_ptr()) )
+ set_ctrl(load,blk1);
+ }
+ }
+
+ // Found some other Node; must clone it up
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Cloning up: ");
+ n->dump();
+ }
+#endif
+
+ // Now actually split-up this guy. One copy per control path merging.
+ Node *phi = PhiNode::make_blank(blk1, n);
+ for( uint j = 1; j < blk1->req(); j++ ) {
+ Node *x = n->clone();
+ if( n->in(0) && n->in(0) == blk1 )
+ x->set_req( 0, blk1->in(j) );
+ for( uint i = 1; i < n->req(); i++ ) {
+ Node *m = n->in(i);
+ if( get_ctrl(m) == blk1 ) {
+ assert( m->in(0) == blk1, "" );
+ x->set_req( i, m->in(j) );
+ }
+ }
+ register_new_node( x, blk1->in(j) );
+ phi->init_req( j, x );
+ }
+ // Announce phi to optimizer
+ register_new_node(phi, blk1);
+
+ // Remove cloned-up value from optimizer; use phi instead
+ _igvn.hash_delete(n);
+ _igvn.subsume_node( n, phi );
+
+ // (There used to be a self-recursive call to split_up() here,
+ // but it is not needed. All necessary forward walking is done
+ // by do_split_if() below.)
+
+ return true;
+}
+
+//------------------------------register_new_node------------------------------
+void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) {
+ _igvn.register_new_node_with_optimizer(n);
+ set_ctrl(n, blk);
+ IdealLoopTree *loop = get_loop(blk);
+ if( !loop->_child )
+ loop->_body.push(n);
+}
+
+//------------------------------small_cache------------------------------------
+struct small_cache : public Dict {
+
+ small_cache() : Dict( cmpkey, hashptr ) {}
+ Node *probe( Node *use_blk ) { return (Node*)((*this)[use_blk]); }
+ void lru_insert( Node *use_blk, Node *new_def ) { Insert(use_blk,new_def); }
+};
+
+//------------------------------spinup-----------------------------------------
+// "Spin up" the dominator tree, starting at the use site and stopping when we
+// find the post-dominating point.
+
+// We must be at the merge point which post-dominates 'new_false' and
+// 'new_true'. Figure out which edges into the RegionNode eventually lead up
+// to false and which to true. Put in a PhiNode to merge values; plug in
+// the appropriate false-arm or true-arm values. If some path leads to the
+// original IF, then insert a Phi recursively.
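+//
+// Rough picture: walk idom() upward from the use block; if the walk enters
+// through 'new_false' or 'new_true', a clone of the def is pinned on that arm,
+// otherwise a merge point was reached and a Phi (or the existing Region, for
+// control) supplies the value, with its inputs found by spinning up each
+// predecessor in turn.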
+Node *PhaseIdealLoop::spinup( Node *iff_dom, Node *new_false, Node *new_true, Node *use_blk, Node *def, small_cache *cache ) {
+ if (use_blk->is_top()) // Handle dead uses
+ return use_blk;
+ Node *prior_n = (Node*)0xdeadbeef;
+ Node *n = use_blk; // Get path input
+ assert( use_blk != iff_dom, "" );
+  // Here is the "spin up the dominator tree" loop. Do a cache-check
+ // along the way, in case we've come this way before.
+ while( n != iff_dom ) { // Found post-dominating point?
+ prior_n = n;
+ n = idom(n); // Search higher
+ Node *s = cache->probe( prior_n ); // Check cache
+ if( s ) return s; // Cache hit!
+ }
+
+ Node *phi_post;
+ if( prior_n == new_false || prior_n == new_true ) {
+ phi_post = def->clone();
+ phi_post->set_req(0, prior_n );
+ register_new_node(phi_post, prior_n);
+ } else {
+    // This method handles both control uses (looking for Regions) and data
+ // uses (looking for Phis). If looking for a control use, then we need
+ // to insert a Region instead of a Phi; however Regions always exist
+ // previously (the hash_find_insert below would always hit) so we can
+ // return the existing Region.
+ if( def->is_CFG() ) {
+ phi_post = prior_n; // If looking for CFG, return prior
+ } else {
+ assert( def->is_Phi(), "" );
+ assert( prior_n->is_Region(), "must be a post-dominating merge point" );
+
+ // Need a Phi here
+ phi_post = PhiNode::make_blank(prior_n, def);
+ // Search for both true and false on all paths till find one.
+ for( uint i = 1; i < phi_post->req(); i++ ) // For all paths
+ phi_post->init_req( i, spinup( iff_dom, new_false, new_true, prior_n->in(i), def, cache ) );
+ Node *t = _igvn.hash_find_insert(phi_post);
+ if( t ) { // See if we already have this one
+ // phi_post will not be used, so kill it
+ _igvn.remove_dead_node(phi_post);
+ phi_post->destruct();
+ phi_post = t;
+ } else {
+ register_new_node( phi_post, prior_n );
+ }
+ }
+ }
+
+ // Update cache everywhere
+ prior_n = (Node*)0xdeadbeef; // Reset IDOM walk
+ n = use_blk; // Get path input
+ // Spin-up the idom tree again, basically doing path-compression.
+ // Insert cache entries along the way, so that if we ever hit this
+ // point in the IDOM tree again we'll stop immediately on a cache hit.
+ while( n != iff_dom ) { // Found post-dominating point?
+ prior_n = n;
+ n = idom(n); // Search higher
+ cache->lru_insert( prior_n, phi_post ); // Fill cache
+ } // End of while not gone high enough
+
+ return phi_post;
+}
+
+//------------------------------find_use_block---------------------------------
+// Find the block a USE is in. Normally USEs are in the same block as the
+// using instruction. For Phi USEs, the USE is in the predecessor block
+// along the corresponding path.
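+// (Concretely: if the def feeds Phi input j, the use block is taken to be
+// use->in(0)->in(j), the predecessor on that path.)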
+Node *PhaseIdealLoop::find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ) {
+ // CFG uses are their own block
+ if( use->is_CFG() )
+ return use;
+
+ if( use->is_Phi() ) { // Phi uses in prior block
+ // Grab the first Phi use; there may be many.
+    // Each will be handled as a separate iteration of
+ // the "while( phi->outcnt() )" loop.
+ uint j;
+ for( j = 1; j < use->req(); j++ )
+ if( use->in(j) == def )
+ break;
+ assert( j < use->req(), "def should be among use's inputs" );
+ return use->in(0)->in(j);
+ }
+ // Normal (non-phi) use
+ Node *use_blk = get_ctrl(use);
+ // Some uses are directly attached to the old (and going away)
+ // false and true branches.
+ if( use_blk == old_false ) {
+ use_blk = new_false;
+ set_ctrl(use, new_false);
+ }
+ if( use_blk == old_true ) {
+ use_blk = new_true;
+ set_ctrl(use, new_true);
+ }
+
+ if (use_blk == NULL) { // He's dead, Jim
+ _igvn.hash_delete(use);
+ _igvn.subsume_node(use, C->top());
+ }
+
+ return use_blk;
+}
+
+//------------------------------handle_use-------------------------------------
+// Handle uses of the merge point. Basically, split-if makes the merge point
+// go away so all uses of the merge point must go away as well. Most block
+// local uses have already been split-up, through the merge point. Uses from
+// far below the merge point can't always be split up (e.g., phi-uses are
+// pinned) and it makes too much stuff live. Instead we use a path-based
+// solution to move uses down.
+//
+// If the use is along the pre-split-CFG true branch, then the new use will
+// be from the post-split-CFG true merge point. Vice-versa for the false
+// path. Some uses will be along both paths; then we sink the use to the
+// post-dominating location; we may need to insert a Phi there.
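+//
+// For instance (purely illustrative), a use reachable only along the old true
+// path ends up reading the true-arm value, while a use below the
+// post-dominating merge reads a freshly inserted Phi of the two arms.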
+void PhaseIdealLoop::handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ) {
+
+ Node *use_blk = find_use_block(use,def,old_false,new_false,old_true,new_true);
+ if( !use_blk ) return; // He's dead, Jim
+
+ // Walk up the dominator tree until I hit either the old IfFalse, the old
+ // IfTrue or the old If. Insert Phis where needed.
+ Node *new_def = spinup( region_dom, new_false, new_true, use_blk, def, cache );
+
+ // Found where this USE goes. Re-point him.
+ uint i;
+ for( i = 0; i < use->req(); i++ )
+ if( use->in(i) == def )
+ break;
+ assert( i < use->req(), "def should be among use's inputs" );
+ _igvn.hash_delete(use);
+ use->set_req(i, new_def);
+ _igvn._worklist.push(use);
+}
+
+//------------------------------do_split_if------------------------------------
+// Found an If getting its condition-code input from a Phi in the same block.
+// Split thru the Region.
+void PhaseIdealLoop::do_split_if( Node *iff ) {
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations )
+ tty->print_cr("Split-if");
+#endif
+ C->set_major_progress();
+ Node *region = iff->in(0);
+ Node *region_dom = idom(region);
+
+ // We are going to clone this test (and the control flow with it) up through
+ // the incoming merge point. We need to empty the current basic block.
+ // Clone any instructions which must be in this block up through the merge
+ // point.
+ DUIterator i, j;
+ bool progress = true;
+ while (progress) {
+ progress = false;
+ for (i = region->outs(); region->has_out(i); i++) {
+ Node* n = region->out(i);
+ if( n == region ) continue;
+ // The IF to be split is OK.
+ if( n == iff ) continue;
+ if( !n->is_Phi() ) { // Found pinned memory op or such
+ if (split_up(n, region, iff)) {
+ i = region->refresh_out_pos(i);
+ progress = true;
+ }
+ continue;
+ }
+ assert( n->in(0) == region, "" );
+
+ // Recursively split up all users of a Phi
+ for (j = n->outs(); n->has_out(j); j++) {
+ Node* m = n->out(j);
+ // If m is dead, throw it away, and declare progress
+ if (_nodes[m->_idx] == NULL) {
+ _igvn.remove_dead_node(m);
+ // fall through
+ }
+ else if (m != iff && split_up(m, region, iff)) {
+ // fall through
+ } else {
+ continue;
+ }
+ // Something unpredictable changed.
+ // Tell the iterators to refresh themselves, and rerun the loop.
+ i = region->refresh_out_pos(i);
+ j = region->refresh_out_pos(j);
+ progress = true;
+ }
+ }
+ }
+
+ // Now we have no instructions in the block containing the IF.
+ // Split the IF.
+ Node *new_iff = split_thru_region( iff, region );
+
+ // Replace both uses of 'new_iff' with Regions merging True/False
+ // paths. This makes 'new_iff' go dead.
+ Node *old_false, *old_true;
+ Node *new_false, *new_true;
+ for (DUIterator_Last j2min, j2 = iff->last_outs(j2min); j2 >= j2min; --j2) {
+ Node *ifp = iff->last_out(j2);
+ assert( ifp->Opcode() == Op_IfFalse || ifp->Opcode() == Op_IfTrue, "" );
+ ifp->set_req(0, new_iff);
+ Node *ifpx = split_thru_region( ifp, region );
+
+ // Replace 'If' projection of a Region with a Region of
+ // 'If' projections.
+ ifpx->set_req(0, ifpx); // A TRUE RegionNode
+
+ // Setup dominator info
+ set_idom(ifpx, region_dom, dom_depth(region_dom) + 1);
+
+ // Check for splitting loop tails
+ if( get_loop(iff)->tail() == ifp )
+ get_loop(iff)->_tail = ifpx;
+
+ // Replace in the graph with lazy-update mechanism
+ new_iff->set_req(0, new_iff); // hook self so it does not go dead
+ lazy_replace_proj( ifp, ifpx );
+ new_iff->set_req(0, region);
+
+ // Record bits for later xforms
+ if( ifp->Opcode() == Op_IfFalse ) {
+ old_false = ifp;
+ new_false = ifpx;
+ } else {
+ old_true = ifp;
+ new_true = ifpx;
+ }
+ }
+ _igvn.remove_dead_node(new_iff);
+ // Lazy replace IDOM info with the region's dominator
+ lazy_replace( iff, region_dom );
+
+ // Now make the original merge point go dead, by handling all its uses.
+ small_cache region_cache;
+ // Preload some control flow in region-cache
+ region_cache.lru_insert( new_false, new_false );
+ region_cache.lru_insert( new_true , new_true );
+ // Now handle all uses of the splitting block
+ for (DUIterator_Last kmin, k = region->last_outs(kmin); k >= kmin; --k) {
+ Node* phi = region->last_out(k);
+ if( !phi->in(0) ) { // Dead phi? Remove it
+ _igvn.remove_dead_node(phi);
+ continue;
+ }
+ assert( phi->in(0) == region, "" );
+ if( phi == region ) { // Found the self-reference
+ phi->set_req(0, NULL);
+ continue; // Break the self-cycle
+ }
+ // Expected common case: Phi hanging off of Region
+ if( phi->is_Phi() ) {
+ // Need a per-def cache. Phi represents a def, so make a cache
+ small_cache phi_cache;
+
+ // Inspect all Phi uses to make the Phi go dead
+ for (DUIterator_Last lmin, l = phi->last_outs(lmin); l >= lmin; --l) {
+ Node* use = phi->last_out(l);
+ // Compute the new DEF for this USE. New DEF depends on the path
+ // taken from the original DEF to the USE. The new DEF may be some
+ // collection of PHI's merging values from different paths. The Phis
+ // inserted depend only on the location of the USE. We use a
+ // 2-element cache to handle multiple uses from the same block.
+ handle_use( use, phi, &phi_cache, region_dom, new_false, new_true, old_false, old_true );
+ } // End of while phi has uses
+
+ // Because handle_use might relocate region->_out,
+ // we must refresh the iterator.
+ k = region->last_outs(kmin);
+
+ // Remove the dead Phi
+ _igvn.remove_dead_node( phi );
+
+ } else {
+ // Random memory op guarded by Region. Compute new DEF for USE.
+ handle_use( phi, region, &region_cache, region_dom, new_false, new_true, old_false, old_true );
+ }
+
+ } // End of while merge point has phis
+
+ // Any leftover bits in the splitting block must not have depended on local
+ // Phi inputs (these have already been split-up). Hence it's safe to hoist
+ // these guys to the dominating point.
+ lazy_replace( region, region_dom );
+#ifndef PRODUCT
+ if( VerifyLoopOptimizations ) verify();
+#endif
+}
diff --git a/src/share/vm/opto/subnode.cpp b/src/share/vm/opto/subnode.cpp
new file mode 100644
index 000000000..1344197ca
--- /dev/null
+++ b/src/share/vm/opto/subnode.cpp
@@ -0,0 +1,1206 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_subnode.cpp.incl"
+#include "math.h"
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If right input is a constant 0, return the left input.
+Node *SubNode::Identity( PhaseTransform *phase ) {
+ assert(in(1) != this, "Must already have called Value");
+ assert(in(2) != this, "Must already have called Value");
+
+ // Remove double negation
+ const Type *zero = add_id();
+ if( phase->type( in(1) )->higher_equal( zero ) &&
+ in(2)->Opcode() == Opcode() &&
+ phase->type( in(2)->in(1) )->higher_equal( zero ) ) {
+ return in(2)->in(2);
+ }
+
+ // Convert "(X+Y) - Y" into X
+ if( in(1)->Opcode() == Op_AddI ) {
+ if( phase->eqv(in(1)->in(2),in(2)) )
+ return in(1)->in(1);
+ // Also catch: "(X + Opaque2(Y)) - Y". In this case, 'Y' is a loop-varying
+ // trip counter and X is likely to be loop-invariant (that's how O2 Nodes
+ // are originally used, although the optimizer sometimes jiggers things).
+ // This folding through an O2 removes a loop-exit use of a loop-varying
+ // value and generally lowers register pressure in and around the loop.
+ if( in(1)->in(2)->Opcode() == Op_Opaque2 &&
+ phase->eqv(in(1)->in(2)->in(1),in(2)) )
+ return in(1)->in(1);
+ }
+
+ return ( phase->type( in(2) )->higher_equal( zero ) ) ? in(1) : this;
+}
+
+//------------------------------Value------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubNode::Value( PhaseTransform *phase ) const {
+ const Node* in1 = in(1);
+ const Node* in2 = in(2);
+ // Either input is TOP ==> the result is TOP
+ const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
+ if( t2 == Type::TOP ) return Type::TOP;
+
+  // Not correct for SubFNode and AddFNode (must check for infinity)
+ // Equal? Subtract is zero
+ if (phase->eqv_uncast(in1, in2)) return add_id();
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
+ return bottom_type();
+
+ return sub(t1,t2); // Local flavor of type subtraction
+
+}
+
+//=============================================================================
+
+//------------------------------Helper function--------------------------------
+static bool ok_to_convert(Node* inc, Node* iv) {
+ // Do not collapse (x+c0)-y if "+" is a loop increment, because the
+ // "-" is loop invariant and collapsing extends the live-range of "x"
+ // to overlap with the "+", forcing another register to be used in
+ // the loop.
+ // This test will be clearer with '&&' (apply DeMorgan's rule)
+ // but I like the early cutouts that happen here.
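+  //
+  // Illustrative case: when 'inc' is the increment of a counted loop
+  // (inc = phi + 1), turning (phi+1) - y into (phi-y) + 1 would stretch the
+  // live range of 'phi' across the new subtract and cost an extra register
+  // in the loop.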
+ const PhiNode *phi;
+ if( ( !inc->in(1)->is_Phi() ||
+ !(phi=inc->in(1)->as_Phi()) ||
+ phi->is_copy() ||
+ !phi->region()->is_CountedLoop() ||
+ inc != phi->region()->as_CountedLoop()->incr() )
+ &&
+ // Do not collapse (x+c0)-iv if "iv" is a loop induction variable,
+ // because "x" maybe invariant.
+ ( !iv->is_loop_iv() )
+ ) {
+ return true;
+ } else {
+ return false;
+ }
+}
+//------------------------------Ideal------------------------------------------
+Node *SubINode::Ideal(PhaseGVN *phase, bool can_reshape){
+ Node *in1 = in(1);
+ Node *in2 = in(2);
+ uint op1 = in1->Opcode();
+ uint op2 = in2->Opcode();
+
+#ifdef ASSERT
+ // Check for dead loop
+ if( phase->eqv( in1, this ) || phase->eqv( in2, this ) ||
+ ( op1 == Op_AddI || op1 == Op_SubI ) &&
+ ( phase->eqv( in1->in(1), this ) || phase->eqv( in1->in(2), this ) ||
+ phase->eqv( in1->in(1), in1 ) || phase->eqv( in1->in(2), in1 ) ) )
+ assert(false, "dead loop in SubINode::Ideal");
+#endif
+
+ const Type *t2 = phase->type( in2 );
+ if( t2 == Type::TOP ) return NULL;
+ // Convert "x-c0" into "x+ -c0".
+ if( t2->base() == Type::Int ){ // Might be bottom or top...
+ const TypeInt *i = t2->is_int();
+ if( i->is_con() )
+ return new (phase->C, 3) AddINode(in1, phase->intcon(-i->get_con()));
+ }
+
+ // Convert "(x+c0) - y" into (x-y) + c0"
+ // Do not collapse (x+c0)-y if "+" is a loop increment or
+ // if "y" is a loop induction variable.
+ if( op1 == Op_AddI && ok_to_convert(in1, in2) ) {
+ const Type *tadd = phase->type( in1->in(2) );
+ if( tadd->singleton() && tadd != Type::TOP ) {
+ Node *sub2 = phase->transform( new (phase->C, 3) SubINode( in1->in(1), in2 ));
+ return new (phase->C, 3) AddINode( sub2, in1->in(2) );
+ }
+ }
+
+
+ // Convert "x - (y+c0)" into "(x-y) - c0"
+ // Need the same check as in above optimization but reversed.
+ if (op2 == Op_AddI && ok_to_convert(in2, in1)) {
+ Node* in21 = in2->in(1);
+ Node* in22 = in2->in(2);
+ const TypeInt* tcon = phase->type(in22)->isa_int();
+ if (tcon != NULL && tcon->is_con()) {
+ Node* sub2 = phase->transform( new (phase->C, 3) SubINode(in1, in21) );
+ Node* neg_c0 = phase->intcon(- tcon->get_con());
+ return new (phase->C, 3) AddINode(sub2, neg_c0);
+ }
+ }
+
+ const Type *t1 = phase->type( in1 );
+ if( t1 == Type::TOP ) return NULL;
+
+#ifdef ASSERT
+ // Check for dead loop
+ if( ( op2 == Op_AddI || op2 == Op_SubI ) &&
+ ( phase->eqv( in2->in(1), this ) || phase->eqv( in2->in(2), this ) ||
+ phase->eqv( in2->in(1), in2 ) || phase->eqv( in2->in(2), in2 ) ) )
+ assert(false, "dead loop in SubINode::Ideal");
+#endif
+
+ // Convert "x - (x+y)" into "-y"
+ if( op2 == Op_AddI &&
+ phase->eqv( in1, in2->in(1) ) )
+ return new (phase->C, 3) SubINode( phase->intcon(0),in2->in(2));
+ // Convert "(x-y) - x" into "-y"
+ if( op1 == Op_SubI &&
+ phase->eqv( in1->in(1), in2 ) )
+ return new (phase->C, 3) SubINode( phase->intcon(0),in1->in(2));
+ // Convert "x - (y+x)" into "-y"
+ if( op2 == Op_AddI &&
+ phase->eqv( in1, in2->in(2) ) )
+ return new (phase->C, 3) SubINode( phase->intcon(0),in2->in(1));
+
+ // Convert "0 - (x-y)" into "y-x"
+ if( t1 == TypeInt::ZERO && op2 == Op_SubI )
+ return new (phase->C, 3) SubINode( in2->in(2), in2->in(1) );
+
+ // Convert "0 - (x+con)" into "-con-x"
+ jint con;
+ if( t1 == TypeInt::ZERO && op2 == Op_AddI &&
+ (con = in2->in(2)->find_int_con(0)) != 0 )
+ return new (phase->C, 3) SubINode( phase->intcon(-con), in2->in(1) );
+
+ // Convert "(X+A) - (X+B)" into "A - B"
+ if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(1) )
+ return new (phase->C, 3) SubINode( in1->in(2), in2->in(2) );
+
+ // Convert "(A+X) - (B+X)" into "A - B"
+ if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(2) )
+ return new (phase->C, 3) SubINode( in1->in(1), in2->in(1) );
+
+ // Convert "A-(B-C)" into (A+C)-B", since add is commutative and generally
+ // nicer to optimize than subtract.
+ if( op2 == Op_SubI && in2->outcnt() == 1) {
+ Node *add1 = phase->transform( new (phase->C, 3) AddINode( in1, in2->in(2) ) );
+ return new (phase->C, 3) SubINode( add1, in2->in(1) );
+ }
+
+ return NULL;
+}
+
+//------------------------------sub--------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubINode::sub( const Type *t1, const Type *t2 ) const {
+ const TypeInt *r0 = t1->is_int(); // Handy access
+ const TypeInt *r1 = t2->is_int();
+ int32 lo = r0->_lo - r1->_hi;
+ int32 hi = r0->_hi - r1->_lo;
+
+ // We next check for 32-bit overflow.
+ // If that happens, we just assume all integers are possible.
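+  // (e.g. r0 = [min_jint..0], r1 = [0..max_jint]: lo = min_jint - max_jint
+  // wraps around to +1, the signs of r0->_lo and lo differ, and we fall back
+  // to TypeInt::INT.)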
+ if( (((r0->_lo ^ r1->_hi) >= 0) || // lo ends have same signs OR
+ ((r0->_lo ^ lo) >= 0)) && // lo results have same signs AND
+ (((r0->_hi ^ r1->_lo) >= 0) || // hi ends have same signs OR
+ ((r0->_hi ^ hi) >= 0)) ) // hi results have same signs
+ return TypeInt::make(lo,hi,MAX2(r0->_widen,r1->_widen));
+ else // Overflow; assume all integers
+ return TypeInt::INT;
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+Node *SubLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node *in1 = in(1);
+ Node *in2 = in(2);
+ uint op1 = in1->Opcode();
+ uint op2 = in2->Opcode();
+
+#ifdef ASSERT
+ // Check for dead loop
+ if( phase->eqv( in1, this ) || phase->eqv( in2, this ) ||
+ ( op1 == Op_AddL || op1 == Op_SubL ) &&
+ ( phase->eqv( in1->in(1), this ) || phase->eqv( in1->in(2), this ) ||
+ phase->eqv( in1->in(1), in1 ) || phase->eqv( in1->in(2), in1 ) ) )
+ assert(false, "dead loop in SubLNode::Ideal");
+#endif
+
+ if( phase->type( in2 ) == Type::TOP ) return NULL;
+ const TypeLong *i = phase->type( in2 )->isa_long();
+ // Convert "x-c0" into "x+ -c0".
+ if( i && // Might be bottom or top...
+ i->is_con() )
+ return new (phase->C, 3) AddLNode(in1, phase->longcon(-i->get_con()));
+
+ // Convert "(x+c0) - y" into (x-y) + c0"
+ // Do not collapse (x+c0)-y if "+" is a loop increment or
+ // if "y" is a loop induction variable.
+ if( op1 == Op_AddL && ok_to_convert(in1, in2) ) {
+ Node *in11 = in1->in(1);
+ const Type *tadd = phase->type( in1->in(2) );
+ if( tadd->singleton() && tadd != Type::TOP ) {
+ Node *sub2 = phase->transform( new (phase->C, 3) SubLNode( in11, in2 ));
+ return new (phase->C, 3) AddLNode( sub2, in1->in(2) );
+ }
+ }
+
+ // Convert "x - (y+c0)" into "(x-y) - c0"
+ // Need the same check as in above optimization but reversed.
+ if (op2 == Op_AddL && ok_to_convert(in2, in1)) {
+ Node* in21 = in2->in(1);
+ Node* in22 = in2->in(2);
+ const TypeLong* tcon = phase->type(in22)->isa_long();
+ if (tcon != NULL && tcon->is_con()) {
+ Node* sub2 = phase->transform( new (phase->C, 3) SubLNode(in1, in21) );
+ Node* neg_c0 = phase->longcon(- tcon->get_con());
+ return new (phase->C, 3) AddLNode(sub2, neg_c0);
+ }
+ }
+
+ const Type *t1 = phase->type( in1 );
+ if( t1 == Type::TOP ) return NULL;
+
+#ifdef ASSERT
+ // Check for dead loop
+ if( ( op2 == Op_AddL || op2 == Op_SubL ) &&
+ ( phase->eqv( in2->in(1), this ) || phase->eqv( in2->in(2), this ) ||
+ phase->eqv( in2->in(1), in2 ) || phase->eqv( in2->in(2), in2 ) ) )
+ assert(false, "dead loop in SubLNode::Ideal");
+#endif
+
+ // Convert "x - (x+y)" into "-y"
+ if( op2 == Op_AddL &&
+ phase->eqv( in1, in2->in(1) ) )
+ return new (phase->C, 3) SubLNode( phase->makecon(TypeLong::ZERO), in2->in(2));
+ // Convert "x - (y+x)" into "-y"
+ if( op2 == Op_AddL &&
+ phase->eqv( in1, in2->in(2) ) )
+ return new (phase->C, 3) SubLNode( phase->makecon(TypeLong::ZERO),in2->in(1));
+
+ // Convert "0 - (x-y)" into "y-x"
+ if( phase->type( in1 ) == TypeLong::ZERO && op2 == Op_SubL )
+ return new (phase->C, 3) SubLNode( in2->in(2), in2->in(1) );
+
+ // Convert "(X+A) - (X+B)" into "A - B"
+ if( op1 == Op_AddL && op2 == Op_AddL && in1->in(1) == in2->in(1) )
+ return new (phase->C, 3) SubLNode( in1->in(2), in2->in(2) );
+
+ // Convert "(A+X) - (B+X)" into "A - B"
+ if( op1 == Op_AddL && op2 == Op_AddL && in1->in(2) == in2->in(2) )
+ return new (phase->C, 3) SubLNode( in1->in(1), in2->in(1) );
+
+ // Convert "A-(B-C)" into (A+C)-B"
+ if( op2 == Op_SubL && in2->outcnt() == 1) {
+ Node *add1 = phase->transform( new (phase->C, 3) AddLNode( in1, in2->in(2) ) );
+ return new (phase->C, 3) SubLNode( add1, in2->in(1) );
+ }
+
+ return NULL;
+}
+
+//------------------------------sub--------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubLNode::sub( const Type *t1, const Type *t2 ) const {
+ const TypeLong *r0 = t1->is_long(); // Handy access
+ const TypeLong *r1 = t2->is_long();
+ jlong lo = r0->_lo - r1->_hi;
+ jlong hi = r0->_hi - r1->_lo;
+
+  // We next check for 64-bit overflow.
+  // If that happens, we just assume all longs are possible.
+ if( (((r0->_lo ^ r1->_hi) >= 0) || // lo ends have same signs OR
+ ((r0->_lo ^ lo) >= 0)) && // lo results have same signs AND
+ (((r0->_hi ^ r1->_lo) >= 0) || // hi ends have same signs OR
+ ((r0->_hi ^ hi) >= 0)) ) // hi results have same signs
+ return TypeLong::make(lo,hi,MAX2(r0->_widen,r1->_widen));
+ else // Overflow; assume all integers
+ return TypeLong::LONG;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubFPNode::Value( PhaseTransform *phase ) const {
+ const Node* in1 = in(1);
+ const Node* in2 = in(2);
+ // Either input is TOP ==> the result is TOP
+ const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // if both operands are infinity of same sign, the result is NaN; do
+ // not replace with zero
+ if( (t1->is_finite() && t2->is_finite()) ) {
+ if( phase->eqv(in1, in2) ) return add_id();
+ }
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ return sub(t1,t2); // Local flavor of type subtraction
+}
+
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+Node *SubFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t2 = phase->type( in(2) );
+ // Convert "x-c0" into "x+ -c0".
+ if( t2->base() == Type::FloatCon ) { // Might be bottom or top...
+ // return new (phase->C, 3) AddFNode(in(1), phase->makecon( TypeF::make(-t2->getf()) ) );
+ }
+
+ // Not associative because of boundary conditions (infinity)
+ if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
+ // Convert "x - (x+y)" into "-y"
+ if( in(2)->is_Add() &&
+ phase->eqv(in(1),in(2)->in(1) ) )
+ return new (phase->C, 3) SubFNode( phase->makecon(TypeF::ZERO),in(2)->in(2));
+ }
+
+ // Cannot replace 0.0-X with -X because a 'fsub' bytecode computes
+ // 0.0-0.0 as +0.0, while a 'fneg' bytecode computes -0.0.
+ //if( phase->type(in(1)) == TypeF::ZERO )
+ //return new (phase->C, 2) NegFNode(in(2));
+
+ return NULL;
+}
+
+//------------------------------sub--------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubFNode::sub( const Type *t1, const Type *t2 ) const {
+  // No constant folding if either operand is infinity or NaN.
+ if( g_isfinite(t1->getf()) && g_isfinite(t2->getf()) ) {
+ return TypeF::make( t1->getf() - t2->getf() );
+ }
+ else if( g_isnan(t1->getf()) ) {
+ return t1;
+ }
+ else if( g_isnan(t2->getf()) ) {
+ return t2;
+ }
+ else {
+ return Type::FLOAT;
+ }
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+Node *SubDNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ const Type *t2 = phase->type( in(2) );
+ // Convert "x-c0" into "x+ -c0".
+ if( t2->base() == Type::DoubleCon ) { // Might be bottom or top...
+ // return new (phase->C, 3) AddDNode(in(1), phase->makecon( TypeD::make(-t2->getd()) ) );
+ }
+
+ // Not associative because of boundary conditions (infinity)
+ if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
+ // Convert "x - (x+y)" into "-y"
+ if( in(2)->is_Add() &&
+ phase->eqv(in(1),in(2)->in(1) ) )
+ return new (phase->C, 3) SubDNode( phase->makecon(TypeD::ZERO),in(2)->in(2));
+ }
+
+ // Cannot replace 0.0-X with -X because a 'dsub' bytecode computes
+ // 0.0-0.0 as +0.0, while a 'dneg' bytecode computes -0.0.
+ //if( phase->type(in(1)) == TypeD::ZERO )
+ //return new (phase->C, 2) NegDNode(in(2));
+
+ return NULL;
+}
+
+//------------------------------sub--------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubDNode::sub( const Type *t1, const Type *t2 ) const {
+  // No constant folding if either operand is infinity or NaN.
+ if( g_isfinite(t1->getd()) && g_isfinite(t2->getd()) ) {
+ return TypeD::make( t1->getd() - t2->getd() );
+ }
+ else if( g_isnan(t1->getd()) ) {
+ return t1;
+ }
+ else if( g_isnan(t2->getd()) ) {
+ return t2;
+ }
+ else {
+ return Type::DOUBLE;
+ }
+}
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+// Unlike SubNodes, a compare must still flatten its return value to the
+// range -1, 0, 1.
+// And optimizations like those for (X + Y) - X fail if overflow happens.
+Node *CmpNode::Identity( PhaseTransform *phase ) {
+ return this;
+}
+
+//=============================================================================
+//------------------------------cmp--------------------------------------------
+// Simplify a CmpI (compare 2 integers) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpINode::sub( const Type *t1, const Type *t2 ) const {
+ const TypeInt *r0 = t1->is_int(); // Handy access
+ const TypeInt *r1 = t2->is_int();
+
+ if( r0->_hi < r1->_lo ) // Range is always low?
+ return TypeInt::CC_LT;
+ else if( r0->_lo > r1->_hi ) // Range is always high?
+ return TypeInt::CC_GT;
+
+ else if( r0->is_con() && r1->is_con() ) { // comparing constants?
+ assert(r0->get_con() == r1->get_con(), "must be equal");
+ return TypeInt::CC_EQ; // Equal results.
+ } else if( r0->_hi == r1->_lo ) // Range is never high?
+ return TypeInt::CC_LE;
+ else if( r0->_lo == r1->_hi ) // Range is never low?
+ return TypeInt::CC_GE;
+ return TypeInt::CC; // else use worst case results
+}
+
+// Simplify a CmpU (compare 2 unsigned integers) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpUNode::sub( const Type *t1, const Type *t2 ) const {
+ assert(!t1->isa_ptr(), "obsolete usage of CmpU");
+
+ // comparing two unsigned ints
+ const TypeInt *r0 = t1->is_int(); // Handy access
+ const TypeInt *r1 = t2->is_int();
+
+ // Current installed version
+ // Compare ranges for non-overlap
+ juint lo0 = r0->_lo;
+ juint hi0 = r0->_hi;
+ juint lo1 = r1->_lo;
+ juint hi1 = r1->_hi;
+
+ // If either one has both negative and positive values,
+ // it therefore contains both 0 and -1, and since [0..-1] is the
+ // full unsigned range, the type must act as an unsigned bottom.
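+  // (e.g. a signed range [-1..1] contains 0, the unsigned minimum, as well as
+  // -1, the unsigned maximum, so viewed unsigned it constrains nothing.)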
+ bool bot0 = ((jint)(lo0 ^ hi0) < 0);
+ bool bot1 = ((jint)(lo1 ^ hi1) < 0);
+
+ if (bot0 || bot1) {
+ // All unsigned values are LE -1 and GE 0.
+ if (lo0 == 0 && hi0 == 0) {
+ return TypeInt::CC_LE; // 0 <= bot
+ } else if (lo1 == 0 && hi1 == 0) {
+ return TypeInt::CC_GE; // bot >= 0
+ }
+ } else {
+ // We can use ranges of the form [lo..hi] if signs are the same.
+ assert(lo0 <= hi0 && lo1 <= hi1, "unsigned ranges are valid");
+ // results are reversed, '-' > '+' for unsigned compare
+ if (hi0 < lo1) {
+ return TypeInt::CC_LT; // smaller
+ } else if (lo0 > hi1) {
+ return TypeInt::CC_GT; // greater
+ } else if (hi0 == lo1 && lo0 == hi1) {
+ return TypeInt::CC_EQ; // Equal results
+ } else if (lo0 >= hi1) {
+ return TypeInt::CC_GE;
+ } else if (hi0 <= lo1) {
+ // Check for special case in Hashtable::get. (See below.)
+ if ((jint)lo0 >= 0 && (jint)lo1 >= 0 &&
+ in(1)->Opcode() == Op_ModI &&
+ in(1)->in(2) == in(2) )
+ return TypeInt::CC_LT;
+ return TypeInt::CC_LE;
+ }
+ }
+ // Check for special case in Hashtable::get - the hash index is
+ // mod'ed to the table size so the following range check is useless.
+ // Check for: (X Mod Y) CmpU Y, where the mod result and Y both have
+ // to be positive.
+ // (This is a gross hack, since the sub method never
+ // looks at the structure of the node in any other case.)
+ if ((jint)lo0 >= 0 && (jint)lo1 >= 0 &&
+ in(1)->Opcode() == Op_ModI &&
+ in(1)->in(2)->uncast() == in(2)->uncast())
+ return TypeInt::CC_LT;
+ return TypeInt::CC; // else use worst case results
+}
+
+//------------------------------Idealize---------------------------------------
+Node *CmpINode::Ideal( PhaseGVN *phase, bool can_reshape ) {
+ if (phase->type(in(2))->higher_equal(TypeInt::ZERO)) {
+ switch (in(1)->Opcode()) {
+ case Op_CmpL3: // Collapse a CmpL3/CmpI into a CmpL
+ return new (phase->C, 3) CmpLNode(in(1)->in(1),in(1)->in(2));
+ case Op_CmpF3: // Collapse a CmpF3/CmpI into a CmpF
+ return new (phase->C, 3) CmpFNode(in(1)->in(1),in(1)->in(2));
+ case Op_CmpD3: // Collapse a CmpD3/CmpI into a CmpD
+ return new (phase->C, 3) CmpDNode(in(1)->in(1),in(1)->in(2));
+ //case Op_SubI:
+ // If (x - y) cannot overflow, then ((x - y) <?> 0)
+ // can be turned into (x <?> y).
+ // This is handled (with more general cases) by Ideal_sub_algebra.
+ }
+ }
+ return NULL; // No change
+}
+
+
+//=============================================================================
+// Simplify a CmpL (compare 2 longs) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpLNode::sub( const Type *t1, const Type *t2 ) const {
+ const TypeLong *r0 = t1->is_long(); // Handy access
+ const TypeLong *r1 = t2->is_long();
+
+ if( r0->_hi < r1->_lo ) // Range is always low?
+ return TypeInt::CC_LT;
+ else if( r0->_lo > r1->_hi ) // Range is always high?
+ return TypeInt::CC_GT;
+
+ else if( r0->is_con() && r1->is_con() ) { // comparing constants?
+ assert(r0->get_con() == r1->get_con(), "must be equal");
+ return TypeInt::CC_EQ; // Equal results.
+ } else if( r0->_hi == r1->_lo ) // Range is never high?
+ return TypeInt::CC_LE;
+ else if( r0->_lo == r1->_hi ) // Range is never low?
+ return TypeInt::CC_GE;
+ return TypeInt::CC; // else use worst case results
+}
+
+//=============================================================================
+//------------------------------sub--------------------------------------------
+// Simplify a CmpP (compare 2 pointers) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpPNode::sub( const Type *t1, const Type *t2 ) const {
+ const TypePtr *r0 = t1->is_ptr(); // Handy access
+ const TypePtr *r1 = t2->is_ptr();
+
+  // Undefined inputs make for an undefined result
+ if( TypePtr::above_centerline(r0->_ptr) ||
+ TypePtr::above_centerline(r1->_ptr) )
+ return Type::TOP;
+
+ if (r0 == r1 && r0->singleton()) {
+ // Equal pointer constants (klasses, nulls, etc.)
+ return TypeInt::CC_EQ;
+ }
+
+ // See if it is 2 unrelated classes.
+ const TypeOopPtr* p0 = r0->isa_oopptr();
+ const TypeOopPtr* p1 = r1->isa_oopptr();
+ if (p0 && p1) {
+ ciKlass* klass0 = p0->klass();
+ bool xklass0 = p0->klass_is_exact();
+ ciKlass* klass1 = p1->klass();
+ bool xklass1 = p1->klass_is_exact();
+ int kps = (p0->isa_klassptr()?1:0) + (p1->isa_klassptr()?1:0);
+ if (klass0 && klass1 &&
+ kps != 1 && // both or neither are klass pointers
+ !klass0->is_interface() && // do not trust interfaces
+ !klass1->is_interface()) {
+ // See if neither subclasses the other, or if the class on top
+ // is precise. In either of these cases, the compare must fail.
+ if (klass0->equals(klass1) || // if types are unequal but klasses are
+ !klass0->is_java_klass() || // types not part of Java language?
+ !klass1->is_java_klass()) { // types not part of Java language?
+ // Do nothing; we know nothing for imprecise types
+ } else if (klass0->is_subtype_of(klass1)) {
+ // If klass1's type is PRECISE, then we can fail.
+ if (xklass1) return TypeInt::CC_GT;
+ } else if (klass1->is_subtype_of(klass0)) {
+ // If klass0's type is PRECISE, then we can fail.
+ if (xklass0) return TypeInt::CC_GT;
+ } else { // Neither subtypes the other
+ return TypeInt::CC_GT; // ...so always fail
+ }
+ }
+ }
+
+ // Known constants can be compared exactly
+ // Null can be distinguished from any NotNull pointers
+  // Unknown inputs make for an unknown result
+ if( r0->singleton() ) {
+ intptr_t bits0 = r0->get_con();
+ if( r1->singleton() )
+ return bits0 == r1->get_con() ? TypeInt::CC_EQ : TypeInt::CC_GT;
+ return ( r1->_ptr == TypePtr::NotNull && bits0==0 ) ? TypeInt::CC_GT : TypeInt::CC;
+ } else if( r1->singleton() ) {
+ intptr_t bits1 = r1->get_con();
+ return ( r0->_ptr == TypePtr::NotNull && bits1==0 ) ? TypeInt::CC_GT : TypeInt::CC;
+ } else
+ return TypeInt::CC;
+}
+
+//------------------------------Ideal------------------------------------------
+// Check for the case of comparing an unknown klass loaded from the primary
+// super-type array vs a known klass with no subtypes. This amounts to
+// checking to see if an unknown klass subtypes a known klass with no subtypes;
+// this only happens on an exact match. We can shorten this test by 1 load.
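+// For example, a subtype check against a leaf class C looks roughly like
+//   CmpP (LoadKlass (AddP obj_klass C.super_check_offset)) (ConP C)
+// and, because C has no subklasses, only obj_klass == C can have C stored in
+// that primary-super slot, so we bypass the load and compare obj_klass to C.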
+Node *CmpPNode::Ideal( PhaseGVN *phase, bool can_reshape ) {
+ // Constant pointer on right?
+ const TypeKlassPtr* t2 = phase->type(in(2))->isa_klassptr();
+ if (t2 == NULL || !t2->klass_is_exact())
+ return NULL;
+ // Get the constant klass we are comparing to.
+ ciKlass* superklass = t2->klass();
+
+ // Now check for LoadKlass on left.
+ Node* ldk1 = in(1);
+ if (ldk1->Opcode() != Op_LoadKlass)
+ return NULL;
+ // Take apart the address of the LoadKlass:
+ Node* adr1 = ldk1->in(MemNode::Address);
+ intptr_t con2 = 0;
+ Node* ldk2 = AddPNode::Ideal_base_and_offset(adr1, phase, con2);
+ if (ldk2 == NULL)
+ return NULL;
+ if (con2 == oopDesc::klass_offset_in_bytes()) {
+ // We are inspecting an object's concrete class.
+ // Short-circuit the check if the query is abstract.
+ if (superklass->is_interface() ||
+ superklass->is_abstract()) {
+ // Make it come out always false:
+ this->set_req(2, phase->makecon(TypePtr::NULL_PTR));
+ return this;
+ }
+ }
+
+ // Check for a LoadKlass from primary supertype array.
+ // Any nested loadklass from loadklass+con must be from the p.s. array.
+ if (ldk2->Opcode() != Op_LoadKlass)
+ return NULL;
+
+ // Verify that we understand the situation
+ if (con2 != (intptr_t) superklass->super_check_offset())
+ return NULL; // Might be element-klass loading from array klass
+
+ // If 'superklass' has no subklasses and is not an interface, then we are
+ // assured that the only input which will pass the type check is
+ // 'superklass' itself.
+ //
+ // We could be more liberal here, and allow the optimization on interfaces
+ // which have a single implementor. This would require us to increase the
+ // expressiveness of the add_dependency() mechanism.
+ // %%% Do this after we fix TypeOopPtr: Deps are expressive enough now.
+
+ // Object arrays must have their base element have no subtypes
+ while (superklass->is_obj_array_klass()) {
+ ciType* elem = superklass->as_obj_array_klass()->element_type();
+ superklass = elem->as_klass();
+ }
+ if (superklass->is_instance_klass()) {
+ ciInstanceKlass* ik = superklass->as_instance_klass();
+ if (ik->has_subklass() || ik->is_interface()) return NULL;
+ // Add a dependency if there is a chance that a subclass will be added later.
+ if (!ik->is_final()) {
+ phase->C->dependencies()->assert_leaf_type(ik);
+ }
+ }
+
+ // Bypass the dependent load, and compare directly
+ this->set_req(1,ldk2);
+
+ return this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Simplify a CmpF (compare 2 floats) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpFNode::Value( PhaseTransform *phase ) const {
+ const Node* in1 = in(1);
+ const Node* in2 = in(2);
+ // Either input is TOP ==> the result is TOP
+ const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Not constants? Don't know squat - even if they are the same
+ // value! If they are NaN's they compare to LT instead of EQ.
+ const TypeF *tf1 = t1->isa_float_constant();
+ const TypeF *tf2 = t2->isa_float_constant();
+ if( !tf1 || !tf2 ) return TypeInt::CC;
+
+ // This implements the Java bytecode fcmpl, so unordered returns -1.
+ if( tf1->is_nan() || tf2->is_nan() )
+ return TypeInt::CC_LT;
+
+ if( tf1->_f < tf2->_f ) return TypeInt::CC_LT;
+ if( tf1->_f > tf2->_f ) return TypeInt::CC_GT;
+ assert( tf1->_f == tf2->_f, "do not understand FP behavior" );
+ return TypeInt::CC_EQ;
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Simplify a CmpD (compare 2 doubles) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpDNode::Value( PhaseTransform *phase ) const {
+ const Node* in1 = in(1);
+ const Node* in2 = in(2);
+ // Either input is TOP ==> the result is TOP
+ const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Not constants? Don't know squat - even if they are the same
+ // value! If they are NaN's they compare to LT instead of EQ.
+ const TypeD *td1 = t1->isa_double_constant();
+ const TypeD *td2 = t2->isa_double_constant();
+ if( !td1 || !td2 ) return TypeInt::CC;
+
+ // This implements the Java bytecode dcmpl, so unordered returns -1.
+ if( td1->is_nan() || td2->is_nan() )
+ return TypeInt::CC_LT;
+
+ if( td1->_d < td2->_d ) return TypeInt::CC_LT;
+ if( td1->_d > td2->_d ) return TypeInt::CC_GT;
+ assert( td1->_d == td2->_d, "do not understand FP behavior" );
+ return TypeInt::CC_EQ;
+}
+
+//------------------------------Ideal------------------------------------------
+Node *CmpDNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ // Check if we can change this to a CmpF and remove a ConvD2F operation.
+ // Change (CMPD (F2D (float)) (ConD value))
+ // To (CMPF (float) (ConF value))
+ // Valid when 'value' does not lose precision as a float.
+ // Benefits: eliminates conversion, does not require 24-bit mode
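+ // e.g. "f == 1.5" qualifies since 1.5 is exact as a float, while "f == 0.1"
+ // does not, because (double)(float)0.1 != 0.1.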
+
+ // NaNs prevent commuting operands. This transform works regardless of the
+ // order of ConD and ConvF2D inputs by preserving the original order.
+ int idx_f2d = 1; // ConvF2D on left side?
+ if( in(idx_f2d)->Opcode() != Op_ConvF2D )
+ idx_f2d = 2; // No, swap to check for reversed args
+ int idx_con = 3-idx_f2d; // Check for the constant on other input
+
+ if( ConvertCmpD2CmpF &&
+ in(idx_f2d)->Opcode() == Op_ConvF2D &&
+ in(idx_con)->Opcode() == Op_ConD ) {
+ const TypeD *t2 = in(idx_con)->bottom_type()->is_double_constant();
+ double t2_value_as_double = t2->_d;
+ float t2_value_as_float = (float)t2_value_as_double;
+ if( t2_value_as_double == (double)t2_value_as_float ) {
+ // Test value can be represented as a float
+ // Eliminate the conversion to double and create new comparison
+ Node *new_in1 = in(idx_f2d)->in(1);
+ Node *new_in2 = phase->makecon( TypeF::make(t2_value_as_float) );
+ if( idx_f2d != 1 ) { // Must flip args to match original order
+ Node *tmp = new_in1;
+ new_in1 = new_in2;
+ new_in2 = tmp;
+ }
+ CmpFNode *new_cmp = (Opcode() == Op_CmpD3)
+ ? new (phase->C, 3) CmpF3Node( new_in1, new_in2 )
+ : new (phase->C, 3) CmpFNode ( new_in1, new_in2 ) ;
+ return new_cmp; // Changed to CmpFNode
+ }
+ // Testing value required the precision of a double
+ }
+ return NULL; // No change
+}
+
+
+//=============================================================================
+//------------------------------cc2logical-------------------------------------
+// Convert a condition code type to a logical type
+const Type *BoolTest::cc2logical( const Type *CC ) const {
+ if( CC == Type::TOP ) return Type::TOP;
+ if( CC->base() != Type::Int ) return TypeInt::BOOL; // Bottom or worse
+ const TypeInt *ti = CC->is_int();
+ if( ti->is_con() ) { // Only 1 kind of condition codes set?
+ // Match low order 2 bits
+ int tmp = ((ti->get_con()&3) == (_test&3)) ? 1 : 0;
+ if( _test & 4 ) tmp = 1-tmp; // Optionally complement result
+ return TypeInt::make(tmp); // Boolean result
+ }
+
+ if( CC == TypeInt::CC_GE ) {
+ if( _test == ge ) return TypeInt::ONE;
+ if( _test == lt ) return TypeInt::ZERO;
+ }
+ if( CC == TypeInt::CC_LE ) {
+ if( _test == le ) return TypeInt::ONE;
+ if( _test == gt ) return TypeInt::ZERO;
+ }
+
+ return TypeInt::BOOL;
+}
+
+//------------------------------dump_spec-------------------------------------
+// Print special per-node info
+#ifndef PRODUCT
+void BoolTest::dump_on(outputStream *st) const {
+ const char *msg[] = {"eq","gt","??","lt","ne","le","??","ge"};
+ st->print(msg[_test]);
+}
+#endif
+
+//=============================================================================
+uint BoolNode::hash() const { return (Node::hash() << 3)|(_test._test+1); }
+uint BoolNode::size_of() const { return sizeof(BoolNode); }
+
+//------------------------------operator==-------------------------------------
+uint BoolNode::cmp( const Node &n ) const {
+ const BoolNode *b = (const BoolNode *)&n; // Cast up
+ return (_test._test == b->_test._test);
+}
+
+//------------------------------clone_cmp--------------------------------------
+// Clone a compare/bool tree
+static Node *clone_cmp( Node *cmp, Node *cmp1, Node *cmp2, PhaseGVN *gvn, BoolTest::mask test ) {
+ Node *ncmp = cmp->clone();
+ ncmp->set_req(1,cmp1);
+ ncmp->set_req(2,cmp2);
+ ncmp = gvn->transform( ncmp );
+ return new (gvn->C, 2) BoolNode( ncmp, test );
+}
+
+//-------------------------------make_predicate--------------------------------
+Node* BoolNode::make_predicate(Node* test_value, PhaseGVN* phase) {
+ if (test_value->is_Con()) return test_value;
+ if (test_value->is_Bool()) return test_value;
+ Compile* C = phase->C;
+ if (test_value->is_CMove() &&
+ test_value->in(CMoveNode::Condition)->is_Bool()) {
+ BoolNode* bol = test_value->in(CMoveNode::Condition)->as_Bool();
+ const Type* ftype = phase->type(test_value->in(CMoveNode::IfFalse));
+ const Type* ttype = phase->type(test_value->in(CMoveNode::IfTrue));
+ if (ftype == TypeInt::ZERO && !TypeInt::ZERO->higher_equal(ttype)) {
+ return bol;
+ } else if (ttype == TypeInt::ZERO && !TypeInt::ZERO->higher_equal(ftype)) {
+ return phase->transform( bol->negate(phase) );
+ }
+ // Else fall through. The CMove gets in the way of the test.
+ // It should be the case that make_predicate(bol->as_int_value()) == bol.
+ }
+ Node* cmp = new (C, 3) CmpINode(test_value, phase->intcon(0));
+ cmp = phase->transform(cmp);
+ Node* bol = new (C, 2) BoolNode(cmp, BoolTest::ne);
+ return phase->transform(bol);
+}
+
+//--------------------------------as_int_value---------------------------------
+Node* BoolNode::as_int_value(PhaseGVN* phase) {
+ // Inverse to make_predicate. The CMove probably boils down to a Conv2B.
+ Node* cmov = CMoveNode::make(phase->C, NULL, this,
+ phase->intcon(0), phase->intcon(1),
+ TypeInt::BOOL);
+ return phase->transform(cmov);
+}
+
+//----------------------------------negate-------------------------------------
+BoolNode* BoolNode::negate(PhaseGVN* phase) {
+ Compile* C = phase->C;
+ return new (C, 2) BoolNode(in(1), _test.negate());
+}
+
+
+//------------------------------Ideal------------------------------------------
+Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Change "bool tst (cmp con x)" into "bool ~tst (cmp x con)".
+ // This moves the constant to the right. Helps value-numbering.
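+ // e.g. "bool lt (cmp 5 x)" becomes "bool gt (cmp x 5)".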
+ Node *cmp = in(1);
+ if( !cmp->is_Sub() ) return NULL;
+ int cop = cmp->Opcode();
+ if( cop == Op_FastLock || cop == Op_FastUnlock ) return NULL;
+ Node *cmp1 = cmp->in(1);
+ Node *cmp2 = cmp->in(2);
+ if( !cmp1 ) return NULL;
+
+ // Constant on left?
+ Node *con = cmp1;
+ uint op2 = cmp2->Opcode();
+ // Move constants to the right of compare's to canonicalize.
+ // Do not muck with Opaque1 nodes, as this indicates a loop
+ // guard that cannot change shape.
+ if( con->is_Con() && !cmp2->is_Con() && op2 != Op_Opaque1 &&
+ // Because of NaN's, CmpD and CmpF are not commutative
+ cop != Op_CmpD && cop != Op_CmpF &&
+ // Protect against swapping inputs to a compare when it is used by a
+ // counted loop exit, which requires maintaining the loop-limit as in(2)
+ !is_counted_loop_exit_test() ) {
+ // Ok, commute the constant to the right of the cmp node.
+ // Clone the Node, getting a new Node of the same class
+ cmp = cmp->clone();
+ // Swap inputs to the clone
+ cmp->swap_edges(1, 2);
+ cmp = phase->transform( cmp );
+ return new (phase->C, 2) BoolNode( cmp, _test.commute() );
+ }
+
+ // Change "bool eq/ne (cmp (xor X 1) 0)" into "bool ne/eq (cmp X 0)".
+ // The XOR-1 is an idiom used to flip the sense of a bool. We flip the
+ // test instead.
+ int cmp1_op = cmp1->Opcode();
+ const TypeInt* cmp2_type = phase->type(cmp2)->isa_int();
+ if (cmp2_type == NULL) return NULL;
+ Node* j_xor = cmp1;
+ if( cmp2_type == TypeInt::ZERO &&
+ cmp1_op == Op_XorI &&
+ j_xor->in(1) != j_xor && // An xor of itself is dead
+ phase->type( j_xor->in(2) ) == TypeInt::ONE &&
+ (_test._test == BoolTest::eq ||
+ _test._test == BoolTest::ne) ) {
+ Node *ncmp = phase->transform(new (phase->C, 3) CmpINode(j_xor->in(1),cmp2));
+ return new (phase->C, 2) BoolNode( ncmp, _test.negate() );
+ }
+
+ // Change "bool eq/ne (cmp (Conv2B X) 0)" into "bool eq/ne (cmp X 0)".
+ // This is a standard idiom for branching on a boolean value.
+ Node *c2b = cmp1;
+ if( cmp2_type == TypeInt::ZERO &&
+ cmp1_op == Op_Conv2B &&
+ (_test._test == BoolTest::eq ||
+ _test._test == BoolTest::ne) ) {
+ Node *ncmp = phase->transform(phase->type(c2b->in(1))->isa_int()
+ ? (Node*)new (phase->C, 3) CmpINode(c2b->in(1),cmp2)
+ : (Node*)new (phase->C, 3) CmpPNode(c2b->in(1),phase->makecon(TypePtr::NULL_PTR))
+ );
+ return new (phase->C, 2) BoolNode( ncmp, _test._test );
+ }
+
+ // Comparing a SubI against a zero is equal to comparing the SubI
+ // arguments directly. This only works for eq and ne comparisons
+ // due to possible integer overflow.
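+ // e.g. "(x - y) == 0" becomes "x == y"; the same rewrite for lt/gt would be
+ // wrong when the subtraction overflows.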
+ if ((_test._test == BoolTest::eq || _test._test == BoolTest::ne) &&
+ (cop == Op_CmpI) &&
+ (cmp1->Opcode() == Op_SubI) &&
+ ( cmp2_type == TypeInt::ZERO ) ) {
+ Node *ncmp = phase->transform( new (phase->C, 3) CmpINode(cmp1->in(1),cmp1->in(2)));
+ return new (phase->C, 2) BoolNode( ncmp, _test._test );
+ }
+
+ // Change (-A vs 0) into (A vs 0) by commuting the test. Disallow in the
+ // most general case because negating 0x80000000 does nothing. Needed for
+ // the CmpF3/SubI/CmpI idiom.
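+ // e.g. "bool lt (cmp (SubI 0 A) 0)" becomes "bool gt (cmp A 0)" when A's
+ // type is known to exclude min_jint (the SYMINT check below).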
+ if( cop == Op_CmpI &&
+ cmp1->Opcode() == Op_SubI &&
+ cmp2_type == TypeInt::ZERO &&
+ phase->type( cmp1->in(1) ) == TypeInt::ZERO &&
+ phase->type( cmp1->in(2) )->higher_equal(TypeInt::SYMINT) ) {
+ Node *ncmp = phase->transform( new (phase->C, 3) CmpINode(cmp1->in(2),cmp2));
+ return new (phase->C, 2) BoolNode( ncmp, _test.commute() );
+ }
+
+ // The transformation below is not valid for either signed or unsigned
+ // comparisons due to wraparound concerns at MAX_VALUE and MIN_VALUE.
+ // This transformation can be resurrected when we are able to
+ // make inferences about the range of values being subtracted from
+ // (or added to) relative to the wraparound point.
+ //
+ // // Remove +/-1's if possible.
+ // // "X <= Y-1" becomes "X < Y"
+ // // "X+1 <= Y" becomes "X < Y"
+ // // "X < Y+1" becomes "X <= Y"
+ // // "X-1 < Y" becomes "X <= Y"
+ // // Do not do this to compares off of the counted-loop-end. These guys are
+ // // checking the trip counter and they want to use the post-incremented
+ // // counter. If they use the PRE-incremented counter, then the counter has
+ // // to be incremented in a private block on a loop backedge.
+ // if( du && du->cnt(this) && du->out(this)[0]->Opcode() == Op_CountedLoopEnd )
+ // return NULL;
+ // #ifndef PRODUCT
+ // // Do not do this in a wash GVN pass during verification.
+ // // Gets triggered by too many simple optimizations to be bothered with
+ // // re-trying it again and again.
+ // if( !phase->allow_progress() ) return NULL;
+ // #endif
+ // // Not valid for unsigned compare because of corner cases involving zero.
+ // // For example, replacing "X-1 <u Y" with "X <=u Y" fails to throw an
+ // // exception in case X is 0 (because 0-1 turns into 4 billion unsigned but
+ // // "0 <=u Y" is always true).
+ // if( cmp->Opcode() == Op_CmpU ) return NULL;
+ // int cmp2_op = cmp2->Opcode();
+ // if( _test._test == BoolTest::le ) {
+ // if( cmp1_op == Op_AddI &&
+ // phase->type( cmp1->in(2) ) == TypeInt::ONE )
+ // return clone_cmp( cmp, cmp1->in(1), cmp2, phase, BoolTest::lt );
+ // else if( cmp2_op == Op_AddI &&
+ // phase->type( cmp2->in(2) ) == TypeInt::MINUS_1 )
+ // return clone_cmp( cmp, cmp1, cmp2->in(1), phase, BoolTest::lt );
+ // } else if( _test._test == BoolTest::lt ) {
+ // if( cmp1_op == Op_AddI &&
+ // phase->type( cmp1->in(2) ) == TypeInt::MINUS_1 )
+ // return clone_cmp( cmp, cmp1->in(1), cmp2, phase, BoolTest::le );
+ // else if( cmp2_op == Op_AddI &&
+ // phase->type( cmp2->in(2) ) == TypeInt::ONE )
+ // return clone_cmp( cmp, cmp1, cmp2->in(1), phase, BoolTest::le );
+ // }
+
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+// Simplify a Bool (convert condition codes to boolean (1 or 0)) node,
+// based on local information. If the input is constant, do it.
+const Type *BoolNode::Value( PhaseTransform *phase ) const {
+ return _test.cc2logical( phase->type( in(1) ) );
+}
+
+//------------------------------dump_spec--------------------------------------
+// Dump special per-node info
+#ifndef PRODUCT
+void BoolNode::dump_spec(outputStream *st) const {
+ st->print("[");
+ _test.dump_on(st);
+ st->print("]");
+}
+#endif
+
+//------------------------------is_counted_loop_exit_test--------------------------------------
+// Returns true if this Bool is used by a CountedLoopEnd node.
+bool BoolNode::is_counted_loop_exit_test() {
+ for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
+ Node* use = fast_out(i);
+ if (use->is_CountedLoopEnd()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//=============================================================================
+//------------------------------NegNode----------------------------------------
+Node *NegFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_SubF )
+ return new (phase->C, 3) SubFNode( in(1)->in(2), in(1)->in(1) );
+ return NULL;
+}
+
+Node *NegDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_SubD )
+ return new (phase->C, 3) SubDNode( in(1)->in(2), in(1)->in(1) );
+ return NULL;
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute sqrt
+const Type *SqrtDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( sqrt( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute cos
+const Type *CosDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dcos( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute sin
+const Type *SinDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dsin( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute tan
+const Type *TanDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dtan( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute log
+const Type *LogDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dlog( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute log10
+const Type *Log10DNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dlog10( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute exp
+const Type *ExpDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dexp( d ) );
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute pow
+const Type *PowDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ const Type *t2 = phase->type( in(2) );
+ if( t2 == Type::TOP ) return Type::TOP;
+ if( t2->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d1 = t1->getd();
+ double d2 = t2->getd();
+ if( d1 < 0.0 ) return Type::DOUBLE;
+ if( d2 < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dpow( d1, d2 ) );
+}
diff --git a/src/share/vm/opto/subnode.hpp b/src/share/vm/opto/subnode.hpp
new file mode 100644
index 000000000..4992a59c5
--- /dev/null
+++ b/src/share/vm/opto/subnode.hpp
@@ -0,0 +1,501 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+//------------------------------SUBNode----------------------------------------
+// Class SUBTRACTION functionality. This covers all the usual 'subtract'
+// behaviors. Subtract-integer, -float, -double, binary xor, compare-integer,
+// -float, and -double are all inherited from this class. The compare
+// functions behave like subtract functions, except that all negative answers
+// are compressed into -1, and all positive answers compressed to 1.
+class SubNode : public Node {
+public:
+ SubNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
+ init_class_id(Class_Sub);
+ }
+
+ // Handle algebraic identities here. If we have an identity, return the Node
+ // we are equivalent to. We look for "add of zero" as an identity.
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual sub() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Supplied function returns the difference of the two inputs.
+ // This also type-checks the inputs for sanity. Guaranteed never to
+ // be passed a TOP or BOTTOM type, these are filtered out by a pre-check.
+ virtual const Type *sub( const Type *, const Type * ) const = 0;
+
+ // Supplied function to return the additive identity type.
+ // This is returned whenever the subtract's inputs are the same.
+ virtual const Type *add_id() const = 0;
+
+};
+
+
+// NOTE: SubINode should be taken away and replaced by add and negate
+//------------------------------SubINode---------------------------------------
+// Subtract 2 integers
+class SubINode : public SubNode {
+public:
+ SubINode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+ const Type *add_id() const { return TypeInt::ZERO; }
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------SubLNode---------------------------------------
+// Subtract 2 longs
+class SubLNode : public SubNode {
+public:
+ SubLNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+ const Type *add_id() const { return TypeLong::ZERO; }
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+// NOTE: SubFPNode should be taken away and replaced by add and negate
+//------------------------------SubFPNode--------------------------------------
+// Subtract 2 floats or doubles
+class SubFPNode : public SubNode {
+protected:
+ SubFPNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
+public:
+ const Type *Value( PhaseTransform *phase ) const;
+};
+
+// NOTE: SubFNode should be taken away and replaced by add and negate
+//------------------------------SubFNode---------------------------------------
+// Subtract 2 floats
+class SubFNode : public SubFPNode {
+public:
+ SubFNode( Node *in1, Node *in2 ) : SubFPNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+ const Type *add_id() const { return TypeF::ZERO; }
+ const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+// NOTE: SubDNode should be taken away and replaced by add and negate
+//------------------------------SubDNode---------------------------------------
+// Subtract 2 doubles
+class SubDNode : public SubFPNode {
+public:
+ SubDNode( Node *in1, Node *in2 ) : SubFPNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+ const Type *add_id() const { return TypeD::ZERO; }
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------CmpNode---------------------------------------
+// Compare 2 values, returning condition codes (-1, 0 or 1).
+class CmpNode : public SubNode {
+public:
+ CmpNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {
+ init_class_id(Class_Cmp);
+ }
+ virtual Node *Identity( PhaseTransform *phase );
+ const Type *add_id() const { return TypeInt::ZERO; }
+ const Type *bottom_type() const { return TypeInt::CC; }
+ virtual uint ideal_reg() const { return Op_RegFlags; }
+};
+
+//------------------------------CmpINode---------------------------------------
+// Compare 2 signed values, returning condition codes (-1, 0 or 1).
+class CmpINode : public CmpNode {
+public:
+ CmpINode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+};
+
+//------------------------------CmpUNode---------------------------------------
+// Compare 2 unsigned values (integer or pointer), returning condition codes (-1, 0 or 1).
+class CmpUNode : public CmpNode {
+public:
+ CmpUNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *sub( const Type *, const Type * ) const;
+};
+
+//------------------------------CmpPNode---------------------------------------
+// Compare 2 pointer values, returning condition codes (-1, 0 or 1).
+class CmpPNode : public CmpNode {
+public:
+ CmpPNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+};
+
+//------------------------------CmpLNode---------------------------------------
+// Compare 2 long values, returning condition codes (-1, 0 or 1).
+class CmpLNode : public CmpNode {
+public:
+ CmpLNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *sub( const Type *, const Type * ) const;
+};
+
+//------------------------------CmpL3Node--------------------------------------
+// Compare 2 long values, returning integer value (-1, 0 or 1).
+class CmpL3Node : public CmpLNode {
+public:
+ CmpL3Node( Node *in1, Node *in2 ) : CmpLNode(in1,in2) {
+ // Since it is not consumed by Bools, it is not really a Cmp.
+ init_class_id(Class_Sub);
+ }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------CmpFNode---------------------------------------
+// Compare 2 float values, returning condition codes (-1, 0 or 1).
+// This implements the Java bytecode fcmpl, so unordered returns -1.
+// Operands may not commute.
+class CmpFNode : public CmpNode {
+public:
+ CmpFNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *sub( const Type *, const Type * ) const { ShouldNotReachHere(); return NULL; }
+ const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------CmpF3Node--------------------------------------
+// Compare 2 float values, returning integer value (-1, 0 or 1).
+// This implements the Java bytecode fcmpl, so unordered returns -1.
+// Operands may not commute.
+class CmpF3Node : public CmpFNode {
+public:
+ CmpF3Node( Node *in1, Node *in2 ) : CmpFNode(in1,in2) {
+ // Since it is not consumed by Bools, it is not really a Cmp.
+ init_class_id(Class_Sub);
+ }
+ virtual int Opcode() const;
+ // Since it is not consumed by Bools, it is not really a Cmp.
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+
+//------------------------------CmpDNode---------------------------------------
+// Compare 2 double values, returning condition codes (-1, 0 or 1).
+// This implements the Java bytecode dcmpl, so unordered returns -1.
+// Operands may not commute.
+class CmpDNode : public CmpNode {
+public:
+ CmpDNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *sub( const Type *, const Type * ) const { ShouldNotReachHere(); return NULL; }
+ const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------CmpD3Node--------------------------------------
+// Compare 2 double values, returning integer value (-1, 0 or 1).
+// This implements the Java bytecode dcmpl, so unordered returns -1.
+// Operands may not commute.
+class CmpD3Node : public CmpDNode {
+public:
+ CmpD3Node( Node *in1, Node *in2 ) : CmpDNode(in1,in2) {
+ // Since it is not consumed by Bools, it is not really a Cmp.
+ init_class_id(Class_Sub);
+ }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+
+//------------------------------BoolTest---------------------------------------
+// Convert condition codes to a boolean test value (0 or -1).
+// We pick the values as 3 bits; the low order 2 bits we compare against the
+// condition codes, the high bit flips the sense of the result.
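+// e.g. eq=0, gt=1 and lt=3 match the low two bits of the condition-code
+// values 0, 1 and -1, while ne=4, le=5 and ge=7 are the same patterns with
+// the complement bit set, i.e. !eq, !gt and !lt.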
+struct BoolTest VALUE_OBJ_CLASS_SPEC {
+ enum mask { eq = 0, ne = 4, le = 5, ge = 7, lt = 3, gt = 1, illegal = 8 };
+ mask _test;
+ BoolTest( mask btm ) : _test(btm) {}
+ const Type *cc2logical( const Type *CC ) const;
+ // Commute the test. I use a small table lookup. The table is created as
+ // a simple char array where each element is the ASCII version of a 'mask'
+ // enum from above.
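+ // e.g. "038147858"[lt=3] is '1' (gt) and "038147858"[le=5] is '7' (ge).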
+ mask commute( ) const { return mask("038147858"[_test]-'0'); }
+ mask negate( ) const { return mask(_test^4); }
+ bool is_canonical( ) const { return (_test == BoolTest::ne || _test == BoolTest::lt || _test == BoolTest::le); }
+#ifndef PRODUCT
+ void dump_on(outputStream *st) const;
+#endif
+};
+
+//------------------------------BoolNode---------------------------------------
+// A Node to convert a Condition Codes to a Logical result.
+class BoolNode : public Node {
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const;
+public:
+ const BoolTest _test;
+ BoolNode( Node *cc, BoolTest::mask t): _test(t), Node(0,cc) {
+ init_class_id(Class_Bool);
+ }
+ // Convert an arbitrary int value to a Bool or other suitable predicate.
+ static Node* make_predicate(Node* test_value, PhaseGVN* phase);
+ // Convert self back to an integer value.
+ Node* as_int_value(PhaseGVN* phase);
+ // Invert sense of self, returning new Bool.
+ BoolNode* negate(PhaseGVN* phase);
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return TypeInt::BOOL; }
+ uint match_edge(uint idx) const { return 0; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+
+ bool is_counted_loop_exit_test();
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------AbsNode----------------------------------------
+// Abstract class for absolute value. Mostly used to get a handy wrapper
+// for finding this pattern in the graph.
+class AbsNode : public Node {
+public:
+ AbsNode( Node *value ) : Node(0,value) {}
+};
+
+//------------------------------AbsINode---------------------------------------
+// Absolute value of an integer. Since a naive graph involves control flow, we
+// "match" it in the ideal world (so the control flow can be removed).
+class AbsINode : public AbsNode {
+public:
+ AbsINode( Node *in1 ) : AbsNode(in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------AbsFNode---------------------------------------
+// Absolute value of a float, a common floating-point idiom with a cheap
+// hardware implementation on most chips. Since a naive graph involves
+// control flow, we "match" it in the ideal world (so the control flow can
+// be removed).
+class AbsFNode : public AbsNode {
+public:
+ AbsFNode( Node *in1 ) : AbsNode(in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------AbsDNode---------------------------------------
+// Absolute value of a double, a common floating-point idiom with a cheap
+// hardware implementation on most chips. Since a naive graph involves
+// control flow, we "match" it in the ideal world (so the control flow can
+// be removed).
+class AbsDNode : public AbsNode {
+public:
+ AbsDNode( Node *in1 ) : AbsNode(in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+
+//------------------------------CmpLTMaskNode----------------------------------
+// If p < q, return -1 else return 0. Nice for flow-free idioms.
+class CmpLTMaskNode : public Node {
+public:
+ CmpLTMaskNode( Node *p, Node *q ) : Node(0, p, q) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+
+//------------------------------NegNode----------------------------------------
+class NegNode : public Node {
+public:
+ NegNode( Node *in1 ) : Node(0,in1) {}
+};
+
+//------------------------------NegFNode---------------------------------------
+// Negate a float value. Negating 0.0 returns -0.0, but subtracting from
+// zero returns +0.0 (per JVM spec on 'fneg' bytecode). Since subtraction
+// cannot be used to replace negation, we have to implement negation as an
+// ideal node; note that negation plus addition can replace subtraction.
+class NegFNode : public NegNode {
+public:
+ NegFNode( Node *in1 ) : NegNode(in1) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------NegDNode---------------------------------------
+// Negate a double value. Negating 0.0 returns -0.0, but subtracting from
+// zero returns +0.0 (per JVM spec on 'dneg' bytecode). Since subtraction
+// cannot be used to replace negation, we have to implement negation as an
+// ideal node; note that negation plus addition can replace subtraction.
+class NegDNode : public NegNode {
+public:
+ NegDNode( Node *in1 ) : NegNode(in1) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------CosDNode---------------------------------------
+// Cosine of a double
+class CosDNode : public Node {
+public:
+ CosDNode( Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------SinDNode---------------------------------------
+// Sine of a double
+class SinDNode : public Node {
+public:
+ SinDNode( Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+
+//------------------------------TanDNode---------------------------------------
+// Tangent of a double
+class TanDNode : public Node {
+public:
+ TanDNode(Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+
+//------------------------------AtanDNode--------------------------------------
+// Arc tangent of a double
+class AtanDNode : public Node {
+public:
+ AtanDNode(Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+
+//------------------------------SqrtDNode--------------------------------------
+// Square root of a double
+class SqrtDNode : public Node {
+public:
+ SqrtDNode(Node *c, Node *in1 ) : Node(c, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------ExpDNode---------------------------------------
+// Exponential (e^x) of a double
+class ExpDNode : public Node {
+public:
+ ExpDNode( Node *c, Node *in1 ) : Node(c, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------LogDNode---------------------------------------
+// Log_e of a double
+class LogDNode : public Node {
+public:
+ LogDNode( Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------Log10DNode---------------------------------------
+// Log_10 of a double
+class Log10DNode : public Node {
+public:
+ Log10DNode( Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------PowDNode---------------------------------------
+// Raise a double to a double power
+class PowDNode : public Node {
+public:
+ PowDNode(Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//-------------------------------ReverseBytesINode--------------------------------
+// reverse bytes of an integer
+class ReverseBytesINode : public Node {
+public:
+ ReverseBytesINode(Node *c, Node *in1) : Node(c, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//-------------------------------ReverseBytesLNode--------------------------------
+// reverse bytes of a long
+class ReverseBytesLNode : public Node {
+public:
+ ReverseBytesLNode(Node *c, Node *in1) : Node(c, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
new file mode 100644
index 000000000..b1467fc9e
--- /dev/null
+++ b/src/share/vm/opto/superword.cpp
@@ -0,0 +1,2025 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_superword.cpp.incl"
+
+//
+// S U P E R W O R D T R A N S F O R M
+//=============================================================================
+
+//------------------------------SuperWord---------------------------
+SuperWord::SuperWord(PhaseIdealLoop* phase) :
+ _phase(phase),
+ _igvn(phase->_igvn),
+ _arena(phase->C->comp_arena()),
+ _packset(arena(), 8, 0, NULL), // packs for the current block
+ _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
+ _block(arena(), 8, 0, NULL), // nodes in current block
+ _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
+ _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
+ _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
+ _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
+ _align_to_ref(NULL), // memory reference to align vectors to
+ _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
+ _dg(_arena), // dependence graph
+ _visited(arena()), // visited node set
+ _post_visited(arena()), // post visited node set
+ _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
+ _stk(arena(), 8, 0, NULL), // scratch stack of nodes
+ _nlist(arena(), 8, 0, NULL), // scratch list of nodes
+ _lpt(NULL), // loop tree node
+ _lp(NULL), // LoopNode
+ _bb(NULL), // basic block
+ _iv(NULL) // induction var
+{}
+
+//------------------------------transform_loop---------------------------
+void SuperWord::transform_loop(IdealLoopTree* lpt) {
+ assert(lpt->_head->is_CountedLoop(), "must be");
+ CountedLoopNode *cl = lpt->_head->as_CountedLoop();
+
+ if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
+
+ // Check for no control flow in body (other than exit)
+ Node *cl_exit = cl->loopexit();
+ if (cl_exit->in(0) != lpt->_head) return;
+
+ // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
+ CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
+ if (pre_end == NULL) return;
+ Node *pre_opaq1 = pre_end->limit();
+ if (pre_opaq1->Opcode() != Op_Opaque1) return;
+
+ // Do vectors exist on this architecture?
+ if (vector_width_in_bytes() == 0) return;
+
+ init(); // initialize data structures
+
+ set_lpt(lpt);
+ set_lp(cl);
+
+ // For now, define one block which is the entire loop body
+ set_bb(cl);
+
+ assert(_packset.length() == 0, "packset must be empty");
+ SLP_extract();
+}
+
+//------------------------------SLP_extract---------------------------
+// Extract the superword level parallelism
+//
+// 1) A reverse post-order of nodes in the block is constructed. By scanning
+// this list from first to last, all definitions are visited before their uses.
+//
+// 2) A point-to-point dependence graph is constructed between memory references.
+// This simplifies the upcoming "independence" checker.
+//
+// 3) The maximum depth in the node graph from the beginning of the block
+// to each node is computed. This is used to prune the graph search
+// in the independence checker.
+//
+// 4) For integer types, the necessary bit width is propagated backwards
+// from stores to allow packed operations on byte, char, and short
+// integers. This reverses the promotion to type "int" that javac
+// did for operations like: char c1,c2,c3; c1 = c2 + c3.
+//
+// 5) One of the memory references is picked to be an aligned vector reference.
+// The pre-loop trip count is adjusted to align this reference in the
+// unrolled body.
+//
+// 6) The initial set of pack pairs is seeded with memory references.
+//
+// 7) The set of pack pairs is extended by following use->def and def->use links.
+//
+// 8) The pairs are combined into vector sized packs.
+//
+// 9) Reorder the memory slices to co-locate members of the memory packs.
+//
+// 10) Generate ideal vector nodes for the final set of packs and where necessary,
+// inserting scalar promotion, vector creation from multiple scalars, and
+// extraction of scalar values from vectors.
+//
+void SuperWord::SLP_extract() {
+
+ // Ready the block
+
+ construct_bb();
+
+ dependence_graph();
+
+ compute_max_depth();
+
+ compute_vector_element_type();
+
+ // Attempt vectorization
+
+ find_adjacent_refs();
+
+ extend_packlist();
+
+ combine_packs();
+
+ construct_my_pack_map();
+
+ filter_packs();
+
+ schedule();
+
+ output();
+}
+
+//------------------------------find_adjacent_refs---------------------------
+// Find the adjacent memory references and create pack pairs for them.
+// This is the initial set of packs that will then be extended by
+// following use->def and def->use links. The align positions are
+// assigned relative to the reference "align_to_ref"
+void SuperWord::find_adjacent_refs() {
+ // Get list of memory operations
+ Node_List memops;
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ if (n->is_Mem() && in_bb(n)) {
+ int align = memory_alignment(n->as_Mem(), 0);
+ if (align != bottom_align) {
+ memops.push(n);
+ }
+ }
+ }
+ if (memops.size() == 0) return;
+
+ // Find a memory reference to align to. The pre-loop trip count
+ // is modified to align this reference to a vector-aligned address
+ find_align_to_ref(memops);
+ if (align_to_ref() == NULL) return;
+
+ SWPointer align_to_ref_p(align_to_ref(), this);
+ int offset = align_to_ref_p.offset_in_bytes();
+ int scale = align_to_ref_p.scale_in_bytes();
+ int vw = vector_width_in_bytes();
+ int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
+ int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
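+ // e.g. with a 16-byte vector width, offset 4 and a positive stride this
+ // gives iv_adjustment = (16 - 4) % 16 = 12 bytes.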
+
+#ifndef PRODUCT
+ if (TraceSuperWord)
+ tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d",
+ offset, iv_adjustment, align_to_ref_p.memory_size());
+#endif
+
+ // Set alignment relative to "align_to_ref"
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ SWPointer p2(s, this);
+ if (p2.comparable(align_to_ref_p)) {
+ int align = memory_alignment(s, iv_adjustment);
+ set_alignment(s, align);
+ } else {
+ memops.remove(i);
+ }
+ }
+
+ // Create initial pack pairs of memory operations
+ for (uint i = 0; i < memops.size(); i++) {
+ Node* s1 = memops.at(i);
+ for (uint j = 0; j < memops.size(); j++) {
+ Node* s2 = memops.at(j);
+ if (s1 != s2 && are_adjacent_refs(s1, s2)) {
+ int align = alignment(s1);
+ if (stmts_can_pack(s1, s2, align)) {
+ Node_List* pair = new Node_List();
+ pair->push(s1);
+ pair->push(s2);
+ _packset.append(pair);
+ }
+ }
+ }
+ }
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nAfter find_adjacent_refs");
+ print_packset();
+ }
+#endif
+}
+
+//------------------------------find_align_to_ref---------------------------
+// Find a memory reference to align the loop induction variable to.
+// Looks first at stores then at loads, looking for a memory reference
+// with the largest number of references similar to it.
+void SuperWord::find_align_to_ref(Node_List &memops) {
+ GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
+
+ // Count number of comparable memory ops
+ for (uint i = 0; i < memops.size(); i++) {
+ MemNode* s1 = memops.at(i)->as_Mem();
+ SWPointer p1(s1, this);
+ // Discard if pre loop can't align this reference
+ if (!ref_is_alignable(p1)) {
+ *cmp_ct.adr_at(i) = 0;
+ continue;
+ }
+ for (uint j = i+1; j < memops.size(); j++) {
+ MemNode* s2 = memops.at(j)->as_Mem();
+ if (isomorphic(s1, s2)) {
+ SWPointer p2(s2, this);
+ if (p1.comparable(p2)) {
+ (*cmp_ct.adr_at(i))++;
+ (*cmp_ct.adr_at(j))++;
+ }
+ }
+ }
+ }
+
+ // Find Store (or Load) with the greatest number of "comparable" references
+ int max_ct = 0;
+ int max_idx = -1;
+ int min_size = max_jint;
+ int min_iv_offset = max_jint;
+ for (uint j = 0; j < memops.size(); j++) {
+ MemNode* s = memops.at(j)->as_Mem();
+ if (s->is_Store()) {
+ SWPointer p(s, this);
+ if (cmp_ct.at(j) > max_ct ||
+ (cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
+ (data_size(s) == min_size &&
+ p.offset_in_bytes() < min_iv_offset)))) {
+ max_ct = cmp_ct.at(j);
+ max_idx = j;
+ min_size = data_size(s);
+ min_iv_offset = p.offset_in_bytes();
+ }
+ }
+ }
+ // If no stores, look at loads
+ if (max_ct == 0) {
+ for (uint j = 0; j < memops.size(); j++) {
+ MemNode* s = memops.at(j)->as_Mem();
+ if (s->is_Load()) {
+ SWPointer p(s, this);
+ if (cmp_ct.at(j) > max_ct ||
+ (cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
+ (data_size(s) == min_size &&
+ p.offset_in_bytes() < min_iv_offset)))) {
+ max_ct = cmp_ct.at(j);
+ max_idx = j;
+ min_size = data_size(s);
+ min_iv_offset = p.offset_in_bytes();
+ }
+ }
+ }
+ }
+
+ if (max_ct > 0)
+ set_align_to_ref(memops.at(max_idx)->as_Mem());
+
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose) {
+ tty->print_cr("\nVector memops after find_align_to_refs");
+ for (uint i = 0; i < memops.size(); i++) {
+ MemNode* s = memops.at(i)->as_Mem();
+ s->dump();
+ }
+ }
+#endif
+}
+
+//------------------------------ref_is_alignable---------------------------
+// Can the preloop align the reference to position zero in the vector?
+bool SuperWord::ref_is_alignable(SWPointer& p) {
+ if (!p.has_iv()) {
+ return true; // no induction variable
+ }
+ CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
+ assert(pre_end->stride_is_con(), "pre loop stride is constant");
+ int preloop_stride = pre_end->stride_con();
+
+ int span = preloop_stride * p.scale_in_bytes();
+
+ // Stride one accesses are alignable.
+ if (ABS(span) == p.memory_size())
+ return true;
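+ // (each pre-loop iteration then moves the reference by exactly one element,
+ // so a suitable pre-loop trip count can reach vector alignment)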
+
+ // If initial offset from start of object is computable,
+ // compute alignment within the vector.
+ int vw = vector_width_in_bytes();
+ if (vw % span == 0) {
+ Node* init_nd = pre_end->init_trip();
+ if (init_nd->is_Con() && p.invar() == NULL) {
+ int init = init_nd->bottom_type()->is_int()->get_con();
+
+ int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes();
+ assert(init_offset >= 0, "positive offset from object start");
+
+ if (span > 0) {
+ return (vw - (init_offset % vw)) % span == 0;
+ } else {
+ assert(span < 0, "nonzero stride * scale");
+ return (init_offset % vw) % -span == 0;
+ }
+ }
+ }
+ return false;
+}
+
+//---------------------------dependence_graph---------------------------
+// Construct dependency graph.
+// Add dependence edges to load/store nodes for memory dependence
+// A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
+void SuperWord::dependence_graph() {
+ // First, assign a dependence node to each memory node
+ for (int i = 0; i < _block.length(); i++ ) {
+ Node *n = _block.at(i);
+ if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
+ _dg.make_node(n);
+ }
+ }
+
+ // For each memory slice, create the dependences
+ for (int i = 0; i < _mem_slice_head.length(); i++) {
+ Node* n = _mem_slice_head.at(i);
+ Node* n_tail = _mem_slice_tail.at(i);
+
+ // Get slice in predecessor order (last is first)
+ mem_slice_preds(n_tail, n, _nlist);
+
+ // Make the slice dependent on the root
+ DepMem* slice = _dg.dep(n);
+ _dg.make_edge(_dg.root(), slice);
+
+ // Create a sink for the slice
+ DepMem* slice_sink = _dg.make_node(NULL);
+ _dg.make_edge(slice_sink, _dg.tail());
+
+ // Now visit each pair of memory ops, creating the edges
+ for (int j = _nlist.length() - 1; j >= 0 ; j--) {
+ Node* s1 = _nlist.at(j);
+
+ // If no dependency yet, use slice
+ if (_dg.dep(s1)->in_cnt() == 0) {
+ _dg.make_edge(slice, s1);
+ }
+ SWPointer p1(s1->as_Mem(), this);
+ bool sink_dependent = true;
+ for (int k = j - 1; k >= 0; k--) {
+ Node* s2 = _nlist.at(k);
+ if (s1->is_Load() && s2->is_Load())
+ continue;
+ SWPointer p2(s2->as_Mem(), this);
+
+ int cmp = p1.cmp(p2);
+ if (SuperWordRTDepCheck &&
+ p1.base() != p2.base() && p1.valid() && p2.valid()) {
+ // Create a runtime check to disambiguate
+ OrderedPair pp(p1.base(), p2.base());
+ _disjoint_ptrs.append_if_missing(pp);
+ } else if (!SWPointer::not_equal(cmp)) {
+ // Possibly same address
+ _dg.make_edge(s1, s2);
+ sink_dependent = false;
+ }
+ }
+ if (sink_dependent) {
+ _dg.make_edge(s1, slice_sink);
+ }
+ }
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nDependence graph for slice: %d", n->_idx);
+ for (int q = 0; q < _nlist.length(); q++) {
+ _dg.print(_nlist.at(q));
+ }
+ tty->cr();
+ }
+#endif
+ _nlist.clear();
+ }
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE");
+ for (int r = 0; r < _disjoint_ptrs.length(); r++) {
+ _disjoint_ptrs.at(r).print();
+ tty->cr();
+ }
+ tty->cr();
+ }
+#endif
+}
+
+//---------------------------mem_slice_preds---------------------------
+// Return a memory slice (node list) in predecessor order starting at "start"
+void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) {
+ assert(preds.length() == 0, "start empty");
+ Node* n = start;
+ Node* prev = NULL;
+ while (true) {
+ assert(in_bb(n), "must be in block");
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* out = n->fast_out(i);
+ if (out->is_Load()) {
+ if (in_bb(out)) {
+ preds.push(out);
+ }
+ } else {
+ // FIXME
+ if (out->is_MergeMem() && !in_bb(out)) {
+ // Either unrolling is causing a memory edge not to disappear,
+ // or need to run igvn.optimize() again before SLP
+ } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
+ // Ditto. Not sure what else to check further.
+ } else if (out->Opcode() == Op_StoreCM && out->in(4) == n) {
+ // StoreCM has an input edge used as a precedence edge.
+ // Maybe an issue when oop stores are vectorized.
+ } else {
+ assert(out == prev || prev == NULL, "no branches off of store slice");
+ }
+ }
+ }
+ if (n == stop) break;
+ preds.push(n);
+ prev = n;
+ n = n->in(MemNode::Memory);
+ }
+}
+
+//------------------------------stmts_can_pack---------------------------
+// Can s1 and s2 be in a pack with s1 immediately preceding s2 and
+// s1 aligned at "align"
+bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
+ if (isomorphic(s1, s2)) {
+ if (independent(s1, s2)) {
+ if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
+ if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
+ int s1_align = alignment(s1);
+ int s2_align = alignment(s2);
+ if (s1_align == top_align || s1_align == align) {
+ if (s2_align == top_align || s2_align == align + data_size(s1)) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//------------------------------exists_at---------------------------
+// Does s exist in a pack at position pos?
+bool SuperWord::exists_at(Node* s, uint pos) {
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ if (p->at(pos) == s) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------are_adjacent_refs---------------------------
+// Is s1 immediately before s2 in memory?
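+// For example, loads of a[i] and a[i+1] from an int array are adjacent:
+// their offsets differ by exactly data_size(s1) == 4 bytes.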
+bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
+ if (!s1->is_Mem() || !s2->is_Mem()) return false;
+ if (!in_bb(s1) || !in_bb(s2)) return false;
+ // FIXME - co_locate_pack fails on Stores in different mem-slices, so
+ // only pack memops that are in the same alias set until that's fixed.
+ if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
+ _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
+ return false;
+ SWPointer p1(s1->as_Mem(), this);
+ SWPointer p2(s2->as_Mem(), this);
+ if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
+ int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
+ return diff == data_size(s1);
+}
+
+//------------------------------isomorphic---------------------------
+// Are s1 and s2 similar?
+bool SuperWord::isomorphic(Node* s1, Node* s2) {
+ if (s1->Opcode() != s2->Opcode()) return false;
+ if (s1->req() != s2->req()) return false;
+ if (s1->in(0) != s2->in(0)) return false;
+ if (velt_type(s1) != velt_type(s2)) return false;
+ return true;
+}
+
+//------------------------------independent---------------------------
+// Is there no data path from s1 to s2 or s2 to s1?
+bool SuperWord::independent(Node* s1, Node* s2) {
+ // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
+ int d1 = depth(s1);
+ int d2 = depth(s2);
+ if (d1 == d2) return s1 != s2;
+ Node* deep = d1 > d2 ? s1 : s2;
+ Node* shallow = d1 > d2 ? s2 : s1;
+
+ visited_clear();
+
+ return independent_path(shallow, deep);
+}
+
+//------------------------------independent_path------------------------------
+// Helper for independent
+bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
+ if (dp >= 1000) return false; // stop deep recursion
+ visited_set(deep);
+ int shal_depth = depth(shallow);
+ assert(shal_depth <= depth(deep), "must be");
+ for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
+ Node* pred = preds.current();
+ if (in_bb(pred) && !visited_test(pred)) {
+ if (shallow == pred) {
+ return false;
+ }
+ if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+//------------------------------set_alignment---------------------------
+void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
+ set_alignment(s1, align);
+ set_alignment(s2, align + data_size(s1));
+}
+
+//------------------------------data_size---------------------------
+int SuperWord::data_size(Node* s) {
+ const Type* t = velt_type(s);
+ BasicType bt = t->array_element_basic_type();
+ int bsize = type2aelembytes[bt];
+ assert(bsize != 0, "valid size");
+ return bsize;
+}
+
+//------------------------------extend_packlist---------------------------
+// Extend packset by following use->def and def->use links from pack members.
+void SuperWord::extend_packlist() {
+ bool changed;
+ do {
+ changed = false;
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ changed |= follow_use_defs(p);
+ changed |= follow_def_uses(p);
+ }
+ } while (changed);
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nAfter extend_packlist");
+ print_packset();
+ }
+#endif
+}
+
+//------------------------------follow_use_defs---------------------------
+// Extend the packset by visiting operand definitions of nodes in pack p
+bool SuperWord::follow_use_defs(Node_List* p) {
+ Node* s1 = p->at(0);
+ Node* s2 = p->at(1);
+ assert(p->size() == 2, "just checking");
+ assert(s1->req() == s2->req(), "just checking");
+ assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
+
+ if (s1->is_Load()) return false;
+
+ int align = alignment(s1);
+ bool changed = false;
+ int start = s1->is_Store() ? MemNode::ValueIn : 1;
+ int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
+ for (int j = start; j < end; j++) {
+ Node* t1 = s1->in(j);
+ Node* t2 = s2->in(j);
+ if (!in_bb(t1) || !in_bb(t2))
+ continue;
+ if (stmts_can_pack(t1, t2, align)) {
+ if (est_savings(t1, t2) >= 0) {
+ Node_List* pair = new Node_List();
+ pair->push(t1);
+ pair->push(t2);
+ _packset.append(pair);
+ set_alignment(t1, t2, align);
+ changed = true;
+ }
+ }
+ }
+ return changed;
+}
+
+//------------------------------follow_def_uses---------------------------
+// Extend the packset by visiting uses of nodes in pack p
+bool SuperWord::follow_def_uses(Node_List* p) {
+ bool changed = false;
+ Node* s1 = p->at(0);
+ Node* s2 = p->at(1);
+ assert(p->size() == 2, "just checking");
+ assert(s1->req() == s2->req(), "just checking");
+ assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
+
+ if (s1->is_Store()) return false;
+
+ int align = alignment(s1);
+ int savings = -1;
+ Node* u1 = NULL;
+ Node* u2 = NULL;
+ for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
+ Node* t1 = s1->fast_out(i);
+ if (!in_bb(t1)) continue;
+ for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
+ Node* t2 = s2->fast_out(j);
+ if (!in_bb(t2)) continue;
+ if (!opnd_positions_match(s1, t1, s2, t2))
+ continue;
+ if (stmts_can_pack(t1, t2, align)) {
+ int my_savings = est_savings(t1, t2);
+ if (my_savings > savings) {
+ savings = my_savings;
+ u1 = t1;
+ u2 = t2;
+ }
+ }
+ }
+ }
+ if (savings >= 0) {
+ Node_List* pair = new Node_List();
+ pair->push(u1);
+ pair->push(u2);
+ _packset.append(pair);
+ set_alignment(u1, u2, align);
+ changed = true;
+ }
+ return changed;
+}
+
+//---------------------------opnd_positions_match-------------------------
+// Is the use of d1 in u1 at the same operand position as d2 in u2?
+bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
+ uint ct = u1->req();
+ if (ct != u2->req()) return false;
+ uint i1 = 0;
+ uint i2 = 0;
+ do {
+ for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
+ for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
+ if (i1 != i2) {
+ return false;
+ }
+ } while (i1 < ct);
+ return true;
+}
+
+//------------------------------est_savings---------------------------
+// Estimate the savings from executing s1 and s2 as a pack
+int SuperWord::est_savings(Node* s1, Node* s2) {
+ int save = 2 - 1; // 2 scalar operations become 1 packed instruction
+
+ // inputs
+ for (uint i = 1; i < s1->req(); i++) {
+ Node* x1 = s1->in(i);
+ Node* x2 = s2->in(i);
+ if (x1 != x2) {
+ if (are_adjacent_refs(x1, x2)) {
+ save += adjacent_profit(x1, x2);
+ } else if (!in_packset(x1, x2)) {
+ save -= pack_cost(2);
+ } else {
+ save += unpack_cost(2);
+ }
+ }
+ }
+
+ // uses of result
+ uint ct = 0;
+ for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
+ Node* s1_use = s1->fast_out(i);
+ for (int j = 0; j < _packset.length(); j++) {
+ Node_List* p = _packset.at(j);
+ if (p->at(0) == s1_use) {
+ for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) {
+ Node* s2_use = s2->fast_out(k);
+ if (p->at(p->size()-1) == s2_use) {
+ ct++;
+ if (are_adjacent_refs(s1_use, s2_use)) {
+ save += adjacent_profit(s1_use, s2_use);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (ct < s1->outcnt()) save += unpack_cost(1);
+ if (ct < s2->outcnt()) save += unpack_cost(1);
+
+ return save;
+}
+
+//------------------------------costs---------------------------
+int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
+int SuperWord::pack_cost(int ct) { return ct; }
+int SuperWord::unpack_cost(int ct) { return ct; }
+
+//------------------------------combine_packs---------------------------
+// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
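+// For example, packs <a,b> and <b,c> combine into <a,b,c>; the consumed
+// pack's slot is set to NULL and compacted away afterwards.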
+void SuperWord::combine_packs() {
+ bool changed;
+ do {
+ changed = false;
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p1 = _packset.at(i);
+ if (p1 == NULL) continue;
+ for (int j = 0; j < _packset.length(); j++) {
+ Node_List* p2 = _packset.at(j);
+ if (p2 == NULL) continue;
+ if (p1->at(p1->size()-1) == p2->at(0)) {
+ for (uint k = 1; k < p2->size(); k++) {
+ p1->push(p2->at(k));
+ }
+ _packset.at_put(j, NULL);
+ changed = true;
+ }
+ }
+ }
+ } while (changed);
+
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* p1 = _packset.at(i);
+ if (p1 == NULL) {
+ _packset.remove_at(i);
+ }
+ }
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nAfter combine_packs");
+ print_packset();
+ }
+#endif
+}
+
+//-----------------------------construct_my_pack_map--------------------------
+// Construct the map from nodes to packs. Only valid after the
+// point where a node is only in one pack (after combine_packs).
+void SuperWord::construct_my_pack_map() {
+ Node_List* rslt = NULL;
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ for (uint j = 0; j < p->size(); j++) {
+ Node* s = p->at(j);
+ assert(my_pack(s) == NULL, "only in one pack");
+ set_my_pack(s, p);
+ }
+ }
+}
+
+//------------------------------filter_packs---------------------------
+// Remove packs that are not implemented or not profitable.
+void SuperWord::filter_packs() {
+
+ // Remove packs that are not implemented
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* pk = _packset.at(i);
+ bool impl = implemented(pk);
+ if (!impl) {
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose) {
+ tty->print_cr("Unimplemented");
+ pk->at(0)->dump();
+ }
+#endif
+ remove_pack_at(i);
+ }
+ }
+
+ // Remove packs that are not profitable
+ bool changed;
+ do {
+ changed = false;
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* pk = _packset.at(i);
+ bool prof = profitable(pk);
+ if (!prof) {
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose) {
+ tty->print_cr("Unprofitable");
+ pk->at(0)->dump();
+ }
+#endif
+ remove_pack_at(i);
+ changed = true;
+ }
+ }
+ } while (changed);
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nAfter filter_packs");
+ print_packset();
+ tty->cr();
+ }
+#endif
+}
+
+//------------------------------implemented---------------------------
+// Can code be generated for pack p?
+bool SuperWord::implemented(Node_List* p) {
+ Node* p0 = p->at(0);
+ int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0));
+ return vopc > 0 && Matcher::has_match_rule(vopc);
+}
+
+//------------------------------profitable---------------------------
+// For pack p, are all operands and all uses (within the block) vector?
+bool SuperWord::profitable(Node_List* p) {
+ Node* p0 = p->at(0);
+ uint start, end;
+ vector_opd_range(p0, &start, &end);
+
+ // Return false if some input is not vector and inside block
+ for (uint i = start; i < end; i++) {
+ if (!is_vector_use(p0, i)) {
+ // For now, return false unless this is the scalar promotion case
+ // (all inputs are the same). Later, implement PackNode and allow
+ // differing, non-vector inputs (maybe just the ones from outside the block).
+ Node* p0_def = p0->in(i);
+ for (uint j = 1; j < p->size(); j++) {
+ Node* use = p->at(j);
+ Node* def = use->in(i);
+ if (p0_def != def)
+ return false;
+ }
+ }
+ }
+ if (!p0->is_Store()) {
+ // For now, return false if not all uses are vector.
+ // Later, implement ExtractNode and allow non-vector uses (maybe
+ // just the ones outside the block.)
+ for (uint i = 0; i < p->size(); i++) {
+ Node* def = p->at(i);
+ for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
+ Node* use = def->fast_out(j);
+ for (uint k = 0; k < use->req(); k++) {
+ Node* n = use->in(k);
+ if (def == n) {
+ if (!is_vector_use(use, k)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+//------------------------------schedule---------------------------
+// Adjust the memory graph for the packed operations
+void SuperWord::schedule() {
+
+ // Co-locate in the memory graph the members of each memory pack
+ for (int i = 0; i < _packset.length(); i++) {
+ co_locate_pack(_packset.at(i));
+ }
+}
+
+//------------------------------co_locate_pack---------------------------
+// Within a pack, move stores down to the last executed store,
+// and move loads up to the first executed load.
+void SuperWord::co_locate_pack(Node_List* pk) {
+ if (pk->at(0)->is_Store()) {
+ // Push Stores down towards last executed pack member
+ MemNode* first = executed_first(pk)->as_Mem();
+ MemNode* last = executed_last(pk)->as_Mem();
+ MemNode* insert_pt = last;
+ MemNode* current = last->in(MemNode::Memory)->as_Mem();
+ while (true) {
+ assert(in_bb(current), "stay in block");
+ Node* my_mem = current->in(MemNode::Memory);
+ if (in_pack(current, pk)) {
+ // Forward users of my memory state to my input memory state
+ _igvn.hash_delete(current);
+ _igvn.hash_delete(my_mem);
+ for (DUIterator i = current->outs(); current->has_out(i); i++) {
+ Node* use = current->out(i);
+ if (use->is_Mem()) {
+ assert(use->in(MemNode::Memory) == current, "must be");
+ _igvn.hash_delete(use);
+ use->set_req(MemNode::Memory, my_mem);
+ _igvn._worklist.push(use);
+ --i; // deleted this edge; rescan position
+ }
+ }
+ // put current immediately before insert_pt
+ current->set_req(MemNode::Memory, insert_pt->in(MemNode::Memory));
+ _igvn.hash_delete(insert_pt);
+ insert_pt->set_req(MemNode::Memory, current);
+ _igvn._worklist.push(insert_pt);
+ _igvn._worklist.push(current);
+ insert_pt = current;
+ }
+ if (current == first) break;
+ current = my_mem->as_Mem();
+ }
+ } else if (pk->at(0)->is_Load()) {
+ // Pull Loads up towards first executed pack member
+ LoadNode* first = executed_first(pk)->as_Load();
+ Node* first_mem = first->in(MemNode::Memory);
+ _igvn.hash_delete(first_mem);
+ // Give each load same memory state as first
+ for (uint i = 0; i < pk->size(); i++) {
+ LoadNode* ld = pk->at(i)->as_Load();
+ _igvn.hash_delete(ld);
+ ld->set_req(MemNode::Memory, first_mem);
+ _igvn._worklist.push(ld);
+ }
+ }
+}
+
+//------------------------------output---------------------------
+// Convert packs into vector node operations
+void SuperWord::output() {
+ if (_packset.length() == 0) return;
+
+ // MUST ENSURE main loop's initial value is properly aligned:
+ // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
+
+ align_initial_loop_index(align_to_ref());
+
+ // Insert extract (unpack) operations for scalar uses
+ for (int i = 0; i < _packset.length(); i++) {
+ insert_extracts(_packset.at(i));
+ }
+
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ Node_List* p = my_pack(n);
+ if (p && n == executed_last(p)) {
+ uint vlen = p->size();
+ Node* vn = NULL;
+ Node* low_adr = p->at(0);
+ Node* first = executed_first(p);
+ if (n->is_Load()) {
+ int opc = n->Opcode();
+ Node* ctl = n->in(MemNode::Control);
+ Node* mem = first->in(MemNode::Memory);
+ Node* adr = low_adr->in(MemNode::Address);
+ const TypePtr* atyp = n->adr_type();
+ vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen);
+
+ } else if (n->is_Store()) {
+ // Promote value to be stored to vector
+ VectorNode* val = vector_opd(p, MemNode::ValueIn);
+
+ int opc = n->Opcode();
+ Node* ctl = n->in(MemNode::Control);
+ Node* mem = first->in(MemNode::Memory);
+ Node* adr = low_adr->in(MemNode::Address);
+ const TypePtr* atyp = n->adr_type();
+ vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
+
+ } else if (n->req() == 3) {
+ // Promote operands to vector
+ Node* in1 = vector_opd(p, 1);
+ Node* in2 = vector_opd(p, 2);
+ vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n));
+
+ } else {
+ ShouldNotReachHere();
+ }
+
+ _phase->_igvn.register_new_node_with_optimizer(vn);
+ _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
+ for (uint j = 0; j < p->size(); j++) {
+ Node* pm = p->at(j);
+ _igvn.hash_delete(pm);
+ _igvn.subsume_node(pm, vn);
+ }
+ _igvn._worklist.push(vn);
+ }
+ }
+}
+
+//------------------------------vector_opd---------------------------
+// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
+VectorNode* SuperWord::vector_opd(Node_List* p, int opd_idx) {
+ Node* p0 = p->at(0);
+ uint vlen = p->size();
+ Node* opd = p0->in(opd_idx);
+
+ bool same_opd = true;
+ for (uint i = 1; i < vlen; i++) {
+ Node* pi = p->at(i);
+ Node* in = pi->in(opd_idx);
+ if (opd != in) {
+ same_opd = false;
+ break;
+ }
+ }
+
+ if (same_opd) {
+ if (opd->is_Vector()) {
+ return (VectorNode*)opd; // input is matching vector
+ }
+ // Convert scalar input to vector. Use p0's type because its
+ // container may be smaller than the operand's container.
+ const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd);
+ const Type* p0_t = velt_type(p0);
+ if (p0_t->higher_equal(opd_t)) opd_t = p0_t;
+ VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, opd_t);
+
+ _phase->_igvn.register_new_node_with_optimizer(vn);
+ _phase->set_ctrl(vn, _phase->get_ctrl(opd));
+ return vn;
+ }
+
+ // Insert pack operation
+ const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd);
+ PackNode* pk = PackNode::make(_phase->C, opd, opd_t);
+
+ for (uint i = 1; i < vlen; i++) {
+ Node* pi = p->at(i);
+ Node* in = pi->in(opd_idx);
+ assert(my_pack(in) == NULL, "Should already have been unpacked");
+ assert(opd_t == velt_type(!in_bb(in) ? pi : in), "all same type");
+ pk->add_opd(in);
+ }
+ _phase->_igvn.register_new_node_with_optimizer(pk);
+ _phase->set_ctrl(pk, _phase->get_ctrl(opd));
+ return pk;
+}
+
+//------------------------------insert_extracts---------------------------
+// If a use of pack p is not a vector use, then replace the
+// use with an extract operation.
+void SuperWord::insert_extracts(Node_List* p) {
+ if (p->at(0)->is_Store()) return;
+ assert(_n_idx_list.is_empty(), "empty (node,index) list");
+
+ // Inspect each use of each pack member. For each use that is
+ // not a vector use, replace the use with an extract operation.
+
+ for (uint i = 0; i < p->size(); i++) {
+ Node* def = p->at(i);
+ for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
+ Node* use = def->fast_out(j);
+ for (uint k = 0; k < use->req(); k++) {
+ Node* n = use->in(k);
+ if (def == n) {
+ if (!is_vector_use(use, k)) {
+ _n_idx_list.push(use, k);
+ }
+ }
+ }
+ }
+ }
+
+ while (_n_idx_list.is_nonempty()) {
+ Node* use = _n_idx_list.node();
+ int idx = _n_idx_list.index();
+ _n_idx_list.pop();
+ Node* def = use->in(idx);
+
+ // Insert extract operation
+ _igvn.hash_delete(def);
+ _igvn.hash_delete(use);
+ int def_pos = alignment(def) / data_size(def);
+ const Type* def_t = velt_type(def);
+
+ Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t);
+ _phase->_igvn.register_new_node_with_optimizer(ex);
+ _phase->set_ctrl(ex, _phase->get_ctrl(def));
+ use->set_req(idx, ex);
+ _igvn._worklist.push(def);
+ _igvn._worklist.push(use);
+
+ bb_insert_after(ex, bb_idx(def));
+ set_velt_type(ex, def_t);
+ }
+}
+
+//------------------------------is_vector_use---------------------------
+// Is use->in(u_idx) a vector use?
+bool SuperWord::is_vector_use(Node* use, int u_idx) {
+ Node_List* u_pk = my_pack(use);
+ if (u_pk == NULL) return false;
+ Node* def = use->in(u_idx);
+ Node_List* d_pk = my_pack(def);
+ if (d_pk == NULL) {
+ // check for scalar promotion
+ Node* n = u_pk->at(0)->in(u_idx);
+ for (uint i = 1; i < u_pk->size(); i++) {
+ if (u_pk->at(i)->in(u_idx) != n) return false;
+ }
+ return true;
+ }
+ if (u_pk->size() != d_pk->size())
+ return false;
+ for (uint i = 0; i < u_pk->size(); i++) {
+ Node* ui = u_pk->at(i);
+ Node* di = d_pk->at(i);
+ if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
+ return false;
+ }
+ return true;
+}
+
+//------------------------------construct_bb---------------------------
+// Construct reverse postorder list of block members
+void SuperWord::construct_bb() {
+ Node* entry = bb();
+
+ assert(_stk.length() == 0, "stk is empty");
+ assert(_block.length() == 0, "block is empty");
+ assert(_data_entry.length() == 0, "data_entry is empty");
+ assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
+ assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
+
+ // Find non-control nodes with no inputs from within block,
+ // create a temporary map from node _idx to bb_idx for use
+ // by the visited and post_visited sets,
+ // and count number of nodes in block.
+ int bb_ct = 0;
+ for (uint i = 0; i < lpt()->_body.size(); i++ ) {
+ Node *n = lpt()->_body.at(i);
+ set_bb_idx(n, i); // Create a temporary map
+ if (in_bb(n)) {
+ bb_ct++;
+ if (!n->is_CFG()) {
+ bool found = false;
+ for (uint j = 0; j < n->req(); j++) {
+ Node* def = n->in(j);
+ if (def && in_bb(def)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ assert(n != entry, "can't be entry");
+ _data_entry.push(n);
+ }
+ }
+ }
+ }
+
+ // Find memory slices (head and tail)
+ for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
+ Node *n = lp()->fast_out(i);
+ if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
+ Node* n_tail = n->in(LoopNode::LoopBackControl);
+ _mem_slice_head.push(n);
+ _mem_slice_tail.push(n_tail);
+ }
+ }
+
+ // Create an RPO list of nodes in block
+
+ visited_clear();
+ post_visited_clear();
+
+ // Push all non-control nodes with no inputs from within block, then control entry
+ for (int j = 0; j < _data_entry.length(); j++) {
+ Node* n = _data_entry.at(j);
+ visited_set(n);
+ _stk.push(n);
+ }
+ visited_set(entry);
+ _stk.push(entry);
+
+ // Do a depth first walk over out edges
+ int rpo_idx = bb_ct - 1;
+ int size;
+ while ((size = _stk.length()) > 0) {
+ Node* n = _stk.top(); // Leave node on stack
+ if (!visited_test_set(n)) {
+ // forward arc in graph
+ } else if (!post_visited_test(n)) {
+ // cross or back arc
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if (in_bb(use) && !visited_test(use) &&
+ // Don't go around backedge
+ (!use->is_Phi() || n == entry)) {
+ _stk.push(use);
+ }
+ }
+ if (_stk.length() == size) {
+ // There were no additional uses, post visit node now
+ _stk.pop(); // Remove node from stack
+ assert(rpo_idx >= 0, "");
+ _block.at_put_grow(rpo_idx, n);
+ rpo_idx--;
+ post_visited_set(n);
+ assert(rpo_idx >= 0 || _stk.is_empty(), "");
+ }
+ } else {
+ _stk.pop(); // Remove post-visited node from stack
+ }
+ }
+
+ // Create real map of block indices for nodes
+ for (int j = 0; j < _block.length(); j++) {
+ Node* n = _block.at(j);
+ set_bb_idx(n, j);
+ }
+
+ initialize_bb(); // Ensure extra info is allocated.
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ print_bb();
+ tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
+ for (int m = 0; m < _data_entry.length(); m++) {
+ tty->print("%3d ", m);
+ _data_entry.at(m)->dump();
+ }
+ tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
+ for (int m = 0; m < _mem_slice_head.length(); m++) {
+ tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
+ tty->print(" "); _mem_slice_tail.at(m)->dump();
+ }
+ }
+#endif
+ assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
+}
+
+//------------------------------initialize_bb---------------------------
+// Initialize per node info
+void SuperWord::initialize_bb() {
+ Node* last = _block.at(_block.length() - 1);
+ grow_node_info(bb_idx(last));
+}
+
+//------------------------------bb_insert_after---------------------------
+// Insert n into block after pos
+void SuperWord::bb_insert_after(Node* n, int pos) {
+ int n_pos = pos + 1;
+ // Make room
+ for (int i = _block.length() - 1; i >= n_pos; i--) {
+ _block.at_put_grow(i+1, _block.at(i));
+ }
+ for (int j = _node_info.length() - 1; j >= n_pos; j--) {
+ _node_info.at_put_grow(j+1, _node_info.at(j));
+ }
+ // Set value
+ _block.at_put_grow(n_pos, n);
+ _node_info.at_put_grow(n_pos, SWNodeInfo::initial);
+ // Adjust map from node->_idx to _block index
+ for (int i = n_pos; i < _block.length(); i++) {
+ set_bb_idx(_block.at(i), i);
+ }
+}
+
+//------------------------------compute_max_depth---------------------------
+// Compute max depth for expressions from beginning of block
+// Used to prune search paths during the test for independence.
+void SuperWord::compute_max_depth() {
+ int ct = 0;
+ bool again;
+ do {
+ again = false;
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ if (!n->is_Phi()) {
+ int d_orig = depth(n);
+ int d_in = 0;
+ for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
+ Node* pred = preds.current();
+ if (in_bb(pred)) {
+ d_in = MAX2(d_in, depth(pred));
+ }
+ }
+ if (d_in + 1 != d_orig) {
+ set_depth(n, d_in + 1);
+ again = true;
+ }
+ }
+ }
+ ct++;
+ } while (again);
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose)
+ tty->print_cr("compute_max_depth iterated: %d times", ct);
+#endif
+}
+
+//-------------------------compute_vector_element_type-----------------------
+// Compute necessary vector element type for expressions
+// This propagates backwards a narrower integer type when the
+// upper bits of the value are not needed.
+// Example: char a,b,c; a = b + c;
+// Normally the type of the add is integer, but for packed character
+// operations the type of the add needs to be char.
+void SuperWord::compute_vector_element_type() {
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose)
+ tty->print_cr("\ncompute_velt_type:");
+#endif
+
+ // Initial type
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type())
+ : _igvn.type(n);
+ const Type* vt = container_type(t);
+ set_velt_type(n, vt);
+ }
+
+ // Propagate narrowed type backwards through operations
+ // that don't depend on higher order bits
+ for (int i = _block.length() - 1; i >= 0; i--) {
+ Node* n = _block.at(i);
+ // Only integer types need be examined
+ if (n->bottom_type()->isa_int()) {
+ uint start, end;
+ vector_opd_range(n, &start, &end);
+ const Type* vt = velt_type(n);
+
+ for (uint j = start; j < end; j++) {
+ Node* in = n->in(j);
+ // Don't propagate through a type conversion
+ if (n->bottom_type() != in->bottom_type())
+ continue;
+ switch(in->Opcode()) {
+ case Op_AddI: case Op_AddL:
+ case Op_SubI: case Op_SubL:
+ case Op_MulI: case Op_MulL:
+ case Op_AndI: case Op_AndL:
+ case Op_OrI: case Op_OrL:
+ case Op_XorI: case Op_XorL:
+ case Op_LShiftI: case Op_LShiftL:
+ case Op_CMoveI: case Op_CMoveL:
+ if (in_bb(in)) {
+ bool same_type = true;
+ for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+ Node *use = in->fast_out(k);
+ if (!in_bb(use) || velt_type(use) != vt) {
+ same_type = false;
+ break;
+ }
+ }
+ if (same_type) {
+ set_velt_type(in, vt);
+ }
+ }
+ }
+ }
+ }
+ }
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose) {
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ velt_type(n)->dump();
+ tty->print("\t");
+ n->dump();
+ }
+ }
+#endif
+}
+
+//------------------------------memory_alignment---------------------------
+// Alignment within a vector memory reference
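+// For example, with a 16-byte vector width an adjusted offset of 20 bytes
+// gives alignment 4; a negative remainder is normalized into [0, vector width).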
+int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) {
+ SWPointer p(s, this);
+ if (!p.valid()) {
+ return bottom_align;
+ }
+ int offset = p.offset_in_bytes();
+ offset += iv_adjust_in_bytes;
+ int off_rem = offset % vector_width_in_bytes();
+ int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes();
+ return off_mod;
+}
+
+//---------------------------container_type---------------------------
+// Smallest type containing range of values
+const Type* SuperWord::container_type(const Type* t) {
+ if (t->isa_aryptr()) {
+ t = t->is_aryptr()->elem();
+ }
+ if (t->basic_type() == T_INT) {
+ if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL;
+ if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE;
+ if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR;
+ if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT;
+ return TypeInt::INT;
+ }
+ return t;
+}
+
+//-------------------------vector_opd_range-----------------------
+// [start, end) half-open range defining which operands are vector
+void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
+ switch (n->Opcode()) {
+ case Op_LoadB: case Op_LoadC:
+ case Op_LoadI: case Op_LoadL:
+ case Op_LoadF: case Op_LoadD:
+ case Op_LoadP:
+ *start = 0;
+ *end = 0;
+ return;
+ case Op_StoreB: case Op_StoreC:
+ case Op_StoreI: case Op_StoreL:
+ case Op_StoreF: case Op_StoreD:
+ case Op_StoreP:
+ *start = MemNode::ValueIn;
+ *end = *start + 1;
+ return;
+ case Op_LShiftI: case Op_LShiftL:
+ *start = 1;
+ *end = 2;
+ return;
+ case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD:
+ *start = 2;
+ *end = n->req();
+ return;
+ }
+ *start = 1;
+ *end = n->req(); // default is all operands
+}
+
+//------------------------------in_packset---------------------------
+// Are s1 and s2 in a pack pair and ordered as s1,s2?
+bool SuperWord::in_packset(Node* s1, Node* s2) {
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ assert(p->size() == 2, "must be");
+ if (p->at(0) == s1 && p->at(p->size()-1) == s2) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------in_pack---------------------------
+// Is s in pack p?
+Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
+ for (uint i = 0; i < p->size(); i++) {
+ if (p->at(i) == s) {
+ return p;
+ }
+ }
+ return NULL;
+}
+
+//------------------------------remove_pack_at---------------------------
+// Remove the pack at position pos in the packset
+void SuperWord::remove_pack_at(int pos) {
+ Node_List* p = _packset.at(pos);
+ for (uint i = 0; i < p->size(); i++) {
+ Node* s = p->at(i);
+ set_my_pack(s, NULL);
+ }
+ _packset.remove_at(pos);
+}
+
+//------------------------------executed_first---------------------------
+// Return the node executed first in pack p. Uses the RPO block list
+// to determine order.
+Node* SuperWord::executed_first(Node_List* p) {
+ Node* n = p->at(0);
+ int n_rpo = bb_idx(n);
+ for (uint i = 1; i < p->size(); i++) {
+ Node* s = p->at(i);
+ int s_rpo = bb_idx(s);
+ if (s_rpo < n_rpo) {
+ n = s;
+ n_rpo = s_rpo;
+ }
+ }
+ return n;
+}
+
+//------------------------------executed_last---------------------------
+// Return the node executed last in pack p.
+Node* SuperWord::executed_last(Node_List* p) {
+ Node* n = p->at(0);
+ int n_rpo = bb_idx(n);
+ for (uint i = 1; i < p->size(); i++) {
+ Node* s = p->at(i);
+ int s_rpo = bb_idx(s);
+ if (s_rpo > n_rpo) {
+ n = s;
+ n_rpo = s_rpo;
+ }
+ }
+ return n;
+}
+
+//----------------------------align_initial_loop_index---------------------------
+// Adjust pre-loop limit so that in main loop, a load/store reference
+// to align_to_ref will be at position zero in the vector.
+// (iv + k) mod vector_align == 0
+void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
+ CountedLoopNode *main_head = lp()->as_CountedLoop();
+ assert(main_head->is_main_loop(), "");
+ CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
+ assert(pre_end != NULL, "");
+ Node *pre_opaq1 = pre_end->limit();
+ assert(pre_opaq1->Opcode() == Op_Opaque1, "");
+ Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
+ Node *pre_limit = pre_opaq->in(1);
+
+ // Where we put new limit calculations
+ Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+
+ // Ensure the original loop limit is available from the
+ // pre-loop Opaque1 node.
+ Node *orig_limit = pre_opaq->original_loop_limit();
+ assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
+
+ SWPointer align_to_ref_p(align_to_ref, this);
+
+ // Let l0 == original pre_limit, l == new pre_limit, V == v_align
+ //
+ // For stride > 0
+ // Need l such that l > l0 && (l+k)%V == 0
+ // Find n such that l = (l0 + n)
+ // (l0 + n + k) % V == 0
+ // n = [V - (l0 + k)%V]%V
+ // new limit = l0 + [V - (l0 + k)%V]%V
+ // For stride < 0
+ // Need l such that l < l0 && (l+k)%V == 0
+ // Find n such that l = (l0 - n)
+ // (l0 - n + k) % V == 0
+ // n = (l0 + k)%V
+ // new limit = l0 - (l0 + k)%V
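+ //
+ // For example (stride > 0): with V == 4, k == 2, l0 == 7,
+ // n = [4 - (7 + 2)%4]%4 == 3, so the new limit is 10 and (10 + 2)%4 == 0.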
+
+ int elt_size = align_to_ref_p.memory_size();
+ int v_align = vector_width_in_bytes() / elt_size;
+ int k = align_to_ref_p.offset_in_bytes() / elt_size;
+
+ Node *kn = _igvn.intcon(k);
+ Node *limk = new (_phase->C, 3) AddINode(pre_limit, kn);
+ _phase->_igvn.register_new_node_with_optimizer(limk);
+ _phase->set_ctrl(limk, pre_ctrl);
+ if (align_to_ref_p.invar() != NULL) {
+ Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
+ Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
+ _phase->_igvn.register_new_node_with_optimizer(aref);
+ _phase->set_ctrl(aref, pre_ctrl);
+ if (!align_to_ref_p.negate_invar()) {
+ limk = new (_phase->C, 3) AddINode(limk, aref);
+ } else {
+ limk = new (_phase->C, 3) SubINode(limk, aref);
+ }
+ _phase->_igvn.register_new_node_with_optimizer(limk);
+ _phase->set_ctrl(limk, pre_ctrl);
+ }
+ Node* va_msk = _igvn.intcon(v_align - 1);
+ Node* n = new (_phase->C, 3) AndINode(limk, va_msk);
+ _phase->_igvn.register_new_node_with_optimizer(n);
+ _phase->set_ctrl(n, pre_ctrl);
+ Node* newlim;
+ if (iv_stride() > 0) {
+ Node* va = _igvn.intcon(v_align);
+ Node* adj = new (_phase->C, 3) SubINode(va, n);
+ _phase->_igvn.register_new_node_with_optimizer(adj);
+ _phase->set_ctrl(adj, pre_ctrl);
+ Node* adj2 = new (_phase->C, 3) AndINode(adj, va_msk);
+ _phase->_igvn.register_new_node_with_optimizer(adj2);
+ _phase->set_ctrl(adj2, pre_ctrl);
+ newlim = new (_phase->C, 3) AddINode(pre_limit, adj2);
+ } else {
+ newlim = new (_phase->C, 3) SubINode(pre_limit, n);
+ }
+ _phase->_igvn.register_new_node_with_optimizer(newlim);
+ _phase->set_ctrl(newlim, pre_ctrl);
+ Node* constrained =
+ (iv_stride() > 0) ? (Node*) new (_phase->C,3) MinINode(newlim, orig_limit)
+ : (Node*) new (_phase->C,3) MaxINode(newlim, orig_limit);
+ _phase->_igvn.register_new_node_with_optimizer(constrained);
+ _phase->set_ctrl(constrained, pre_ctrl);
+ _igvn.hash_delete(pre_opaq);
+ pre_opaq->set_req(1, constrained);
+}
+
+//----------------------------get_pre_loop_end---------------------------
+// Find pre loop end from main loop. Returns null if none.
+CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) {
+ Node *ctrl = cl->in(LoopNode::EntryControl);
+ if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL;
+ Node *iffm = ctrl->in(0);
+ if (!iffm->is_If()) return NULL;
+ Node *p_f = iffm->in(0);
+ if (!p_f->is_IfFalse()) return NULL;
+ if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
+ CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
+ if (!pre_end->loopnode()->is_pre_loop()) return NULL;
+ return pre_end;
+}
+
+
+//------------------------------init---------------------------
+void SuperWord::init() {
+ _dg.init();
+ _packset.clear();
+ _disjoint_ptrs.clear();
+ _block.clear();
+ _data_entry.clear();
+ _mem_slice_head.clear();
+ _mem_slice_tail.clear();
+ _node_info.clear();
+ _align_to_ref = NULL;
+ _lpt = NULL;
+ _lp = NULL;
+ _bb = NULL;
+ _iv = NULL;
+}
+
+//------------------------------print_packset---------------------------
+void SuperWord::print_packset() {
+#ifndef PRODUCT
+ tty->print_cr("packset");
+ for (int i = 0; i < _packset.length(); i++) {
+ tty->print_cr("Pack: %d", i);
+ Node_List* p = _packset.at(i);
+ print_pack(p);
+ }
+#endif
+}
+
+//------------------------------print_pack---------------------------
+void SuperWord::print_pack(Node_List* p) {
+ for (uint i = 0; i < p->size(); i++) {
+ print_stmt(p->at(i));
+ }
+}
+
+//------------------------------print_bb---------------------------
+void SuperWord::print_bb() {
+#ifndef PRODUCT
+ tty->print_cr("\nBlock");
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ tty->print("%d ", i);
+ if (n) {
+ n->dump();
+ }
+ }
+#endif
+}
+
+//------------------------------print_stmt---------------------------
+void SuperWord::print_stmt(Node* s) {
+#ifndef PRODUCT
+ tty->print(" align: %d \t", alignment(s));
+ s->dump();
+#endif
+}
+
+//------------------------------blank---------------------------
+char* SuperWord::blank(uint depth) {
+ static char blanks[101];
+ assert(depth < 101, "too deep");
+ for (uint i = 0; i < depth; i++) blanks[i] = ' ';
+ blanks[depth] = '\0';
+ return blanks;
+}
+
+
+//==============================SWPointer===========================
+
+//----------------------------SWPointer------------------------
+SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
+ _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
+ _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
+
+ Node* adr = mem->in(MemNode::Address);
+ if (!adr->is_AddP()) {
+ assert(!valid(), "too complex");
+ return;
+ }
+ // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
+ Node* base = adr->in(AddPNode::Base);
+ for (int i = 0; i < 3; i++) {
+ if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
+ assert(!valid(), "too complex");
+ return;
+ }
+ adr = adr->in(AddPNode::Address);
+ if (base == adr || !adr->is_AddP()) {
+ break; // stop looking at addp's
+ }
+ }
+ _base = base;
+ _adr = adr;
+ assert(valid(), "Usable");
+}
+
+// Following is used to create a temporary object during
+// the pattern match of an address expression.
+SWPointer::SWPointer(SWPointer* p) :
+ _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
+ _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {}
+
+//------------------------scaled_iv_plus_offset--------------------
+// Match: k*iv + offset
+// where: k is a constant that may be zero, and
+// offset is (k2 [+/- invariant]) where k2 may be zero and invariant is optional
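+// For example, the address term 4*iv + (12 + inv), with inv loop-invariant,
+// matches with _scale == 4, _offset == 12 and _invar == inv.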
+bool SWPointer::scaled_iv_plus_offset(Node* n) {
+ if (scaled_iv(n)) {
+ return true;
+ }
+ if (offset_plus_k(n)) {
+ return true;
+ }
+ int opc = n->Opcode();
+ if (opc == Op_AddI) {
+ if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
+ return true;
+ }
+ if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
+ return true;
+ }
+ } else if (opc == Op_SubI) {
+ if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) {
+ return true;
+ }
+ if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
+ _scale *= -1;
+ return true;
+ }
+ }
+ return false;
+}
+
+//----------------------------scaled_iv------------------------
+// Match: k*iv where k is a constant that's not zero
+bool SWPointer::scaled_iv(Node* n) {
+ if (_scale != 0) {
+ return false; // already found a scale
+ }
+ if (n == iv()) {
+ _scale = 1;
+ return true;
+ }
+ int opc = n->Opcode();
+ if (opc == Op_MulI) {
+ if (n->in(1) == iv() && n->in(2)->is_Con()) {
+ _scale = n->in(2)->get_int();
+ return true;
+ } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
+ _scale = n->in(1)->get_int();
+ return true;
+ }
+ } else if (opc == Op_LShiftI) {
+ if (n->in(1) == iv() && n->in(2)->is_Con()) {
+ _scale = 1 << n->in(2)->get_int();
+ return true;
+ }
+ } else if (opc == Op_ConvI2L) {
+ if (scaled_iv_plus_offset(n->in(1))) {
+ return true;
+ }
+ } else if (opc == Op_LShiftL) {
+ if (!has_iv() && _invar == NULL) {
+ // Need to preserve the current _offset value, so
+ // create a temporary object for this expression subtree.
+ // Hacky, so should re-engineer the address pattern match.
+ SWPointer tmp(this);
+ if (tmp.scaled_iv_plus_offset(n->in(1))) {
+ if (tmp._invar == NULL) {
+ int mult = 1 << n->in(2)->get_int();
+ _scale = tmp._scale * mult;
+ _offset += tmp._offset * mult;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//----------------------------offset_plus_k------------------------
+// Match: offset is (k [+/- invariant])
+// where k may be zero and invariant is optional, but not both.
+bool SWPointer::offset_plus_k(Node* n, bool negate) {
+ int opc = n->Opcode();
+ if (opc == Op_ConI) {
+ _offset += negate ? -(n->get_int()) : n->get_int();
+ return true;
+ } else if (opc == Op_ConL) {
+ // Okay if value fits into an int
+ const TypeLong* t = n->find_long_type();
+ if (t->higher_equal(TypeLong::INT)) {
+ jlong loff = n->get_long();
+ jint off = (jint)loff;
+ _offset += negate ? -off : off;
+ return true;
+ }
+ return false;
+ }
+ if (_invar != NULL) return false; // already have an invariant
+ if (opc == Op_AddI) {
+ if (n->in(2)->is_Con() && invariant(n->in(1))) {
+ _negate_invar = negate;
+ _invar = n->in(1);
+ _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+ return true;
+ } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
+ _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
+ _negate_invar = negate;
+ _invar = n->in(2);
+ return true;
+ }
+ }
+ if (opc == Op_SubI) {
+ if (n->in(2)->is_Con() && invariant(n->in(1))) {
+ _negate_invar = negate;
+ _invar = n->in(1);
+ _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+ return true;
+ } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
+ _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
+ _negate_invar = !negate;
+ _invar = n->in(2);
+ return true;
+ }
+ }
+ if (invariant(n)) {
+ _negate_invar = negate;
+ _invar = n;
+ return true;
+ }
+ return false;
+}
+
+//----------------------------print------------------------
+void SWPointer::print() {
+#ifndef PRODUCT
+ tty->print("base: %d adr: %d scale: %d offset: %d invar: %c%d\n",
+ _base != NULL ? _base->_idx : 0,
+ _adr != NULL ? _adr->_idx : 0,
+ _scale, _offset,
+ _negate_invar?'-':'+',
+ _invar != NULL ? _invar->_idx : 0);
+#endif
+}
+
+// ========================= OrderedPair =====================
+
+const OrderedPair OrderedPair::initial;
+
+// ========================= SWNodeInfo =====================
+
+const SWNodeInfo SWNodeInfo::initial;
+
+
+// ============================ DepGraph ===========================
+
+//------------------------------make_node---------------------------
+// Make a new dependence graph node for an ideal node.
+DepMem* DepGraph::make_node(Node* node) {
+ DepMem* m = new (_arena) DepMem(node);
+ if (node != NULL) {
+ assert(_map.at_grow(node->_idx) == NULL, "one init only");
+ _map.at_put_grow(node->_idx, m);
+ }
+ return m;
+}
+
+//------------------------------make_edge---------------------------
+// Make a new dependence graph edge from dpred -> dsucc
+DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) {
+ DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head());
+ dpred->set_out_head(e);
+ dsucc->set_in_head(e);
+ return e;
+}
+
+// ========================== DepMem ========================
+
+//------------------------------in_cnt---------------------------
+int DepMem::in_cnt() {
+ int ct = 0;
+ for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++;
+ return ct;
+}
+
+//------------------------------out_cnt---------------------------
+int DepMem::out_cnt() {
+ int ct = 0;
+ for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++;
+ return ct;
+}
+
+//------------------------------print-----------------------------
+void DepMem::print() {
+#ifndef PRODUCT
+ tty->print(" DepNode %d (", _node->_idx);
+ for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) {
+ Node* pred = p->pred()->node();
+ tty->print(" %d", pred != NULL ? pred->_idx : 0);
+ }
+ tty->print(") [");
+ for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) {
+ Node* succ = s->succ()->node();
+ tty->print(" %d", succ != NULL ? succ->_idx : 0);
+ }
+ tty->print_cr(" ]");
+#endif
+}
+
+// =========================== DepEdge =========================
+
+//------------------------------DepPreds---------------------------
+void DepEdge::print() {
+#ifndef PRODUCT
+ tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx);
+#endif
+}
+
+// =========================== DepPreds =========================
+// Iterator over predecessor edges in the dependence graph.
+
+//------------------------------DepPreds---------------------------
+DepPreds::DepPreds(Node* n, DepGraph& dg) {
+ _n = n;
+ _done = false;
+ if (_n->is_Store() || _n->is_Load()) {
+ _next_idx = MemNode::Address;
+ _end_idx = n->req();
+ _dep_next = dg.dep(_n)->in_head();
+ } else if (_n->is_Mem()) {
+ _next_idx = 0;
+ _end_idx = 0;
+ _dep_next = dg.dep(_n)->in_head();
+ } else {
+ _next_idx = 1;
+ _end_idx = _n->req();
+ _dep_next = NULL;
+ }
+ next();
+}
+
+//------------------------------next---------------------------
+void DepPreds::next() {
+ if (_dep_next != NULL) {
+ _current = _dep_next->pred()->node();
+ _dep_next = _dep_next->next_in();
+ } else if (_next_idx < _end_idx) {
+ _current = _n->in(_next_idx++);
+ } else {
+ _done = true;
+ }
+}
+
+// =========================== DepSuccs =========================
+// Iterator over successor edges in the dependence graph.
+
+//------------------------------DepSuccs---------------------------
+DepSuccs::DepSuccs(Node* n, DepGraph& dg) {
+ _n = n;
+ _done = false;
+ if (_n->is_Load()) {
+ _next_idx = 0;
+ _end_idx = _n->outcnt();
+ _dep_next = dg.dep(_n)->out_head();
+ } else if (_n->is_Mem() || (_n->is_Phi() && _n->bottom_type() == Type::MEMORY)) {
+ _next_idx = 0;
+ _end_idx = 0;
+ _dep_next = dg.dep(_n)->out_head();
+ } else {
+ _next_idx = 0;
+ _end_idx = _n->outcnt();
+ _dep_next = NULL;
+ }
+ next();
+}
+
+//-------------------------------next---------------------------
+void DepSuccs::next() {
+ if (_dep_next != NULL) {
+ _current = _dep_next->succ()->node();
+ _dep_next = _dep_next->next_out();
+ } else if (_next_idx < _end_idx) {
+ _current = _n->raw_out(_next_idx++);
+ } else {
+ _done = true;
+ }
+}
diff --git a/src/share/vm/opto/superword.hpp b/src/share/vm/opto/superword.hpp
new file mode 100644
index 000000000..b60cc83c1
--- /dev/null
+++ b/src/share/vm/opto/superword.hpp
@@ -0,0 +1,506 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+//
+// S U P E R W O R D T R A N S F O R M
+//
+// SuperWords are short, fixed length vectors.
+//
+// Algorithm from:
+//
+// Exploiting SuperWord Level Parallelism with
+// Multimedia Instruction Sets
+// by
+// Samuel Larsen and Saman Amarasinghe
+// MIT Laboratory for Computer Science
+// date
+// May 2000
+// published in
+// ACM SIGPLAN Notices
+// Proceedings of ACM PLDI '00, Volume 35 Issue 5
+//
+// Definition 3.1 A Pack is an n-tuple, <s1, ...,sn>, where
+// s1,...,sn are independent isomorphic statements in a basic
+// block.
+//
+// Definition 3.2 A PackSet is a set of Packs.
+//
+// Definition 3.3 A Pair is a Pack of size two, where the
+// first statement is considered the left element, and the
+// second statement is considered the right element.
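+//
+// For example, after unrolling, the statements a[i] = b[i] + c[i] and
+// a[i+1] = b[i+1] + c[i+1] are independent and isomorphic, so the pair
+// <a[i] = b[i] + c[i], a[i+1] = b[i+1] + c[i+1]> is a Pack of size two.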
+
+class SWPointer;
+class OrderedPair;
+
+// ========================= Dependence Graph =====================
+
+class DepMem;
+
+//------------------------------DepEdge---------------------------
+// An edge in the dependence graph. The edges incident to a dependence
+// node are threaded through _next_in for incoming edges and _next_out
+// for outgoing edges.
+class DepEdge : public ResourceObj {
+ protected:
+ DepMem* _pred;
+ DepMem* _succ;
+ DepEdge* _next_in; // list of in edges, null terminated
+ DepEdge* _next_out; // list of out edges, null terminated
+
+ public:
+ DepEdge(DepMem* pred, DepMem* succ, DepEdge* next_in, DepEdge* next_out) :
+ _pred(pred), _succ(succ), _next_in(next_in), _next_out(next_out) {}
+
+ DepEdge* next_in() { return _next_in; }
+ DepEdge* next_out() { return _next_out; }
+ DepMem* pred() { return _pred; }
+ DepMem* succ() { return _succ; }
+
+ void print();
+};
+
+//------------------------------DepMem---------------------------
+// A node in the dependence graph. _in_head starts the threaded list of
+// incoming edges, and _out_head starts the list of outgoing edges.
+class DepMem : public ResourceObj {
+ protected:
+ Node* _node; // Corresponding ideal node
+ DepEdge* _in_head; // Head of list of in edges, null terminated
+ DepEdge* _out_head; // Head of list of out edges, null terminated
+
+ public:
+ DepMem(Node* node) : _node(node), _in_head(NULL), _out_head(NULL) {}
+
+ Node* node() { return _node; }
+ DepEdge* in_head() { return _in_head; }
+ DepEdge* out_head() { return _out_head; }
+ void set_in_head(DepEdge* hd) { _in_head = hd; }
+ void set_out_head(DepEdge* hd) { _out_head = hd; }
+
+ int in_cnt(); // Incoming edge count
+ int out_cnt(); // Outgoing edge count
+
+ void print();
+};
+
+//------------------------------DepGraph---------------------------
+class DepGraph VALUE_OBJ_CLASS_SPEC {
+ protected:
+ Arena* _arena;
+ GrowableArray<DepMem*> _map;
+ DepMem* _root;
+ DepMem* _tail;
+
+ public:
+ DepGraph(Arena* a) : _arena(a), _map(a, 8, 0, NULL) {
+ _root = new (_arena) DepMem(NULL);
+ _tail = new (_arena) DepMem(NULL);
+ }
+
+ DepMem* root() { return _root; }
+ DepMem* tail() { return _tail; }
+
+ // Return dependence node corresponding to an ideal node
+ DepMem* dep(Node* node) { return _map.at(node->_idx); }
+
+ // Make a new dependence graph node for an ideal node.
+ DepMem* make_node(Node* node);
+
+ // Make a new dependence graph edge dprec->dsucc
+ DepEdge* make_edge(DepMem* dpred, DepMem* dsucc);
+
+ DepEdge* make_edge(Node* pred, Node* succ) { return make_edge(dep(pred), dep(succ)); }
+ DepEdge* make_edge(DepMem* pred, Node* succ) { return make_edge(pred, dep(succ)); }
+ DepEdge* make_edge(Node* pred, DepMem* succ) { return make_edge(dep(pred), succ); }
+
+ void init() { _map.clear(); } // initialize
+
+ void print(Node* n) { dep(n)->print(); }
+ void print(DepMem* d) { d->print(); }
+};
+
+//------------------------------DepPreds---------------------------
+// Iterator over predecessors in the dependence graph and
+// non-memory-graph inputs of ideal nodes.
+class DepPreds : public StackObj {
+private:
+ Node* _n;
+ int _next_idx, _end_idx;
+ DepEdge* _dep_next;
+ Node* _current;
+ bool _done;
+
+public:
+ DepPreds(Node* n, DepGraph& dg);
+ Node* current() { return _current; }
+ bool done() { return _done; }
+ void next();
+};
+
+//------------------------------DepSuccs---------------------------
+// Iterator over successors in the dependence graph and
+// non-memory-graph outputs of ideal nodes.
+class DepSuccs : public StackObj {
+private:
+ Node* _n;
+ int _next_idx, _end_idx;
+ DepEdge* _dep_next;
+ Node* _current;
+ bool _done;
+
+public:
+ DepSuccs(Node* n, DepGraph& dg);
+ Node* current() { return _current; }
+ bool done() { return _done; }
+ void next();
+};
+
+
+// ========================= SuperWord =====================
+
+// -----------------------------SWNodeInfo---------------------------------
+// Per node info needed by SuperWord
+class SWNodeInfo VALUE_OBJ_CLASS_SPEC {
+ public:
+ int _alignment; // memory alignment for a node
+ int _depth; // Max expression (DAG) depth from block start
+ const Type* _velt_type; // vector element type
+ Node_List* _my_pack; // pack containing this node
+
+ SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {}
+ static const SWNodeInfo initial;
+};
+
+// -----------------------------SuperWord---------------------------------
+// Transforms scalar operations into packed (superword) operations.
+class SuperWord : public ResourceObj {
+ private:
+ PhaseIdealLoop* _phase;
+ Arena* _arena;
+ PhaseIterGVN &_igvn;
+
+ enum consts { top_align = -1, bottom_align = -666 };
+
+ GrowableArray<Node_List*> _packset; // Packs for the current block
+
+ GrowableArray<int> _bb_idx; // Map from Node _idx to index within block
+
+ GrowableArray<Node*> _block; // Nodes in current block
+ GrowableArray<Node*> _data_entry; // Nodes with all inputs from outside
+ GrowableArray<Node*> _mem_slice_head; // Memory slice head nodes
+ GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
+
+ GrowableArray<SWNodeInfo> _node_info; // Info needed per node
+
+ MemNode* _align_to_ref; // Memory reference that pre-loop will align to
+
+ GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs
+
+ DepGraph _dg; // Dependence graph
+
+ // Scratch pads
+ VectorSet _visited; // Visited set
+ VectorSet _post_visited; // Post-visited set
+ Node_Stack _n_idx_list; // List of (node,index) pairs
+ GrowableArray<Node*> _nlist; // List of nodes
+ GrowableArray<Node*> _stk; // Stack of nodes
+
+ public:
+ SuperWord(PhaseIdealLoop* phase);
+
+ void transform_loop(IdealLoopTree* lpt);
+
+ // Accessors for SWPointer
+ PhaseIdealLoop* phase() { return _phase; }
+ IdealLoopTree* lpt() { return _lpt; }
+ PhiNode* iv() { return _iv; }
+
+ private:
+ IdealLoopTree* _lpt; // Current loop tree node
+ LoopNode* _lp; // Current LoopNode
+ Node* _bb; // Current basic block
+ PhiNode* _iv; // Induction var
+
+ // Accessors
+ Arena* arena() { return _arena; }
+
+ Node* bb() { return _bb; }
+ void set_bb(Node* bb) { _bb = bb; }
+
+ void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; }
+
+ LoopNode* lp() { return _lp; }
+ void set_lp(LoopNode* lp) { _lp = lp;
+ _iv = lp->as_CountedLoop()->phi()->as_Phi(); }
+ int iv_stride() { return lp()->as_CountedLoop()->stride_con(); }
+
+ int vector_width_in_bytes() { return Matcher::vector_width_in_bytes(); }
+
+ MemNode* align_to_ref() { return _align_to_ref; }
+ void set_align_to_ref(MemNode* m) { _align_to_ref = m; }
+
+ Node* ctrl(Node* n) const { return _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; }
+
+ // block accessors
+ bool in_bb(Node* n) { return n != NULL && n->outcnt() > 0 && ctrl(n) == _bb; }
+ int bb_idx(Node* n) { assert(in_bb(n), "must be"); return _bb_idx.at(n->_idx); }
+ void set_bb_idx(Node* n, int i) { _bb_idx.at_put_grow(n->_idx, i); }
+
+ // visited set accessors
+ void visited_clear() { _visited.Clear(); }
+ void visited_set(Node* n) { return _visited.set(bb_idx(n)); }
+ int visited_test(Node* n) { return _visited.test(bb_idx(n)); }
+ int visited_test_set(Node* n) { return _visited.test_set(bb_idx(n)); }
+ void post_visited_clear() { _post_visited.Clear(); }
+ void post_visited_set(Node* n) { return _post_visited.set(bb_idx(n)); }
+ int post_visited_test(Node* n) { return _post_visited.test(bb_idx(n)); }
+
+ // Ensure node_info contains element "i"
+ void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }
+
+ // memory alignment for a node
+ int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; }
+ void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }
+
+ // Max expression (DAG) depth from beginning of the block for each node
+ int depth(Node* n) { return _node_info.adr_at(bb_idx(n))->_depth; }
+ void set_depth(Node* n, int d) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; }
+
+ // vector element type
+ const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; }
+ void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
+
+ // my_pack
+ Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
+ void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
+
+ // methods
+
+ // Extract the superword level parallelism
+ void SLP_extract();
+ // Find the adjacent memory references and create pack pairs for them.
+ void find_adjacent_refs();
+ // Find a memory reference to align the loop induction variable to.
+ void find_align_to_ref(Node_List &memops);
+ // Can the preloop align the reference to position zero in the vector?
+ bool ref_is_alignable(SWPointer& p);
+ // Construct dependency graph.
+ void dependence_graph();
+ // Return a memory slice (node list) in predecessor order starting at "start"
+ void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds);
+ // Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"?
+ bool stmts_can_pack(Node* s1, Node* s2, int align);
+ // Does s exist in a pack at position pos?
+ bool exists_at(Node* s, uint pos);
+ // Is s1 immediately before s2 in memory?
+ bool are_adjacent_refs(Node* s1, Node* s2);
+ // Are s1 and s2 similar?
+ bool isomorphic(Node* s1, Node* s2);
+ // Is there no data path from s1 to s2 or s2 to s1?
+ bool independent(Node* s1, Node* s2);
+ // Helper for independent
+ bool independent_path(Node* shallow, Node* deep, uint dp=0);
+ void set_alignment(Node* s1, Node* s2, int align);
+ int data_size(Node* s);
+ // Extend packset by following use->def and def->use links from pack members.
+ void extend_packlist();
+ // Extend the packset by visiting operand definitions of nodes in pack p
+ bool follow_use_defs(Node_List* p);
+ // Extend the packset by visiting uses of nodes in pack p
+ bool follow_def_uses(Node_List* p);
+ // Estimate the savings from executing s1 and s2 as a pack
+ int est_savings(Node* s1, Node* s2);
+ int adjacent_profit(Node* s1, Node* s2);
+ int pack_cost(int ct);
+ int unpack_cost(int ct);
+ // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
+ void combine_packs();
+ // Construct the map from nodes to packs.
+ void construct_my_pack_map();
+ // Remove packs that are not implemented or not profitable.
+ void filter_packs();
+ // Adjust the memory graph for the packed operations
+ void schedule();
+ // Within a pack, move stores down to the last executed store,
+ // and move loads up to the first executed load.
+ void co_locate_pack(Node_List* p);
+ // Convert packs into vector node operations
+ void output();
+ // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
+ VectorNode* vector_opd(Node_List* p, int opd_idx);
+ // Can code be generated for pack p?
+ bool implemented(Node_List* p);
+ // For pack p, are all operands and all uses (within the block) vector?
+ bool profitable(Node_List* p);
+ // If a use of pack p is not a vector use, then replace the use with an extract operation.
+ void insert_extracts(Node_List* p);
+ // Is use->in(u_idx) a vector use?
+ bool is_vector_use(Node* use, int u_idx);
+ // Construct reverse postorder list of block members
+ void construct_bb();
+ // Initialize per node info
+ void initialize_bb();
+ // Insert n into block after pos
+ void bb_insert_after(Node* n, int pos);
+ // Compute max depth for expressions from beginning of block
+ void compute_max_depth();
+ // Compute necessary vector element type for expressions
+ void compute_vector_element_type();
+ // Are s1 and s2 in a pack pair and ordered as s1,s2?
+ bool in_packset(Node* s1, Node* s2);
+ // Is s in pack p?
+ Node_List* in_pack(Node* s, Node_List* p);
+ // Remove the pack at position pos in the packset
+ void remove_pack_at(int pos);
+ // Return the node executed first in pack p.
+ Node* executed_first(Node_List* p);
+ // Return the node executed last in pack p.
+ Node* executed_last(Node_List* p);
+ // Alignment within a vector memory reference
+ int memory_alignment(MemNode* s, int iv_adjust_in_bytes);
+ // (Start, end] half-open range defining which operands are vector
+ void vector_opd_range(Node* n, uint* start, uint* end);
+ // Smallest type containing range of values
+ static const Type* container_type(const Type* t);
+ // Adjust pre-loop limit so that in main loop, a load/store reference
+ // to align_to_ref will be at position zero in the vector.
+ void align_initial_loop_index(MemNode* align_to_ref);
+ // Find the pre-loop end from the main loop. Returns NULL if none.
+ CountedLoopEndNode* get_pre_loop_end(CountedLoopNode *cl);
+ // Is the use of d1 in u1 at the same operand position as d2 in u2?
+ bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2);
+ void init();
+
+ // print methods
+ void print_packset();
+ void print_pack(Node_List* p);
+ void print_bb();
+ void print_stmt(Node* s);
+ char* blank(uint depth);
+};
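+
+// The phases declared above presumably chain together along these lines (an
+// assumption inferred from the declaration order and comments here; the real
+// driver is SLP_extract(), whose definition is not part of this header):
+//
+//   construct_bb();                 // reverse postorder list of block members
+//   dependence_graph();             // memory dependences per slice
+//   compute_max_depth();
+//   compute_vector_element_type();
+//   find_adjacent_refs();           // seed the packset with adjacent memory pairs
+//   extend_packlist();              // grow packs along use->def and def->use links
+//   combine_packs();
+//   construct_my_pack_map();
+//   filter_packs();                 // keep only implemented() && profitable() packs
+//   schedule();                     // co_locate_pack() members in the memory graph
+//   output();                       // emit vector node operations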
+
+
+//------------------------------SWPointer---------------------------
+// Information about an address for dependence checking and vector alignment
+class SWPointer VALUE_OBJ_CLASS_SPEC {
+ protected:
+ MemNode* _mem; // My memory reference node
+ SuperWord* _slp; // SuperWord class
+
+ Node* _base; // NULL if unsafe nonheap reference
+ Node* _adr; // address pointer
+ jint _scale; // multiplier for iv (in bytes), 0 if no loop iv
+ jint _offset; // constant offset (in bytes)
+ Node* _invar; // invariant offset (in bytes), NULL if none
+ bool _negate_invar; // if true then use: (0 - _invar)
+
+ PhaseIdealLoop* phase() { return _slp->phase(); }
+ IdealLoopTree* lpt() { return _slp->lpt(); }
+ PhiNode* iv() { return _slp->iv(); } // Induction var
+
+ bool invariant(Node* n) {
+ Node *n_c = phase()->get_ctrl(n);
+ return !lpt()->is_member(phase()->get_loop(n_c));
+ }
+
+ // Match: k*iv + offset
+ bool scaled_iv_plus_offset(Node* n);
+ // Match: k*iv where k is a constant that's not zero
+ bool scaled_iv(Node* n);
+ // Match: offset is (k [+/- invariant])
+ bool offset_plus_k(Node* n, bool negate = false);
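+
+  // Illustrative decomposition (hypothetical loop, not taken from this code):
+  // for a store "a[i+3] = v" over a jint array inside a counted loop on i,
+  // the address breaks down roughly as
+  //   _base   = a                    (the array oop)
+  //   _scale  = 4                    (bytes per increment of the iv)
+  //   _offset = 4*3 + <array element base offset in bytes>
+  //   _invar  = NULL                 (no loop-invariant, non-constant term)
+  // An index like "i + m" with m defined outside the loop would instead leave
+  // the byte-scaled node for m in _invar (negated when _negate_invar is set).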
+
+ public:
+ enum CMP {
+ Less = 1,
+ Greater = 2,
+ Equal = 4,
+ NotEqual = (Less | Greater),
+ NotComparable = (Less | Greater | Equal)
+ };
+
+ SWPointer(MemNode* mem, SuperWord* slp);
+ // Following is used to create a temporary object during
+ // the pattern match of an address expression.
+ SWPointer(SWPointer* p);
+
+ bool valid() { return _adr != NULL; }
+ bool has_iv() { return _scale != 0; }
+
+ Node* base() { return _base; }
+ Node* adr() { return _adr; }
+ int scale_in_bytes() { return _scale; }
+ Node* invar() { return _invar; }
+ bool negate_invar() { return _negate_invar; }
+ int offset_in_bytes() { return _offset; }
+ int memory_size() { return _mem->memory_size(); }
+
+ // Comparable?
+ int cmp(SWPointer& q) {
+ if (valid() && q.valid() &&
+ (_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
+ _scale == q._scale &&
+ _invar == q._invar &&
+ _negate_invar == q._negate_invar) {
+ bool overlap = q._offset < _offset + memory_size() &&
+ _offset < q._offset + q.memory_size();
+ return overlap ? Equal : (_offset < q._offset ? Less : Greater);
+ } else {
+ return NotComparable;
+ }
+ }
+
+ bool not_equal(SWPointer& q) { return not_equal(cmp(q)); }
+ bool equal(SWPointer& q) { return equal(cmp(q)); }
+ bool comparable(SWPointer& q) { return comparable(cmp(q)); }
+ static bool not_equal(int cmp) { return cmp <= NotEqual; }
+ static bool equal(int cmp) { return cmp == Equal; }
+ static bool comparable(int cmp) { return cmp < NotComparable; }
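+
+  // Worked example with hypothetical values: two 8-byte references that agree
+  // on base, scale and invariant, at offsets 0 and 8, do not overlap
+  // (8 < 0+8 fails), so cmp() returns Less -- the first precedes the second.
+  // With offsets 0 and 4 the byte ranges [0,8) and [4,12) overlap and cmp()
+  // returns Equal.  References that disagree on base, scale or invariant are
+  // NotComparable and cannot be ordered statically.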
+
+ void print();
+};
+
+
+//------------------------------OrderedPair---------------------------
+// Ordered pair of Node*.
+class OrderedPair VALUE_OBJ_CLASS_SPEC {
+ protected:
+ Node* _p1;
+ Node* _p2;
+ public:
+ OrderedPair() : _p1(NULL), _p2(NULL) {}
+ OrderedPair(Node* p1, Node* p2) {
+ if (p1->_idx < p2->_idx) {
+ _p1 = p1; _p2 = p2;
+ } else {
+ _p1 = p2; _p2 = p1;
+ }
+ }
+
+ bool operator==(const OrderedPair &rhs) {
+ return _p1 == rhs._p1 && _p2 == rhs._p2;
+ }
+ void print() { tty->print(" (%d, %d)", _p1->_idx, _p2->_idx); }
+
+ static const OrderedPair initial;
+};
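+
+// For example, OrderedPair(n, m) and OrderedPair(m, n) compare equal, because
+// the constructor always stores the node with the smaller _idx in _p1.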
diff --git a/src/share/vm/opto/type.cpp b/src/share/vm/opto/type.cpp
new file mode 100644
index 000000000..e396c9732
--- /dev/null
+++ b/src/share/vm/opto/type.cpp
@@ -0,0 +1,3751 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_type.cpp.incl"
+
+// Dictionary of types shared among compilations.
+Dict* Type::_shared_type_dict = NULL;
+
+// Array which maps compiler types to Basic Types
+const BasicType Type::_basic_type[Type::lastype] = {
+ T_ILLEGAL, // Bad
+ T_ILLEGAL, // Control
+ T_VOID, // Top
+ T_INT, // Int
+ T_LONG, // Long
+ T_VOID, // Half
+
+ T_ILLEGAL, // Tuple
+ T_ARRAY, // Array
+
+ T_ADDRESS, // AnyPtr // shows up in factory methods for NULL_PTR
+ T_ADDRESS, // RawPtr
+ T_OBJECT, // OopPtr
+ T_OBJECT, // InstPtr
+ T_OBJECT, // AryPtr
+ T_OBJECT, // KlassPtr
+
+ T_OBJECT, // Function
+ T_ILLEGAL, // Abio
+ T_ADDRESS, // Return_Address
+ T_ILLEGAL, // Memory
+ T_FLOAT, // FloatTop
+ T_FLOAT, // FloatCon
+ T_FLOAT, // FloatBot
+ T_DOUBLE, // DoubleTop
+ T_DOUBLE, // DoubleCon
+ T_DOUBLE, // DoubleBot
+ T_ILLEGAL, // Bottom
+};
+
+// Map ideal registers (machine types) to ideal types
+const Type *Type::mreg2type[_last_machine_leaf];
+
+// Map basic types to canonical Type* pointers.
+const Type* Type:: _const_basic_type[T_CONFLICT+1];
+
+// Map basic types to constant-zero Types.
+const Type* Type:: _zero_type[T_CONFLICT+1];
+
+// Map basic types to array-body alias types.
+const TypeAryPtr* TypeAryPtr::_array_body_type[T_CONFLICT+1];
+
+//=============================================================================
+// Convenience common pre-built types.
+const Type *Type::ABIO; // State-of-machine only
+const Type *Type::BOTTOM; // All values
+const Type *Type::CONTROL; // Control only
+const Type *Type::DOUBLE; // All doubles
+const Type *Type::FLOAT; // All floats
+const Type *Type::HALF; // Placeholder half of doublewide type
+const Type *Type::MEMORY; // Abstract store only
+const Type *Type::RETURN_ADDRESS;
+const Type *Type::TOP; // No values in set
+
+//------------------------------get_const_type---------------------------
+const Type* Type::get_const_type(ciType* type) {
+ if (type == NULL) {
+ return NULL;
+ } else if (type->is_primitive_type()) {
+ return get_const_basic_type(type->basic_type());
+ } else {
+ return TypeOopPtr::make_from_klass(type->as_klass());
+ }
+}
+
+//---------------------------array_element_basic_type---------------------------------
+// Mapping to the array element's basic type.
+BasicType Type::array_element_basic_type() const {
+ BasicType bt = basic_type();
+ if (bt == T_INT) {
+ if (this == TypeInt::INT) return T_INT;
+ if (this == TypeInt::CHAR) return T_CHAR;
+ if (this == TypeInt::BYTE) return T_BYTE;
+ if (this == TypeInt::BOOL) return T_BOOLEAN;
+ if (this == TypeInt::SHORT) return T_SHORT;
+ return T_VOID;
+ }
+ return bt;
+}
+
+//---------------------------get_typeflow_type---------------------------------
+// Import a type produced by ciTypeFlow.
+const Type* Type::get_typeflow_type(ciType* type) {
+ switch (type->basic_type()) {
+
+ case ciTypeFlow::StateVector::T_BOTTOM:
+ assert(type == ciTypeFlow::StateVector::bottom_type(), "");
+ return Type::BOTTOM;
+
+ case ciTypeFlow::StateVector::T_TOP:
+ assert(type == ciTypeFlow::StateVector::top_type(), "");
+ return Type::TOP;
+
+ case ciTypeFlow::StateVector::T_NULL:
+ assert(type == ciTypeFlow::StateVector::null_type(), "");
+ return TypePtr::NULL_PTR;
+
+ case ciTypeFlow::StateVector::T_LONG2:
+ // The ciTypeFlow pass pushes a long, then the half.
+ // We do the same.
+ assert(type == ciTypeFlow::StateVector::long2_type(), "");
+ return TypeInt::TOP;
+
+ case ciTypeFlow::StateVector::T_DOUBLE2:
+ // The ciTypeFlow pass pushes double, then the half.
+ // Our convention is the same.
+ assert(type == ciTypeFlow::StateVector::double2_type(), "");
+ return Type::TOP;
+
+ case T_ADDRESS:
+ assert(type->is_return_address(), "");
+ return TypeRawPtr::make((address)(intptr_t)type->as_return_address()->bci());
+
+ default:
+ // make sure we did not mix up the cases:
+ assert(type != ciTypeFlow::StateVector::bottom_type(), "");
+ assert(type != ciTypeFlow::StateVector::top_type(), "");
+ assert(type != ciTypeFlow::StateVector::null_type(), "");
+ assert(type != ciTypeFlow::StateVector::long2_type(), "");
+ assert(type != ciTypeFlow::StateVector::double2_type(), "");
+ assert(!type->is_return_address(), "");
+
+ return Type::get_const_type(type);
+ }
+}
+
+
+//------------------------------make-------------------------------------------
+// Create a simple Type, with default empty symbol sets. Then hashcons it
+// and look for an existing copy in the type dictionary.
+const Type *Type::make( enum TYPES t ) {
+ return (new Type(t))->hashcons();
+}
+
+//------------------------------cmp--------------------------------------------
+int Type::cmp( const Type *const t1, const Type *const t2 ) {
+ if( t1->_base != t2->_base )
+ return 1; // Missed badly
+ assert(t1 != t2 || t1->eq(t2), "eq must be reflexive");
+ return !t1->eq(t2); // Return ZERO if equal
+}
+
+//------------------------------hash-------------------------------------------
+int Type::uhash( const Type *const t ) {
+ return t->hash();
+}
+
+//--------------------------Initialize_shared----------------------------------
+void Type::Initialize_shared(Compile* current) {
+ // This method does not need to be locked because the first system
+ // compilations (stub compilations) occur serially. If they are
+ // changed to proceed in parallel, then this section will need
+ // locking.
+
+ Arena* save = current->type_arena();
+ Arena* shared_type_arena = new Arena();
+
+ current->set_type_arena(shared_type_arena);
+ _shared_type_dict =
+ new (shared_type_arena) Dict( (CmpKey)Type::cmp, (Hash)Type::uhash,
+ shared_type_arena, 128 );
+ current->set_type_dict(_shared_type_dict);
+
+ // Make shared pre-built types.
+ CONTROL = make(Control); // Control only
+ TOP = make(Top); // No values in set
+ MEMORY = make(Memory); // Abstract store only
+ ABIO = make(Abio); // State-of-machine only
+ RETURN_ADDRESS=make(Return_Address);
+ FLOAT = make(FloatBot); // All floats
+ DOUBLE = make(DoubleBot); // All doubles
+ BOTTOM = make(Bottom); // Everything
+ HALF = make(Half); // Placeholder half of doublewide type
+
+ TypeF::ZERO = TypeF::make(0.0); // Float 0 (positive zero)
+ TypeF::ONE = TypeF::make(1.0); // Float 1
+
+ TypeD::ZERO = TypeD::make(0.0); // Double 0 (positive zero)
+ TypeD::ONE = TypeD::make(1.0); // Double 1
+
+ TypeInt::MINUS_1 = TypeInt::make(-1); // -1
+ TypeInt::ZERO = TypeInt::make( 0); // 0
+ TypeInt::ONE = TypeInt::make( 1); // 1
+ TypeInt::BOOL = TypeInt::make(0,1, WidenMin); // 0 or 1, FALSE or TRUE.
+ TypeInt::CC = TypeInt::make(-1, 1, WidenMin); // -1, 0 or 1, condition codes
+ TypeInt::CC_LT = TypeInt::make(-1,-1, WidenMin); // == TypeInt::MINUS_1
+ TypeInt::CC_GT = TypeInt::make( 1, 1, WidenMin); // == TypeInt::ONE
+ TypeInt::CC_EQ = TypeInt::make( 0, 0, WidenMin); // == TypeInt::ZERO
+ TypeInt::CC_LE = TypeInt::make(-1, 0, WidenMin);
+ TypeInt::CC_GE = TypeInt::make( 0, 1, WidenMin); // == TypeInt::BOOL
+ TypeInt::BYTE = TypeInt::make(-128,127, WidenMin); // Bytes
+ TypeInt::CHAR = TypeInt::make(0,65535, WidenMin); // Java chars
+ TypeInt::SHORT = TypeInt::make(-32768,32767, WidenMin); // Java shorts
+ TypeInt::POS = TypeInt::make(0,max_jint, WidenMin); // Non-neg values
+ TypeInt::POS1 = TypeInt::make(1,max_jint, WidenMin); // Positive values
+ TypeInt::INT = TypeInt::make(min_jint,max_jint, WidenMax); // 32-bit integers
+ TypeInt::SYMINT = TypeInt::make(-max_jint,max_jint,WidenMin); // symmetric range
+ // CmpL is overloaded both as the bytecode computation returning
+ // a trinary (-1,0,+1) integer result AND as an efficient long
+ // compare returning optimizer ideal-type flags.
+ assert( TypeInt::CC_LT == TypeInt::MINUS_1, "types must match for CmpL to work" );
+ assert( TypeInt::CC_GT == TypeInt::ONE, "types must match for CmpL to work" );
+ assert( TypeInt::CC_EQ == TypeInt::ZERO, "types must match for CmpL to work" );
+ assert( TypeInt::CC_GE == TypeInt::BOOL, "types must match for CmpL to work" );
+
+ TypeLong::MINUS_1 = TypeLong::make(-1); // -1
+ TypeLong::ZERO = TypeLong::make( 0); // 0
+ TypeLong::ONE = TypeLong::make( 1); // 1
+ TypeLong::POS = TypeLong::make(0,max_jlong, WidenMin); // Non-neg values
+ TypeLong::LONG = TypeLong::make(min_jlong,max_jlong,WidenMax); // 64-bit integers
+ TypeLong::INT = TypeLong::make((jlong)min_jint,(jlong)max_jint,WidenMin);
+ TypeLong::UINT = TypeLong::make(0,(jlong)max_juint,WidenMin);
+
+ const Type **fboth =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ fboth[0] = Type::CONTROL;
+ fboth[1] = Type::CONTROL;
+ TypeTuple::IFBOTH = TypeTuple::make( 2, fboth );
+
+ const Type **ffalse =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ ffalse[0] = Type::CONTROL;
+ ffalse[1] = Type::TOP;
+ TypeTuple::IFFALSE = TypeTuple::make( 2, ffalse );
+
+ const Type **fneither =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ fneither[0] = Type::TOP;
+ fneither[1] = Type::TOP;
+ TypeTuple::IFNEITHER = TypeTuple::make( 2, fneither );
+
+ const Type **ftrue =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ ftrue[0] = Type::TOP;
+ ftrue[1] = Type::CONTROL;
+ TypeTuple::IFTRUE = TypeTuple::make( 2, ftrue );
+
+ const Type **floop =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ floop[0] = Type::CONTROL;
+ floop[1] = TypeInt::INT;
+ TypeTuple::LOOPBODY = TypeTuple::make( 2, floop );
+
+ TypePtr::NULL_PTR= TypePtr::make( AnyPtr, TypePtr::Null, 0 );
+ TypePtr::NOTNULL = TypePtr::make( AnyPtr, TypePtr::NotNull, OffsetBot );
+ TypePtr::BOTTOM = TypePtr::make( AnyPtr, TypePtr::BotPTR, OffsetBot );
+
+ TypeRawPtr::BOTTOM = TypeRawPtr::make( TypePtr::BotPTR );
+ TypeRawPtr::NOTNULL= TypeRawPtr::make( TypePtr::NotNull );
+
+ mreg2type[Op_Node] = Type::BOTTOM;
+ mreg2type[Op_Set ] = 0;
+ mreg2type[Op_RegI] = TypeInt::INT;
+ mreg2type[Op_RegP] = TypePtr::BOTTOM;
+ mreg2type[Op_RegF] = Type::FLOAT;
+ mreg2type[Op_RegD] = Type::DOUBLE;
+ mreg2type[Op_RegL] = TypeLong::LONG;
+ mreg2type[Op_RegFlags] = TypeInt::CC;
+
+ const Type **fmembar = TypeTuple::fields(0);
+ TypeTuple::MEMBAR = TypeTuple::make(TypeFunc::Parms+0, fmembar);
+
+ const Type **fsc = (const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ fsc[0] = TypeInt::CC;
+ fsc[1] = Type::MEMORY;
+ TypeTuple::STORECONDITIONAL = TypeTuple::make(2, fsc);
+
+ TypeInstPtr::NOTNULL = TypeInstPtr::make(TypePtr::NotNull, current->env()->Object_klass());
+ TypeInstPtr::BOTTOM = TypeInstPtr::make(TypePtr::BotPTR, current->env()->Object_klass());
+ TypeInstPtr::MIRROR = TypeInstPtr::make(TypePtr::NotNull, current->env()->Class_klass());
+ TypeInstPtr::MARK = TypeInstPtr::make(TypePtr::BotPTR, current->env()->Object_klass(),
+ false, 0, oopDesc::mark_offset_in_bytes());
+ TypeInstPtr::KLASS = TypeInstPtr::make(TypePtr::BotPTR, current->env()->Object_klass(),
+ false, 0, oopDesc::klass_offset_in_bytes());
+ TypeOopPtr::BOTTOM = TypeOopPtr::make(TypePtr::BotPTR, OffsetBot);
+
+ TypeAryPtr::RANGE = TypeAryPtr::make( TypePtr::BotPTR, TypeAry::make(Type::BOTTOM,TypeInt::POS), current->env()->Object_klass(), false, arrayOopDesc::length_offset_in_bytes());
+ // There is no shared klass for Object[]. See note in TypeAryPtr::klass().
+ TypeAryPtr::OOPS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInstPtr::BOTTOM,TypeInt::POS), NULL /*ciArrayKlass::make(o)*/, false, Type::OffsetBot);
+ TypeAryPtr::BYTES = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInt::BYTE ,TypeInt::POS), ciTypeArrayKlass::make(T_BYTE), true, Type::OffsetBot);
+ TypeAryPtr::SHORTS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInt::SHORT ,TypeInt::POS), ciTypeArrayKlass::make(T_SHORT), true, Type::OffsetBot);
+ TypeAryPtr::CHARS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInt::CHAR ,TypeInt::POS), ciTypeArrayKlass::make(T_CHAR), true, Type::OffsetBot);
+ TypeAryPtr::INTS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInt::INT ,TypeInt::POS), ciTypeArrayKlass::make(T_INT), true, Type::OffsetBot);
+ TypeAryPtr::LONGS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeLong::LONG ,TypeInt::POS), ciTypeArrayKlass::make(T_LONG), true, Type::OffsetBot);
+ TypeAryPtr::FLOATS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(Type::FLOAT ,TypeInt::POS), ciTypeArrayKlass::make(T_FLOAT), true, Type::OffsetBot);
+ TypeAryPtr::DOUBLES = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(Type::DOUBLE ,TypeInt::POS), ciTypeArrayKlass::make(T_DOUBLE), true, Type::OffsetBot);
+
+ TypeAryPtr::_array_body_type[T_OBJECT] = TypeAryPtr::OOPS;
+ TypeAryPtr::_array_body_type[T_ARRAY] = TypeAryPtr::OOPS; // arrays are stored in oop arrays
+ TypeAryPtr::_array_body_type[T_BYTE] = TypeAryPtr::BYTES;
+ TypeAryPtr::_array_body_type[T_BOOLEAN] = TypeAryPtr::BYTES; // boolean[] is a byte array
+ TypeAryPtr::_array_body_type[T_SHORT] = TypeAryPtr::SHORTS;
+ TypeAryPtr::_array_body_type[T_CHAR] = TypeAryPtr::CHARS;
+ TypeAryPtr::_array_body_type[T_INT] = TypeAryPtr::INTS;
+ TypeAryPtr::_array_body_type[T_LONG] = TypeAryPtr::LONGS;
+ TypeAryPtr::_array_body_type[T_FLOAT] = TypeAryPtr::FLOATS;
+ TypeAryPtr::_array_body_type[T_DOUBLE] = TypeAryPtr::DOUBLES;
+
+ TypeKlassPtr::OBJECT = TypeKlassPtr::make( TypePtr::NotNull, current->env()->Object_klass(), 0 );
+ TypeKlassPtr::OBJECT_OR_NULL = TypeKlassPtr::make( TypePtr::BotPTR, current->env()->Object_klass(), 0 );
+
+ const Type **fi2c = TypeTuple::fields(2);
+ fi2c[TypeFunc::Parms+0] = TypeInstPtr::BOTTOM; // methodOop
+ fi2c[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // argument pointer
+ TypeTuple::START_I2C = TypeTuple::make(TypeFunc::Parms+2, fi2c);
+
+ const Type **intpair = TypeTuple::fields(2);
+ intpair[0] = TypeInt::INT;
+ intpair[1] = TypeInt::INT;
+ TypeTuple::INT_PAIR = TypeTuple::make(2, intpair);
+
+ const Type **longpair = TypeTuple::fields(2);
+ longpair[0] = TypeLong::LONG;
+ longpair[1] = TypeLong::LONG;
+ TypeTuple::LONG_PAIR = TypeTuple::make(2, longpair);
+
+ _const_basic_type[T_BOOLEAN] = TypeInt::BOOL;
+ _const_basic_type[T_CHAR] = TypeInt::CHAR;
+ _const_basic_type[T_BYTE] = TypeInt::BYTE;
+ _const_basic_type[T_SHORT] = TypeInt::SHORT;
+ _const_basic_type[T_INT] = TypeInt::INT;
+ _const_basic_type[T_LONG] = TypeLong::LONG;
+ _const_basic_type[T_FLOAT] = Type::FLOAT;
+ _const_basic_type[T_DOUBLE] = Type::DOUBLE;
+ _const_basic_type[T_OBJECT] = TypeInstPtr::BOTTOM;
+ _const_basic_type[T_ARRAY] = TypeInstPtr::BOTTOM; // there is no separate bottom for arrays
+ _const_basic_type[T_VOID] = TypePtr::NULL_PTR; // reflection represents void this way
+ _const_basic_type[T_ADDRESS] = TypeRawPtr::BOTTOM; // both interpreter return addresses & random raw ptrs
+ _const_basic_type[T_CONFLICT]= Type::BOTTOM; // why not?
+
+ _zero_type[T_BOOLEAN] = TypeInt::ZERO; // false == 0
+ _zero_type[T_CHAR] = TypeInt::ZERO; // '\0' == 0
+ _zero_type[T_BYTE] = TypeInt::ZERO; // 0x00 == 0
+ _zero_type[T_SHORT] = TypeInt::ZERO; // 0x0000 == 0
+ _zero_type[T_INT] = TypeInt::ZERO;
+ _zero_type[T_LONG] = TypeLong::ZERO;
+ _zero_type[T_FLOAT] = TypeF::ZERO;
+ _zero_type[T_DOUBLE] = TypeD::ZERO;
+ _zero_type[T_OBJECT] = TypePtr::NULL_PTR;
+ _zero_type[T_ARRAY] = TypePtr::NULL_PTR; // null array is null oop
+ _zero_type[T_ADDRESS] = TypePtr::NULL_PTR; // raw pointers use the same null
+ _zero_type[T_VOID] = Type::TOP; // the only void value is no value at all
+
+ // get_zero_type() should not happen for T_CONFLICT
+ _zero_type[T_CONFLICT]= NULL;
+
+ // Restore working type arena.
+ current->set_type_arena(save);
+ current->set_type_dict(NULL);
+}
+
+//------------------------------Initialize-------------------------------------
+void Type::Initialize(Compile* current) {
+ assert(current->type_arena() != NULL, "must have created type arena");
+
+ if (_shared_type_dict == NULL) {
+ Initialize_shared(current);
+ }
+
+ Arena* type_arena = current->type_arena();
+
+ // Create the hash-cons'ing dictionary with top-level storage allocation
+ Dict *tdic = new (type_arena) Dict( (CmpKey)Type::cmp,(Hash)Type::uhash, type_arena, 128 );
+ current->set_type_dict(tdic);
+
+ // Transfer the shared types.
+ DictI i(_shared_type_dict);
+ for( ; i.test(); ++i ) {
+ Type* t = (Type*)i._value;
+ tdic->Insert(t,t); // New Type, insert into Type table
+ }
+}
+
+//------------------------------hashcons---------------------------------------
+// Do the hash-cons trick. If the Type already exists in the type table,
+// delete the current Type and return the existing Type. Otherwise stick the
+// current Type in the Type table.
+const Type *Type::hashcons(void) {
+ debug_only(base()); // Check the assertion in Type::base().
+ // Look up the Type in the Type dictionary
+ Dict *tdic = type_dict();
+ Type* old = (Type*)(tdic->Insert(this, this, false));
+ if( old ) { // Pre-existing Type?
+ if( old != this ) // Yes, this guy is not the pre-existing?
+ delete this; // Yes, Nuke this guy
+ assert( old->_dual, "" );
+ return old; // Return pre-existing
+ }
+
+ // Every type has a dual (to make my lattice symmetric).
+ // Since we just discovered a new Type, compute its dual right now.
+ assert( !_dual, "" ); // No dual yet
+ _dual = xdual(); // Compute the dual
+ if( cmp(this,_dual)==0 ) { // Handle self-symmetric
+ _dual = this;
+ return this;
+ }
+ assert( !_dual->_dual, "" ); // No reverse dual yet
+ assert( !(*tdic)[_dual], "" ); // Dual not in type system either
+ // New Type, insert into Type table
+ tdic->Insert((void*)_dual,(void*)_dual);
+ ((Type*)_dual)->_dual = this; // Finish up being symmetric
+#ifdef ASSERT
+ Type *dual_dual = (Type*)_dual->xdual();
+ assert( eq(dual_dual), "xdual(xdual()) should be identity" );
+ delete dual_dual;
+#endif
+ return this; // Return new Type
+}
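+
+// A consequence of hash-consing (illustrative): structurally equal types are
+// interned, so they can be compared by pointer, e.g.
+//   const TypeInt* a = TypeInt::make(5);
+//   const TypeInt* b = TypeInt::make(5);
+//   assert(a == b, "equal types share one interned representation");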
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool Type::eq( const Type * ) const {
+ return true; // Nothing else can go wrong
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int Type::hash(void) const {
+ return _base;
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool Type::is_finite() const {
+ return false;
+}
+
+//------------------------------is_nan-----------------------------------------
+// Is not a number (NaN)
+bool Type::is_nan() const {
+ return false;
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. NOT virtual. It enforces that meet is
+// commutative and the lattice is symmetric.
+const Type *Type::meet( const Type *t ) const {
+ const Type *mt = xmeet(t);
+#ifdef ASSERT
+ assert( mt == t->xmeet(this), "meet not commutative" );
+ const Type* dual_join = mt->_dual;
+ const Type *t2t = dual_join->xmeet(t->_dual);
+ const Type *t2this = dual_join->xmeet( _dual);
+
+ // Interface meet Oop is Not Symmetric:
+ // Interface:AnyNull meet Oop:AnyNull == Interface:AnyNull
+ // Interface:NotNull meet Oop:NotNull == java/lang/Object:NotNull
+ const TypeInstPtr* this_inst = this->isa_instptr();
+ const TypeInstPtr* t_inst = t->isa_instptr();
+ bool interface_vs_oop = false;
+ if( this_inst && this_inst->is_loaded() && t_inst && t_inst->is_loaded() ) {
+ bool this_interface = this_inst->klass()->is_interface();
+ bool t_interface = t_inst->klass()->is_interface();
+ interface_vs_oop = this_interface ^ t_interface;
+ }
+ const Type *tdual = t->_dual;
+ const Type *thisdual = _dual;
+ // strip out instances
+ if (t2t->isa_oopptr() != NULL) {
+ t2t = t2t->isa_oopptr()->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ }
+ if (t2this->isa_oopptr() != NULL) {
+ t2this = t2this->isa_oopptr()->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ }
+ if (tdual->isa_oopptr() != NULL) {
+ tdual = tdual->isa_oopptr()->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ }
+ if (thisdual->isa_oopptr() != NULL) {
+ thisdual = thisdual->isa_oopptr()->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ }
+
+ if( !interface_vs_oop && (t2t != tdual || t2this != thisdual) ) {
+ tty->print_cr("=== Meet Not Symmetric ===");
+ tty->print("t = "); t->dump(); tty->cr();
+ tty->print("this= "); dump(); tty->cr();
+ tty->print("mt=(t meet this)= "); mt->dump(); tty->cr();
+
+ tty->print("t_dual= "); t->_dual->dump(); tty->cr();
+ tty->print("this_dual= "); _dual->dump(); tty->cr();
+ tty->print("mt_dual= "); mt->_dual->dump(); tty->cr();
+
+ tty->print("mt_dual meet t_dual= "); t2t ->dump(); tty->cr();
+ tty->print("mt_dual meet this_dual= "); t2this ->dump(); tty->cr();
+
+ fatal("meet not symmetric" );
+ }
+#endif
+ return mt;
+}
+
+//------------------------------xmeet------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *Type::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Meeting TOP with anything?
+ if( _base == Top ) return t;
+
+ // Meeting BOTTOM with anything?
+ if( _base == Bottom ) return BOTTOM;
+
+ // Current "this->_base" is one of: Bad, Multi, Control, Top,
+ // Abio, Abstore, Floatxxx, Doublexxx, Bottom, lastype.
+ switch (t->base()) { // Switch on original type
+
+ // Cut in half the number of cases I must handle. Only need cases for when
+ // the given enum "t->type" is less than or equal to the local enum "type".
+ case FloatCon:
+ case DoubleCon:
+ case Int:
+ case Long:
+ return t->xmeet(this);
+
+ case OopPtr:
+ return t->xmeet(this);
+
+ case InstPtr:
+ return t->xmeet(this);
+
+ case KlassPtr:
+ return t->xmeet(this);
+
+ case AryPtr:
+ return t->xmeet(this);
+
+ case Bad: // Type check
+ default: // Bogus type not in lattice
+ typerr(t);
+ return Type::BOTTOM;
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ case FloatTop:
+ if( _base == FloatTop ) return this;
+ case FloatBot: // Float
+ if( _base == FloatBot || _base == FloatTop ) return FLOAT;
+ if( _base == DoubleTop || _base == DoubleBot ) return Type::BOTTOM;
+ typerr(t);
+ return Type::BOTTOM;
+
+ case DoubleTop:
+ if( _base == DoubleTop ) return this;
+ case DoubleBot: // Double
+ if( _base == DoubleBot || _base == DoubleTop ) return DOUBLE;
+ if( _base == FloatTop || _base == FloatBot ) return Type::BOTTOM;
+ typerr(t);
+ return Type::BOTTOM;
+
+ // These next few cases must match exactly or it is a compile-time error.
+ case Control: // Control of code
+ case Abio: // State of world outside of program
+ case Memory:
+ if( _base == t->_base ) return this;
+ typerr(t);
+ return Type::BOTTOM;
+
+ case Top: // Top of the lattice
+ return this;
+ }
+
+ // The type is unchanged
+ return this;
+}
+
+//-----------------------------filter------------------------------------------
+const Type *Type::filter( const Type *kills ) const {
+ const Type* ft = join(kills);
+ if (ft->empty())
+ return Type::TOP; // Canonical empty value
+ return ft;
+}
+
+//------------------------------xdual------------------------------------------
+// Compute dual right now.
+const Type::TYPES Type::dual_type[Type::lastype] = {
+ Bad, // Bad
+ Control, // Control
+ Bottom, // Top
+ Bad, // Int - handled in v-call
+ Bad, // Long - handled in v-call
+ Half, // Half
+
+ Bad, // Tuple - handled in v-call
+ Bad, // Array - handled in v-call
+
+ Bad, // AnyPtr - handled in v-call
+ Bad, // RawPtr - handled in v-call
+ Bad, // OopPtr - handled in v-call
+ Bad, // InstPtr - handled in v-call
+ Bad, // AryPtr - handled in v-call
+ Bad, // KlassPtr - handled in v-call
+
+ Bad, // Function - handled in v-call
+ Abio, // Abio
+ Return_Address,// Return_Address
+ Memory, // Memory
+ FloatBot, // FloatTop
+ FloatCon, // FloatCon
+ FloatTop, // FloatBot
+ DoubleBot, // DoubleTop
+ DoubleCon, // DoubleCon
+ DoubleTop, // DoubleBot
+ Top // Bottom
+};
+
+const Type *Type::xdual() const {
+ // Note: the base() accessor asserts the sanity of _base.
+ assert(dual_type[base()] != Bad, "implement with v-call");
+ return new Type(dual_type[_base]);
+}
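+
+// Examples from the table above: Top and Bottom are each other's duals,
+// FloatTop <-> FloatBot and DoubleTop <-> DoubleBot swap, and self-symmetric
+// entries such as Control, Half, Abio, Memory, FloatCon and DoubleCon map to
+// themselves, so xdual(xdual(t)) always gets back to t (as hashcons asserts).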
+
+//------------------------------has_memory-------------------------------------
+bool Type::has_memory() const {
+ Type::TYPES tx = base();
+ if (tx == Memory) return true;
+ if (tx == Tuple) {
+ const TypeTuple *t = is_tuple();
+ for (uint i=0; i < t->cnt(); i++) {
+ tx = t->field_at(i)->base();
+ if (tx == Memory) return true;
+ }
+ }
+ return false;
+}
+
+#ifndef PRODUCT
+//------------------------------dump2------------------------------------------
+void Type::dump2( Dict &d, uint depth, outputStream *st ) const {
+ st->print(msg[_base]);
+}
+
+//------------------------------dump-------------------------------------------
+void Type::dump_on(outputStream *st) const {
+ ResourceMark rm;
+ Dict d(cmpkey,hashkey); // Stop recursive type dumping
+ dump2(d,1, st);
+}
+
+//------------------------------data-------------------------------------------
+const char * const Type::msg[Type::lastype] = {
+ "bad","control","top","int:","long:","half",
+ "tuple:", "aryptr",
+ "anyptr:", "rawptr:", "java:", "inst:", "ary:", "klass:",
+ "func", "abIO", "return_address", "memory",
+ "float_top", "ftcon:", "float",
+ "double_top", "dblcon:", "double",
+ "bottom"
+};
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants.
+bool Type::singleton(void) const {
+ return _base == Top || _base == Half;
+}
+
+//------------------------------empty------------------------------------------
+// TRUE if Type is a type with no values, FALSE otherwise.
+bool Type::empty(void) const {
+ switch (_base) {
+ case DoubleTop:
+ case FloatTop:
+ case Top:
+ return true;
+
+ case Half:
+ case Abio:
+ case Return_Address:
+ case Memory:
+ case Bottom:
+ case FloatBot:
+ case DoubleBot:
+ return false; // never a singleton, therefore never empty
+ }
+
+ ShouldNotReachHere();
+ return false;
+}
+
+//------------------------------dump_stats-------------------------------------
+// Dump collected statistics to stderr
+#ifndef PRODUCT
+void Type::dump_stats() {
+ tty->print("Types made: %d\n", type_dict()->Size());
+}
+#endif
+
+//------------------------------typerr-----------------------------------------
+void Type::typerr( const Type *t ) const {
+#ifndef PRODUCT
+ tty->print("\nError mixing types: ");
+ dump();
+ tty->print(" and ");
+ t->dump();
+ tty->print("\n");
+#endif
+ ShouldNotReachHere();
+}
+
+//------------------------------isa_oop_ptr------------------------------------
+// Return true if type is an oop pointer type. False for raw pointers.
+static char isa_oop_ptr_tbl[Type::lastype] = {
+ 0,0,0,0,0,0,0/*tuple*/, 0/*ary*/,
+ 0/*anyptr*/,0/*rawptr*/,1/*OopPtr*/,1/*InstPtr*/,1/*AryPtr*/,1/*KlassPtr*/,
+ 0/*func*/,0,0/*return_address*/,0,
+ /*floats*/0,0,0, /*doubles*/0,0,0,
+ 0
+};
+bool Type::isa_oop_ptr() const {
+ return isa_oop_ptr_tbl[_base] != 0;
+}
+
+//------------------------------verify_lastype---------------------------------
+// Check that arrays match type enum
+#ifndef PRODUCT
+void Type::verify_lastype() {
+ // Check that arrays match enumeration
+ assert( Type::dual_type [Type::lastype - 1] == Type::Top, "did not update array");
+ assert( strcmp(Type::msg [Type::lastype - 1],"bottom") == 0, "did not update array");
+ // assert( PhiNode::tbl [Type::lastype - 1] == NULL, "did not update array");
+ assert( Matcher::base2reg[Type::lastype - 1] == 0, "did not update array");
+ assert( isa_oop_ptr_tbl [Type::lastype - 1] == (char)0, "did not update array");
+}
+#endif
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeF *TypeF::ZERO; // Floating point zero
+const TypeF *TypeF::ONE; // Floating point one
+
+//------------------------------make-------------------------------------------
+// Create a float constant
+const TypeF *TypeF::make(float f) {
+ return (TypeF*)(new TypeF(f))->hashcons();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeF::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is FloatCon
+ switch (t->base()) { // Switch on original type
+ case AnyPtr: // Mixing with oops happens when javac
+ case RawPtr: // reuses local variables
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ case Int:
+ case Long:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+
+ case FloatBot:
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case FloatCon: // Float-constant vs Float-constant?
+ if( jint_cast(_f) != jint_cast(t->getf()) ) // unequal constants?
+ // must compare bitwise as positive zero, negative zero and NaN have
+ // all the same representation in C++
+ return FLOAT; // Return generic float
+ // Equal constants
+ case Top:
+ case FloatTop:
+ break; // Return the float constant
+ }
+ return this; // Return the float constant
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: symmetric
+const Type *TypeF::xdual() const {
+ return this;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeF::eq( const Type *t ) const {
+ if( g_isnan(_f) ||
+ g_isnan(t->getf()) ) {
+ // One or both are NANs. If both are NANs return true, else false.
+ return (g_isnan(_f) && g_isnan(t->getf()));
+ }
+ if (_f == t->getf()) {
+ // (NaN is impossible at this point, since it is not equal even to itself)
+ if (_f == 0.0) {
+ // difference between positive and negative zero
+ if (jint_cast(_f) != jint_cast(t->getf())) return false;
+ }
+ return true;
+ }
+ return false;
+}
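+
+// Consequences of the bitwise checks above (illustrative): TypeF::make(0.0f)
+// and TypeF::make(-0.0f) are distinct type constants even though 0.0f == -0.0f
+// as a float comparison, because their bit patterns differ; conversely a NaN
+// constant is considered eq() to another NaN, even though NaN != NaN in C++.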
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeF::hash(void) const {
+ return *(int*)(&_f);
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool TypeF::is_finite() const {
+ return g_isfinite(getf()) != 0;
+}
+
+//------------------------------is_nan-----------------------------------------
+// Is not a number (NaN)
+bool TypeF::is_nan() const {
+ return g_isnan(getf()) != 0;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump float constant Type
+#ifndef PRODUCT
+void TypeF::dump2( Dict &d, uint depth, outputStream *st ) const {
+ Type::dump2(d,depth, st);
+ st->print("%f", _f);
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeF::singleton(void) const {
+ return true; // Always a singleton
+}
+
+bool TypeF::empty(void) const {
+ return false; // always exactly a singleton
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeD *TypeD::ZERO; // Floating point zero
+const TypeD *TypeD::ONE; // Floating point one
+
+//------------------------------make-------------------------------------------
+const TypeD *TypeD::make(double d) {
+ return (TypeD*)(new TypeD(d))->hashcons();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeD::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is DoubleCon
+ switch (t->base()) { // Switch on original type
+ case AnyPtr: // Mixing with oops happens when javac
+ case RawPtr: // reuses local variables
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ case Int:
+ case Long:
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+
+ case DoubleBot:
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case DoubleCon: // Double-constant vs Double-constant?
+ if( jlong_cast(_d) != jlong_cast(t->getd()) ) // unequal constants? (see comment in TypeF::xmeet)
+ return DOUBLE; // Return generic double
+ case Top:
+ case DoubleTop:
+ break;
+ }
+ return this; // Return the double constant
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: symmetric
+const Type *TypeD::xdual() const {
+ return this;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeD::eq( const Type *t ) const {
+ if( g_isnan(_d) ||
+ g_isnan(t->getd()) ) {
+ // One or both are NANs. If both are NANs return true, else false.
+ return (g_isnan(_d) && g_isnan(t->getd()));
+ }
+ if (_d == t->getd()) {
+ // (NaN is impossible at this point, since it is not equal even to itself)
+ if (_d == 0.0) {
+ // difference between positive and negative zero
+ if (jlong_cast(_d) != jlong_cast(t->getd())) return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeD::hash(void) const {
+ return *(int*)(&_d);
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool TypeD::is_finite() const {
+ return g_isfinite(getd()) != 0;
+}
+
+//------------------------------is_nan-----------------------------------------
+// Is not a number (NaN)
+bool TypeD::is_nan() const {
+ return g_isnan(getd()) != 0;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump double constant Type
+#ifndef PRODUCT
+void TypeD::dump2( Dict &d, uint depth, outputStream *st ) const {
+ Type::dump2(d,depth,st);
+ st->print("%f", _d);
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeD::singleton(void) const {
+ return true; // Always a singleton
+}
+
+bool TypeD::empty(void) const {
+ return false; // always exactly a singleton
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeInt *TypeInt::MINUS_1;// -1
+const TypeInt *TypeInt::ZERO; // 0
+const TypeInt *TypeInt::ONE; // 1
+const TypeInt *TypeInt::BOOL; // 0 or 1, FALSE or TRUE.
+const TypeInt *TypeInt::CC; // -1,0 or 1, condition codes
+const TypeInt *TypeInt::CC_LT; // [-1] == MINUS_1
+const TypeInt *TypeInt::CC_GT; // [1] == ONE
+const TypeInt *TypeInt::CC_EQ; // [0] == ZERO
+const TypeInt *TypeInt::CC_LE; // [-1,0]
+const TypeInt *TypeInt::CC_GE; // [0,1] == BOOL (!)
+const TypeInt *TypeInt::BYTE; // Bytes, -128 to 127
+const TypeInt *TypeInt::CHAR; // Java chars, 0-65535
+const TypeInt *TypeInt::SHORT; // Java shorts, -32768-32767
+const TypeInt *TypeInt::POS; // Positive 32-bit integers or zero
+const TypeInt *TypeInt::POS1; // Positive 32-bit integers
+const TypeInt *TypeInt::INT; // 32-bit integers
+const TypeInt *TypeInt::SYMINT; // symmetric range [-max_jint..max_jint]
+
+//------------------------------TypeInt----------------------------------------
+TypeInt::TypeInt( jint lo, jint hi, int w ) : Type(Int), _lo(lo), _hi(hi), _widen(w) {
+}
+
+//------------------------------make-------------------------------------------
+const TypeInt *TypeInt::make( jint lo ) {
+ return (TypeInt*)(new TypeInt(lo,lo,WidenMin))->hashcons();
+}
+
+#define SMALLINT ((juint)3) // a value too insignificant to consider widening
+
+const TypeInt *TypeInt::make( jint lo, jint hi, int w ) {
+ // Certain normalizations keep us sane when comparing types.
+ // The 'SMALLINT' covers constants and also CC and its relatives.
+ assert(CC == NULL || (juint)(CC->_hi - CC->_lo) <= SMALLINT, "CC is truly small");
+ if (lo <= hi) {
+ if ((juint)(hi - lo) <= SMALLINT) w = Type::WidenMin;
+ if ((juint)(hi - lo) >= max_juint) w = Type::WidenMax; // plain int
+ }
+ return (TypeInt*)(new TypeInt(lo,hi,w))->hashcons();
+}
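+
+// Normalization examples (illustrative):
+//   TypeInt::make(3, 5, WidenMax)                -- hi-lo == 2 <= SMALLINT, so
+//                                                   widen is forced to WidenMin;
+//   TypeInt::make(min_jint, max_jint, WidenMin)  -- covers the whole range, so
+//                                                   it is forced to WidenMax (plain int).
+// This keeps trivially small and trivially full ranges canonical, independent
+// of the widen state they arrive with, so hashcons() can unify them.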
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type representation object
+// with reference count equal to the number of Types pointing at it.
+// Caller should wrap a Types around it.
+const Type *TypeInt::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type?
+
+ // Currently "this->_base" is a TypeInt
+ switch (t->base()) { // Switch on original type
+ case AnyPtr: // Mixing with oops happens when javac
+ case RawPtr: // reuses local variables
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ case Long:
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ default: // All else is a mistake
+ typerr(t);
+ case Top: // No change
+ return this;
+ case Int: // Int vs Int?
+ break;
+ }
+
+ // Expand covered set
+ const TypeInt *r = t->is_int();
+ // (Avoid TypeInt::make, to avoid the argument normalizations it enforces.)
+ return (new TypeInt( MIN2(_lo,r->_lo), MAX2(_hi,r->_hi), MAX2(_widen,r->_widen) ))->hashcons();
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: reverse hi & lo; flip widen
+const Type *TypeInt::xdual() const {
+ return new TypeInt(_hi,_lo,WidenMax-_widen);
+}
+
+//------------------------------widen------------------------------------------
+// Only happens for optimistic top-down optimizations.
+const Type *TypeInt::widen( const Type *old ) const {
+ // Coming from TOP or such; no widening
+ if( old->base() != Int ) return this;
+ const TypeInt *ot = old->is_int();
+
+ // If new guy is equal to old guy, no widening
+ if( _lo == ot->_lo && _hi == ot->_hi )
+ return old;
+
+ // If new guy contains old, then we widened
+ if( _lo <= ot->_lo && _hi >= ot->_hi ) {
+ // New contains old
+ // If new guy is already wider than old, no widening
+ if( _widen > ot->_widen ) return this;
+ // If old guy was a constant, do not bother
+ if (ot->_lo == ot->_hi) return this;
+ // Now widen new guy.
+ // Check for widening too far
+ if (_widen == WidenMax) {
+ if (min_jint < _lo && _hi < max_jint) {
+ // If neither endpoint is extremal yet, push out the endpoint
+ // which is closer to its respective limit.
+ if (_lo >= 0 || // easy common case
+ (juint)(_lo - min_jint) >= (juint)(max_jint - _hi)) {
+ // Try to widen to an unsigned range type of 31 bits:
+ return make(_lo, max_jint, WidenMax);
+ } else {
+ return make(min_jint, _hi, WidenMax);
+ }
+ }
+ return TypeInt::INT;
+ }
+ // Returned widened new guy
+ return make(_lo,_hi,_widen+1);
+ }
+
+ // If old guy contains new, then we probably widened too far & dropped to
+ // bottom. Return the wider fellow.
+ if ( ot->_lo <= _lo && ot->_hi >= _hi )
+ return old;
+
+ //fatal("Integer value range is not subset");
+ //return this;
+ return TypeInt::INT;
+}
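+
+// Illustrative widening sequence (hypothetical ranges): if optimistic rounds
+// produce [0,10], then [0,11], then [0,12], ..., each step that still contains
+// the previous range keeps the exact new bounds but bumps _widen by one.  Once
+// _widen reaches WidenMax the range is pushed straight to an extremal bound --
+// here [0,max_jint] -- or to TypeInt::INT, so iteration terminates instead of
+// creeping up one value at a time.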
+
+//------------------------------narrow---------------------------------------
+// Only happens for pessimistic optimizations.
+const Type *TypeInt::narrow( const Type *old ) const {
+ if (_lo >= _hi) return this; // already narrow enough
+ if (old == NULL) return this;
+ const TypeInt* ot = old->isa_int();
+ if (ot == NULL) return this;
+ jint olo = ot->_lo;
+ jint ohi = ot->_hi;
+
+ // If new guy is equal to old guy, no narrowing
+ if (_lo == olo && _hi == ohi) return old;
+
+ // If old guy was maximum range, allow the narrowing
+ if (olo == min_jint && ohi == max_jint) return this;
+
+ if (_lo < olo || _hi > ohi)
+ return this; // doesn't narrow; pretty weird
+
+ // The new type narrows the old type, so look for a "death march".
+ // See comments on PhaseTransform::saturate.
+ juint nrange = _hi - _lo;
+ juint orange = ohi - olo;
+ if (nrange < max_juint - 1 && nrange > (orange >> 1) + (SMALLINT*2)) {
+ // Use the new type only if the range shrinks a lot.
+ // We do not want the optimizer computing 2^31 point by point.
+ return old;
+ }
+
+ return this;
+}
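+
+// Illustrative "death march" check (hypothetical ranges): narrowing from the
+// full int range down to [0,100] is accepted, but narrowing [0,1000000] to
+// [0,999999] is not -- the new width (999999) is still more than half the old
+// width plus 2*SMALLINT, so 'old' is returned rather than letting the
+// optimizer shrink the interval one value per pass.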
+
+//-----------------------------filter------------------------------------------
+const Type *TypeInt::filter( const Type *kills ) const {
+ const TypeInt* ft = join(kills)->isa_int();
+ if (ft == NULL || ft->_lo > ft->_hi)
+ return Type::TOP; // Canonical empty value
+ if (ft->_widen < this->_widen) {
+ // Do not allow the value of kill->_widen to affect the outcome.
+ // The widen bits must be allowed to run freely through the graph.
+ ft = TypeInt::make(ft->_lo, ft->_hi, this->_widen);
+ }
+ return ft;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeInt::eq( const Type *t ) const {
+ const TypeInt *r = t->is_int(); // Handy access
+ return r->_lo == _lo && r->_hi == _hi && r->_widen == _widen;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeInt::hash(void) const {
+ return _lo+_hi+_widen+(int)Type::Int;
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool TypeInt::is_finite() const {
+ return true;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump TypeInt
+#ifndef PRODUCT
+static const char* intname(char* buf, jint n) {
+ if (n == min_jint)
+ return "min";
+ else if (n < min_jint + 10000)
+ sprintf(buf, "min+" INT32_FORMAT, n - min_jint);
+ else if (n == max_jint)
+ return "max";
+ else if (n > max_jint - 10000)
+ sprintf(buf, "max-" INT32_FORMAT, max_jint - n);
+ else
+ sprintf(buf, INT32_FORMAT, n);
+ return buf;
+}
+
+void TypeInt::dump2( Dict &d, uint depth, outputStream *st ) const {
+ char buf[40], buf2[40];
+ if (_lo == min_jint && _hi == max_jint)
+ st->print("int");
+ else if (is_con())
+ st->print("int:%s", intname(buf, get_con()));
+ else if (_lo == BOOL->_lo && _hi == BOOL->_hi)
+ st->print("bool");
+ else if (_lo == BYTE->_lo && _hi == BYTE->_hi)
+ st->print("byte");
+ else if (_lo == CHAR->_lo && _hi == CHAR->_hi)
+ st->print("char");
+ else if (_lo == SHORT->_lo && _hi == SHORT->_hi)
+ st->print("short");
+ else if (_hi == max_jint)
+ st->print("int:>=%s", intname(buf, _lo));
+ else if (_lo == min_jint)
+ st->print("int:<=%s", intname(buf, _hi));
+ else
+ st->print("int:%s..%s", intname(buf, _lo), intname(buf2, _hi));
+
+ if (_widen != 0 && this != TypeInt::INT)
+ st->print(":%.*s", _widen, "wwww");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants.
+bool TypeInt::singleton(void) const {
+ return _lo >= _hi;
+}
+
+bool TypeInt::empty(void) const {
+ return _lo > _hi;
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeLong *TypeLong::MINUS_1;// -1
+const TypeLong *TypeLong::ZERO; // 0
+const TypeLong *TypeLong::ONE; // 1
+const TypeLong *TypeLong::POS; // >=0
+const TypeLong *TypeLong::LONG; // 64-bit integers
+const TypeLong *TypeLong::INT; // 32-bit subrange
+const TypeLong *TypeLong::UINT; // 32-bit unsigned subrange
+
+//------------------------------TypeLong---------------------------------------
+TypeLong::TypeLong( jlong lo, jlong hi, int w ) : Type(Long), _lo(lo), _hi(hi), _widen(w) {
+}
+
+//------------------------------make-------------------------------------------
+const TypeLong *TypeLong::make( jlong lo ) {
+ return (TypeLong*)(new TypeLong(lo,lo,WidenMin))->hashcons();
+}
+
+const TypeLong *TypeLong::make( jlong lo, jlong hi, int w ) {
+ // Certain normalizations keep us sane when comparing types.
+ // The 'SMALLINT' covers constants.
+ if (lo <= hi) {
+ if ((julong)(hi - lo) <= SMALLINT) w = Type::WidenMin;
+ if ((julong)(hi - lo) >= max_julong) w = Type::WidenMax; // plain long
+ }
+ return (TypeLong*)(new TypeLong(lo,hi,w))->hashcons();
+}
+
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type representation object
+// with reference count equal to the number of Types pointing at it.
+// Caller should wrap a Types around it.
+const Type *TypeLong::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type?
+
+ // Currently "this->_base" is a TypeLong
+ switch (t->base()) { // Switch on original type
+ case AnyPtr: // Mixing with oops happens when javac
+ case RawPtr: // reuses local variables
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ case Int:
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ default: // All else is a mistake
+ typerr(t);
+ case Top: // No change
+ return this;
+ case Long: // Long vs Long?
+ break;
+ }
+
+ // Expand covered set
+ const TypeLong *r = t->is_long(); // Turn into a TypeLong
+ // (Avoid TypeLong::make, to avoid the argument normalizations it enforces.)
+ return (new TypeLong( MIN2(_lo,r->_lo), MAX2(_hi,r->_hi), MAX2(_widen,r->_widen) ))->hashcons();
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: reverse hi & lo; flip widen
+const Type *TypeLong::xdual() const {
+ return new TypeLong(_hi,_lo,WidenMax-_widen);
+}
+
+//------------------------------widen------------------------------------------
+// Only happens for optimistic top-down optimizations.
+const Type *TypeLong::widen( const Type *old ) const {
+ // Coming from TOP or such; no widening
+ if( old->base() != Long ) return this;
+ const TypeLong *ot = old->is_long();
+
+ // If new guy is equal to old guy, no widening
+ if( _lo == ot->_lo && _hi == ot->_hi )
+ return old;
+
+ // If new guy contains old, then we widened
+ if( _lo <= ot->_lo && _hi >= ot->_hi ) {
+ // New contains old
+ // If new guy is already wider than old, no widening
+ if( _widen > ot->_widen ) return this;
+ // If old guy was a constant, do not bother
+ if (ot->_lo == ot->_hi) return this;
+ // Now widen new guy.
+ // Check for widening too far
+ if (_widen == WidenMax) {
+ if (min_jlong < _lo && _hi < max_jlong) {
+ // If neither endpoint is extremal yet, push out the endpoint
+ // which is closer to its respective limit.
+ if (_lo >= 0 || // easy common case
+ (julong)(_lo - min_jlong) >= (julong)(max_jlong - _hi)) {
+ // Try to widen to an unsigned range type of 32/63 bits:
+ if (_hi < max_juint)
+ return make(_lo, max_juint, WidenMax);
+ else
+ return make(_lo, max_jlong, WidenMax);
+ } else {
+ return make(min_jlong, _hi, WidenMax);
+ }
+ }
+ return TypeLong::LONG;
+ }
+ // Returned widened new guy
+ return make(_lo,_hi,_widen+1);
+ }
+
+ // If old guy contains new, then we probably widened too far & dropped to
+ // bottom. Return the wider fellow.
+ if ( ot->_lo <= _lo && ot->_hi >= _hi )
+ return old;
+
+ // fatal("Long value range is not subset");
+ // return this;
+ return TypeLong::LONG;
+}
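+
+// Note on the widening policy above: as long as the new range keeps creeping
+// outward, only the _widen counter is bumped (make(_lo,_hi,_widen+1)); once
+// it reaches WidenMax the range is pushed out to an extremal bound
+// (max_juint, max_jlong or min_jlong) or falls to TypeLong::LONG, which is
+// what keeps iterative analysis from growing a range one value at a time.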
+
+//------------------------------narrow----------------------------------------
+// Only happens for pessimistic optimizations.
+const Type *TypeLong::narrow( const Type *old ) const {
+ if (_lo >= _hi) return this; // already narrow enough
+ if (old == NULL) return this;
+ const TypeLong* ot = old->isa_long();
+ if (ot == NULL) return this;
+ jlong olo = ot->_lo;
+ jlong ohi = ot->_hi;
+
+ // If new guy is equal to old guy, no narrowing
+ if (_lo == olo && _hi == ohi) return old;
+
+ // If old guy was maximum range, allow the narrowing
+ if (olo == min_jlong && ohi == max_jlong) return this;
+
+ if (_lo < olo || _hi > ohi)
+ return this; // doesn't narrow; pretty weird
+
+ // The new type narrows the old type, so look for a "death march".
+ // See comments on PhaseTransform::saturate.
+ julong nrange = _hi - _lo;
+ julong orange = ohi - olo;
+ if (nrange < max_julong - 1 && nrange > (orange >> 1) + (SMALLINT*2)) {
+ // Use the new type only if the range shrinks a lot.
+ // We do not want the optimizer computing 2^31 point by point.
+ return old;
+ }
+
+ return this;
+}
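+
+// Note: the guard above only accepts the narrower type when the range
+// shrinks to (roughly) half of the old range or less; otherwise the old
+// type is kept, so the optimizer cannot be dragged through a "death march"
+// that tightens a huge range one point at a time.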
+
+//-----------------------------filter------------------------------------------
+const Type *TypeLong::filter( const Type *kills ) const {
+ const TypeLong* ft = join(kills)->isa_long();
+ if (ft == NULL || ft->_lo > ft->_hi)
+ return Type::TOP; // Canonical empty value
+ if (ft->_widen < this->_widen) {
+ // Do not allow the value of kill->_widen to affect the outcome.
+ // The widen bits must be allowed to run freely through the graph.
+ ft = TypeLong::make(ft->_lo, ft->_hi, this->_widen);
+ }
+ return ft;
+}
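+
+// Informally: the value range of the result comes from join(kills), but if
+// that join would lower the widen bits they are restored from 'this', so
+// widening progress is never lost by filtering against a kill type.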
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeLong::eq( const Type *t ) const {
+ const TypeLong *r = t->is_long(); // Handy access
+ return r->_lo == _lo && r->_hi == _hi && r->_widen == _widen;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeLong::hash(void) const {
+ return (int)(_lo+_hi+_widen+(int)Type::Long);
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool TypeLong::is_finite() const {
+ return true;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump TypeLong
+#ifndef PRODUCT
+static const char* longnamenear(jlong x, const char* xname, char* buf, jlong n) {
+ if (n > x) {
+ if (n >= x + 10000) return NULL;
+ sprintf(buf, "%s+" INT64_FORMAT, xname, n - x);
+ } else if (n < x) {
+ if (n <= x - 10000) return NULL;
+ sprintf(buf, "%s-" INT64_FORMAT, xname, x - n);
+ } else {
+ return xname;
+ }
+ return buf;
+}
+
+static const char* longname(char* buf, jlong n) {
+ const char* str;
+ if (n == min_jlong)
+ return "min";
+ else if (n < min_jlong + 10000)
+ sprintf(buf, "min+" INT64_FORMAT, n - min_jlong);
+ else if (n == max_jlong)
+ return "max";
+ else if (n > max_jlong - 10000)
+ sprintf(buf, "max-" INT64_FORMAT, max_jlong - n);
+ else if ((str = longnamenear(max_juint, "maxuint", buf, n)) != NULL)
+ return str;
+ else if ((str = longnamenear(max_jint, "maxint", buf, n)) != NULL)
+ return str;
+ else if ((str = longnamenear(min_jint, "minint", buf, n)) != NULL)
+ return str;
+ else
+ sprintf(buf, INT64_FORMAT, n);
+ return buf;
+}
+
+void TypeLong::dump2( Dict &d, uint depth, outputStream *st ) const {
+ char buf[80], buf2[80];
+ if (_lo == min_jlong && _hi == max_jlong)
+ st->print("long");
+ else if (is_con())
+ st->print("long:%s", longname(buf, get_con()));
+ else if (_hi == max_jlong)
+ st->print("long:>=%s", longname(buf, _lo));
+ else if (_lo == min_jlong)
+ st->print("long:<=%s", longname(buf, _hi));
+ else
+ st->print("long:%s..%s", longname(buf, _lo), longname(buf2, _hi));
+
+ if (_widen != 0 && this != TypeLong::LONG)
+ st->print(":%.*s", _widen, "wwww");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants
+bool TypeLong::singleton(void) const {
+ return _lo >= _hi;
+}
+
+bool TypeLong::empty(void) const {
+ return _lo > _hi;
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeTuple *TypeTuple::IFBOTH; // Return both arms of IF as reachable
+const TypeTuple *TypeTuple::IFFALSE;
+const TypeTuple *TypeTuple::IFTRUE;
+const TypeTuple *TypeTuple::IFNEITHER;
+const TypeTuple *TypeTuple::LOOPBODY;
+const TypeTuple *TypeTuple::MEMBAR;
+const TypeTuple *TypeTuple::STORECONDITIONAL;
+const TypeTuple *TypeTuple::START_I2C;
+const TypeTuple *TypeTuple::INT_PAIR;
+const TypeTuple *TypeTuple::LONG_PAIR;
+
+
+//------------------------------make-------------------------------------------
+// Make a TypeTuple from the range of a method signature
+const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
+ ciType* return_type = sig->return_type();
+ uint total_fields = TypeFunc::Parms + return_type->size();
+ const Type **field_array = fields(total_fields);
+ switch (return_type->basic_type()) {
+ case T_LONG:
+ field_array[TypeFunc::Parms] = TypeLong::LONG;
+ field_array[TypeFunc::Parms+1] = Type::HALF;
+ break;
+ case T_DOUBLE:
+ field_array[TypeFunc::Parms] = Type::DOUBLE;
+ field_array[TypeFunc::Parms+1] = Type::HALF;
+ break;
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_BOOLEAN:
+ case T_CHAR:
+ case T_FLOAT:
+ case T_BYTE:
+ case T_SHORT:
+ case T_INT:
+ field_array[TypeFunc::Parms] = get_const_type(return_type);
+ break;
+ case T_VOID:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ return (TypeTuple*)(new TypeTuple(total_fields,field_array))->hashcons();
+}
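+
+// Example: for a method returning a jlong, the range tuple carries
+// TypeLong::LONG in slot TypeFunc::Parms and Type::HALF in the following
+// slot; a void method contributes no return slots beyond the fixed header.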
+
+// Make a TypeTuple from the domain of a method signature
+const TypeTuple *TypeTuple::make_domain(ciInstanceKlass* recv, ciSignature* sig) {
+ uint total_fields = TypeFunc::Parms + sig->size();
+
+ uint pos = TypeFunc::Parms;
+ const Type **field_array;
+ if (recv != NULL) {
+ total_fields++;
+ field_array = fields(total_fields);
+ // Use get_const_type here because it respects UseUniqueSubclasses:
+ field_array[pos++] = get_const_type(recv)->join(TypePtr::NOTNULL);
+ } else {
+ field_array = fields(total_fields);
+ }
+
+ int i = 0;
+ while (pos < total_fields) {
+ ciType* type = sig->type_at(i);
+
+ switch (type->basic_type()) {
+ case T_LONG:
+ field_array[pos++] = TypeLong::LONG;
+ field_array[pos++] = Type::HALF;
+ break;
+ case T_DOUBLE:
+ field_array[pos++] = Type::DOUBLE;
+ field_array[pos++] = Type::HALF;
+ break;
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_BOOLEAN:
+ case T_CHAR:
+ case T_FLOAT:
+ case T_BYTE:
+ case T_SHORT:
+ case T_INT:
+ field_array[pos++] = get_const_type(type);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ i++;
+ }
+ return (TypeTuple*)(new TypeTuple(total_fields,field_array))->hashcons();
+}
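+
+// Example: for a non-static method, the receiver occupies the first
+// argument slot as get_const_type(recv) joined with TypePtr::NOTNULL, and
+// every long or double argument after it consumes two slots (the value
+// followed by Type::HALF), mirroring the JVM's two-word value convention.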
+
+const TypeTuple *TypeTuple::make( uint cnt, const Type **fields ) {
+ return (TypeTuple*)(new TypeTuple(cnt,fields))->hashcons();
+}
+
+//------------------------------fields-----------------------------------------
+// Subroutine call type with space allocated for argument types
+const Type **TypeTuple::fields( uint arg_cnt ) {
+ const Type **flds = (const Type **)(Compile::current()->type_arena()->Amalloc_4((TypeFunc::Parms+arg_cnt)*sizeof(Type*) ));
+ flds[TypeFunc::Control ] = Type::CONTROL;
+ flds[TypeFunc::I_O ] = Type::ABIO;
+ flds[TypeFunc::Memory ] = Type::MEMORY;
+ flds[TypeFunc::FramePtr ] = TypeRawPtr::BOTTOM;
+ flds[TypeFunc::ReturnAdr] = Type::RETURN_ADDRESS;
+
+ return flds;
+}
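+
+// Note: only the fixed header slots (Control, I_O, Memory, FramePtr,
+// ReturnAdr) are filled in here; the argument slots from TypeFunc::Parms
+// onward are left for the caller to populate.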
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeTuple::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Tuple
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case Tuple: { // Meeting 2 signatures?
+ const TypeTuple *x = t->is_tuple();
+ assert( _cnt == x->_cnt, "" );
+ const Type **fields = (const Type **)(Compile::current()->type_arena()->Amalloc_4( _cnt*sizeof(Type*) ));
+ for( uint i=0; i<_cnt; i++ )
+ fields[i] = field_at(i)->xmeet( x->field_at(i) );
+ return TypeTuple::make(_cnt,fields);
+ }
+ case Top:
+ break;
+ }
+ return this; // Return self (meet with Top)
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeTuple::xdual() const {
+ const Type **fields = (const Type **)(Compile::current()->type_arena()->Amalloc_4( _cnt*sizeof(Type*) ));
+ for( uint i=0; i<_cnt; i++ )
+ fields[i] = _fields[i]->dual();
+ return new TypeTuple(_cnt,fields);
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeTuple::eq( const Type *t ) const {
+ const TypeTuple *s = (const TypeTuple *)t;
+ if (_cnt != s->_cnt) return false; // Unequal field counts
+ for (uint i = 0; i < _cnt; i++)
+ if (field_at(i) != s->field_at(i)) // POINTER COMPARE! NO RECURSION!
+ return false; // Missed
+ return true;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeTuple::hash(void) const {
+ intptr_t sum = _cnt;
+ for( uint i=0; i<_cnt; i++ )
+ sum += (intptr_t)_fields[i]; // Hash on pointers directly
+ return sum;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump signature Type
+#ifndef PRODUCT
+void TypeTuple::dump2( Dict &d, uint depth, outputStream *st ) const {
+ st->print("{");
+ if( !depth || d[this] ) { // Check for recursive print
+ st->print("...}");
+ return;
+ }
+ d.Insert((void*)this, (void*)this); // Stop recursion
+ if( _cnt ) {
+ uint i;
+ for( i=0; i<_cnt-1; i++ ) {
+ st->print("%d:", i);
+ _fields[i]->dump2(d, depth-1, st);
+ st->print(", ");
+ }
+ st->print("%d:", i);
+ _fields[i]->dump2(d, depth-1, st);
+ }
+ st->print("}");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeTuple::singleton(void) const {
+ return false; // Never a singleton
+}
+
+bool TypeTuple::empty(void) const {
+ for( uint i=0; i<_cnt; i++ ) {
+ if (_fields[i]->empty()) return true;
+ }
+ return false;
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+
+inline const TypeInt* normalize_array_size(const TypeInt* size) {
+ // Certain normalizations keep us sane when comparing types.
+ // We do not want arrayOop variables to differ only by the wideness
+ // of their index types. Pick minimum wideness, since that is the
+ // forced wideness of small ranges anyway.
+ if (size->_widen != Type::WidenMin)
+ return TypeInt::make(size->_lo, size->_hi, Type::WidenMin);
+ else
+ return size;
+}
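+
+// Example: an array whose size type is [0..100] with a non-minimal widen is
+// rebuilt here as TypeInt::make(0,100,WidenMin), so two arrays whose length
+// types differ only in widen bits hash-cons to the same TypeAry.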
+
+//------------------------------make-------------------------------------------
+const TypeAry *TypeAry::make( const Type *elem, const TypeInt *size) {
+ size = normalize_array_size(size);
+ return (TypeAry*)(new TypeAry(elem,size))->hashcons();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeAry::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Ary
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case Array: { // Meeting 2 arrays?
+ const TypeAry *a = t->is_ary();
+ return TypeAry::make(_elem->meet(a->_elem),
+ _size->xmeet(a->_size)->is_int());
+ }
+ case Top:
+ break;
+ }
+ return this; // Return self (meet with Top)
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeAry::xdual() const {
+ const TypeInt* size_dual = _size->dual()->is_int();
+ size_dual = normalize_array_size(size_dual);
+ return new TypeAry( _elem->dual(), size_dual);
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeAry::eq( const Type *t ) const {
+ const TypeAry *a = (const TypeAry*)t;
+ return _elem == a->_elem &&
+ _size == a->_size;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeAry::hash(void) const {
+ return (intptr_t)_elem + (intptr_t)_size;
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeAry::dump2( Dict &d, uint depth, outputStream *st ) const {
+ _elem->dump2(d, depth, st);
+ st->print("[");
+ _size->dump2(d, depth, st);
+ st->print("]");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeAry::singleton(void) const {
+ return false; // Never a singleton
+}
+
+bool TypeAry::empty(void) const {
+ return _elem->empty() || _size->empty();
+}
+
+//--------------------------ary_must_be_exact----------------------------------
+bool TypeAry::ary_must_be_exact() const {
+ if (!UseExactTypes) return false;
+ // This logic looks at the element type of an array, and returns true
+ // if the element type is either a primitive or a final instance class.
+ // In such cases, an array built on this ary must have no subclasses.
+ if (_elem == BOTTOM) return false; // general array not exact
+ if (_elem == TOP ) return false; // inverted general array not exact
+ const TypeOopPtr* toop = _elem->isa_oopptr();
+ if (!toop) return true; // a primitive type, like int
+ ciKlass* tklass = toop->klass();
+ if (tklass == NULL) return false; // unloaded class
+ if (!tklass->is_loaded()) return false; // unloaded class
+ const TypeInstPtr* tinst = _elem->isa_instptr();
+ if (tinst) return tklass->as_instance_klass()->is_final();
+ const TypeAryPtr* tap = _elem->isa_aryptr();
+ if (tap) return tap->ary()->ary_must_be_exact();
+ return false;
+}
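+
+// Example: with UseExactTypes on, an array whose element type is a final
+// instance class (or a primitive such as int) must be exact, since no
+// subclass of the element can ever appear; an array of a non-final class
+// need not be.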
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypePtr *TypePtr::NULL_PTR;
+const TypePtr *TypePtr::NOTNULL;
+const TypePtr *TypePtr::BOTTOM;
+
+//------------------------------meet-------------------------------------------
+// Meet over the PTR enum
+const TypePtr::PTR TypePtr::ptr_meet[TypePtr::lastPTR][TypePtr::lastPTR] = {
+ // TopPTR, AnyNull, Constant, Null, NotNull, BotPTR,
+ { /* Top */ TopPTR, AnyNull, Constant, Null, NotNull, BotPTR,},
+ { /* AnyNull */ AnyNull, AnyNull, Constant, BotPTR, NotNull, BotPTR,},
+ { /* Constant*/ Constant, Constant, Constant, BotPTR, NotNull, BotPTR,},
+ { /* Null */ Null, BotPTR, BotPTR, Null, BotPTR, BotPTR,},
+ { /* NotNull */ NotNull, NotNull, NotNull, BotPTR, NotNull, BotPTR,},
+ { /* BotPTR */ BotPTR, BotPTR, BotPTR, BotPTR, BotPTR, BotPTR,}
+};
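+
+// Reading the table: it is symmetric in its two arguments and behaves like a
+// lattice meet, e.g. Null meet NotNull gives BotPTR (maybe-null vs never-null
+// tells us nothing), AnyNull meet NotNull gives NotNull, and TopPTR meet
+// anything gives that other thing back.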
+
+//------------------------------make-------------------------------------------
+const TypePtr *TypePtr::make( TYPES t, enum PTR ptr, int offset ) {
+ return (TypePtr*)(new TypePtr(t,ptr,offset))->hashcons();
+}
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypePtr::cast_to_ptr_type(PTR ptr) const {
+ assert(_base == AnyPtr, "subclass must override cast_to_ptr_type");
+ if( ptr == _ptr ) return this;
+ return make(_base, ptr, _offset);
+}
+
+//------------------------------get_con----------------------------------------
+intptr_t TypePtr::get_con() const {
+ assert( _ptr == Null, "" );
+ return _offset;
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypePtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is AnyPtr
+ switch (t->base()) { // switch on original type
+ case Int: // Mixing ints & oops happens when javac
+ case Long: // reuses local variables
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ case AnyPtr: { // Meeting to AnyPtrs
+ const TypePtr *tp = t->is_ptr();
+ return make( AnyPtr, meet_ptr(tp->ptr()), meet_offset(tp->offset()) );
+ }
+ case RawPtr: // For these, flip the call around to cut down
+ case OopPtr:
+ case InstPtr: // on the cases I have to handle.
+ case KlassPtr:
+ case AryPtr:
+ return t->xmeet(this); // Call in reverse direction
+ default: // All else is a mistake
+ typerr(t);
+
+ }
+ return this;
+}
+
+//------------------------------meet_offset------------------------------------
+int TypePtr::meet_offset( int offset ) const {
+ // Either is 'TOP' offset? Return the other offset!
+ if( _offset == OffsetTop ) return offset;
+ if( offset == OffsetTop ) return _offset;
+ // If either is different, return 'BOTTOM' offset
+ if( _offset != offset ) return OffsetBot;
+ return _offset;
+}
+
+//------------------------------dual_offset------------------------------------
+int TypePtr::dual_offset( ) const {
+ if( _offset == OffsetTop ) return OffsetBot;// Map 'TOP' into 'BOTTOM'
+ if( _offset == OffsetBot ) return OffsetTop;// Map 'BOTTOM' into 'TOP'
+ return _offset; // Map everything else into self
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const TypePtr::PTR TypePtr::ptr_dual[TypePtr::lastPTR] = {
+ BotPTR, NotNull, Constant, Null, AnyNull, TopPTR
+};
+const Type *TypePtr::xdual() const {
+ return new TypePtr( AnyPtr, dual_ptr(), dual_offset() );
+}
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypePtr::add_offset( int offset ) const {
+ if( offset == 0 ) return this; // No change
+ if( _offset == OffsetBot ) return this;
+ if( offset == OffsetBot ) offset = OffsetBot;
+ else if( _offset == OffsetTop || offset == OffsetTop ) offset = OffsetTop;
+ else offset += _offset;
+ return make( AnyPtr, _ptr, offset );
+}
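+
+// Informally: concrete offsets simply add, while OffsetBot is absorbing and
+// OffsetTop contaminates, e.g. adding 8 to an AnyPtr at offset 16 gives
+// offset 24, but adding anything to an OffsetBot pointer stays OffsetBot.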
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypePtr::eq( const Type *t ) const {
+ const TypePtr *a = (const TypePtr*)t;
+ return _ptr == a->ptr() && _offset == a->offset();
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypePtr::hash(void) const {
+ return _ptr + _offset;
+}
+
+//------------------------------dump2------------------------------------------
+const char *const TypePtr::ptr_msg[TypePtr::lastPTR] = {
+ "TopPTR","AnyNull","Constant","NULL","NotNull","BotPTR"
+};
+
+#ifndef PRODUCT
+void TypePtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ if( _ptr == Null ) st->print("NULL");
+ else st->print("%s *", ptr_msg[_ptr]);
+ if( _offset == OffsetTop ) st->print("+top");
+ else if( _offset == OffsetBot ) st->print("+bot");
+ else if( _offset ) st->print("+%d", _offset);
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants
+bool TypePtr::singleton(void) const {
+ // TopPTR, Null, AnyNull, Constant are all singletons
+ return (_offset != OffsetBot) && !below_centerline(_ptr);
+}
+
+bool TypePtr::empty(void) const {
+ return (_offset == OffsetTop) || above_centerline(_ptr);
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeRawPtr *TypeRawPtr::BOTTOM;
+const TypeRawPtr *TypeRawPtr::NOTNULL;
+
+//------------------------------make-------------------------------------------
+const TypeRawPtr *TypeRawPtr::make( enum PTR ptr ) {
+ assert( ptr != Constant, "what is the constant?" );
+ assert( ptr != Null, "Use TypePtr for NULL" );
+ return (TypeRawPtr*)(new TypeRawPtr(ptr,0))->hashcons();
+}
+
+const TypeRawPtr *TypeRawPtr::make( address bits ) {
+ assert( bits, "Use TypePtr for NULL" );
+ return (TypeRawPtr*)(new TypeRawPtr(Constant,bits))->hashcons();
+}
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeRawPtr::cast_to_ptr_type(PTR ptr) const {
+ assert( ptr != Constant, "what is the constant?" );
+ assert( ptr != Null, "Use TypePtr for NULL" );
+ assert( _bits==0, "Why cast a constant address?");
+ if( ptr == _ptr ) return this;
+ return make(ptr);
+}
+
+//------------------------------get_con----------------------------------------
+intptr_t TypeRawPtr::get_con() const {
+ assert( _ptr == Null || _ptr == Constant, "" );
+ return (intptr_t)_bits;
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeRawPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is RawPtr
+ switch( t->base() ) { // switch on original type
+ case Bottom: // Ye Olde Default
+ return t;
+ case Top:
+ return this;
+ case AnyPtr: // Meeting to AnyPtrs
+ break;
+ case RawPtr: { // might be top, bot, any/not or constant
+ enum PTR tptr = t->is_ptr()->ptr();
+ enum PTR ptr = meet_ptr( tptr );
+ if( ptr == Constant ) { // Cannot be equal constants, so...
+ if( tptr == Constant && _ptr != Constant) return t;
+ if( _ptr == Constant && tptr != Constant) return this;
+ ptr = NotNull; // Fall down in lattice
+ }
+ return make( ptr );
+ }
+
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ return TypePtr::BOTTOM; // Oop meet raw is not well defined
+ default: // All else is a mistake
+ typerr(t);
+ }
+
+ // Found an AnyPtr type vs self-RawPtr type
+ const TypePtr *tp = t->is_ptr();
+ switch (tp->ptr()) {
+ case TypePtr::TopPTR: return this;
+ case TypePtr::BotPTR: return t;
+ case TypePtr::Null:
+ if( _ptr == TypePtr::TopPTR ) return t;
+ return TypeRawPtr::BOTTOM;
+ case TypePtr::NotNull: return TypePtr::make( AnyPtr, meet_ptr(TypePtr::NotNull), tp->meet_offset(0) );
+ case TypePtr::AnyNull:
+ if( _ptr == TypePtr::Constant) return this;
+ return make( meet_ptr(TypePtr::AnyNull) );
+ default: ShouldNotReachHere();
+ }
+ return this;
+}
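+
+// Informally: two raw pointers to different constant addresses cannot stay
+// Constant, so the meet above falls to NotNull; only meeting a constant with
+// a pointer above the centerline preserves the constant.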
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeRawPtr::xdual() const {
+ return new TypeRawPtr( dual_ptr(), _bits );
+}
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypeRawPtr::add_offset( int offset ) const {
+ if( offset == OffsetTop ) return BOTTOM; // Undefined offset-> undefined pointer
+ if( offset == OffsetBot ) return BOTTOM; // Unknown offset-> unknown pointer
+ if( offset == 0 ) return this; // No change
+ switch (_ptr) {
+ case TypePtr::TopPTR:
+ case TypePtr::BotPTR:
+ case TypePtr::NotNull:
+ return this;
+ case TypePtr::Null:
+ case TypePtr::Constant:
+ return make( _bits+offset );
+ default: ShouldNotReachHere();
+ }
+ return NULL; // Lint noise
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeRawPtr::eq( const Type *t ) const {
+ const TypeRawPtr *a = (const TypeRawPtr*)t;
+ return _bits == a->_bits && TypePtr::eq(t);
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeRawPtr::hash(void) const {
+ return (intptr_t)_bits + TypePtr::hash();
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeRawPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ if( _ptr == Constant )
+ st->print(INTPTR_FORMAT, _bits);
+ else
+ st->print("rawptr:%s", ptr_msg[_ptr]);
+}
+#endif
+
+//=============================================================================
+// Convenience common pre-built type.
+const TypeOopPtr *TypeOopPtr::BOTTOM;
+
+//------------------------------make-------------------------------------------
+const TypeOopPtr *TypeOopPtr::make(PTR ptr,
+ int offset) {
+ assert(ptr != Constant, "no constant generic pointers");
+ ciKlass* k = ciKlassKlass::make();
+ bool xk = false;
+ ciObject* o = NULL;
+ return (TypeOopPtr*)(new TypeOopPtr(OopPtr, ptr, k, xk, o, offset, UNKNOWN_INSTANCE))->hashcons();
+}
+
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeOopPtr::cast_to_ptr_type(PTR ptr) const {
+ assert(_base == OopPtr, "subclass must override cast_to_ptr_type");
+ if( ptr == _ptr ) return this;
+ return make(ptr, _offset);
+}
+
+//-----------------------------cast_to_instance-------------------------------
+const TypeOopPtr *TypeOopPtr::cast_to_instance(int instance_id) const {
+ // There are no instances of a general oop.
+ // Return self unchanged.
+ return this;
+}
+
+//-----------------------------cast_to_exactness-------------------------------
+const Type *TypeOopPtr::cast_to_exactness(bool klass_is_exact) const {
+ // There is no such thing as an exact general oop.
+ // Return self unchanged.
+ return this;
+}
+
+
+//------------------------------as_klass_type----------------------------------
+// Return the klass type corresponding to this instance or array type.
+// It is the type that is loaded from an object of this type.
+const TypeKlassPtr* TypeOopPtr::as_klass_type() const {
+ ciKlass* k = klass();
+ bool xk = klass_is_exact();
+ if (k == NULL || !k->is_java_klass())
+ return TypeKlassPtr::OBJECT;
+ else
+ return TypeKlassPtr::make(xk? Constant: NotNull, k, 0);
+}
+
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeOopPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is OopPtr
+ switch (t->base()) { // switch on original type
+
+ case Int: // Mixing ints & oops happens when javac
+ case Long: // reuses local variables
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case RawPtr:
+ return TypePtr::BOTTOM; // Oop meet raw is not well defined
+
+ case AnyPtr: {
+ // Found an AnyPtr type vs self-OopPtr type
+ const TypePtr *tp = t->is_ptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case Null:
+ if (ptr == Null) return TypePtr::make(AnyPtr, ptr, offset);
+ // else fall through:
+ case TopPTR:
+ case AnyNull:
+ return make(ptr, offset);
+ case BotPTR:
+ case NotNull:
+ return TypePtr::make(AnyPtr, ptr, offset);
+ default: typerr(t);
+ }
+ }
+
+ case OopPtr: { // Meeting to other OopPtrs
+ const TypeOopPtr *tp = t->is_oopptr();
+ return make( meet_ptr(tp->ptr()), meet_offset(tp->offset()) );
+ }
+
+ case InstPtr: // For these, flip the call around to cut down
+ case KlassPtr: // on the cases I have to handle.
+ case AryPtr:
+ return t->xmeet(this); // Call in reverse direction
+
+ } // End of switch
+ return this;
+}
+
+
+//------------------------------xdual------------------------------------------
+// Dual of a pure heap pointer. No relevant klass or oop information.
+const Type *TypeOopPtr::xdual() const {
+ assert(klass() == ciKlassKlass::make(), "no klasses here");
+ assert(const_oop() == NULL, "no constants here");
+ return new TypeOopPtr(_base, dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance() );
+}
+
+//--------------------------make_from_klass_common-----------------------------
+// Computes the element-type given a klass.
+const TypeOopPtr* TypeOopPtr::make_from_klass_common(ciKlass *klass, bool klass_change, bool try_for_exact) {
+ assert(klass->is_java_klass(), "must be java language klass");
+ if (klass->is_instance_klass()) {
+ Compile* C = Compile::current();
+ Dependencies* deps = C->dependencies();
+ assert((deps != NULL) == (C->method() != NULL && C->method()->code_size() > 0), "sanity");
+ // Element is an instance
+ bool klass_is_exact = false;
+ if (klass->is_loaded()) {
+ // Try to set klass_is_exact.
+ ciInstanceKlass* ik = klass->as_instance_klass();
+ klass_is_exact = ik->is_final();
+ if (!klass_is_exact && klass_change
+ && deps != NULL && UseUniqueSubclasses) {
+ ciInstanceKlass* sub = ik->unique_concrete_subklass();
+ if (sub != NULL) {
+ deps->assert_abstract_with_unique_concrete_subtype(ik, sub);
+ klass = ik = sub;
+ klass_is_exact = sub->is_final();
+ }
+ }
+ if (!klass_is_exact && try_for_exact
+ && deps != NULL && UseExactTypes) {
+ if (!ik->is_interface() && !ik->has_subklass()) {
+ // Add a dependence; if concrete subclass added we need to recompile
+ deps->assert_leaf_type(ik);
+ klass_is_exact = true;
+ }
+ }
+ }
+ return TypeInstPtr::make(TypePtr::BotPTR, klass, klass_is_exact, NULL, 0);
+ } else if (klass->is_obj_array_klass()) {
+ // Element is an object array. Recursively call ourself.
+ const TypeOopPtr *etype = TypeOopPtr::make_from_klass_common(klass->as_obj_array_klass()->element_klass(), false, try_for_exact);
+ bool xk = etype->klass_is_exact();
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::POS);
+ // We used to pass NotNull in here, asserting that the sub-arrays
+ // are all not-null. This is not true in general, as code can
+ // slam NULLs down in the subarrays.
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::BotPTR, arr0, klass, xk, 0);
+ return arr;
+ } else if (klass->is_type_array_klass()) {
+ // Element is a typeArray
+ const Type* etype = get_const_basic_type(klass->as_type_array_klass()->element_type());
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::POS);
+ // We used to pass NotNull in here, asserting that the array pointer
+ // is not-null. That was not true in general.
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::BotPTR, arr0, klass, true, 0);
+ return arr;
+ } else {
+ ShouldNotReachHere();
+ return NULL;
+ }
+}
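+
+// Example: a loaded final instance class yields
+// TypeInstPtr::make(TypePtr::BotPTR, k, true, NULL, 0), i.e. an exact klass;
+// a non-final class stays inexact unless UseUniqueSubclasses or
+// UseExactTypes allow a compile-time dependency to be recorded above.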
+
+//------------------------------make_from_constant-----------------------------
+// Make a java pointer from an oop constant
+const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o) {
+ if (o->is_method_data() || o->is_method()) {
+ // Treat much like a typeArray of bytes, like below, but fake the type...
+ assert(o->has_encoding(), "must be a perm space object");
+ const Type* etype = (Type*)get_const_basic_type(T_BYTE);
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::POS);
+ ciKlass *klass = ciTypeArrayKlass::make((BasicType) T_BYTE);
+ assert(o->has_encoding(), "method data oops should be tenured");
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
+ return arr;
+ } else {
+ assert(o->is_java_object(), "must be java language object");
+ assert(!o->is_null_object(), "null object not yet handled here.");
+ ciKlass *klass = o->klass();
+ if (klass->is_instance_klass()) {
+ // Element is an instance
+ if (!o->has_encoding()) { // not a perm-space constant
+ // %%% remove this restriction by rewriting non-perm ConPNodes in a later phase
+ return TypeInstPtr::make(TypePtr::NotNull, klass, true, NULL, 0);
+ }
+ return TypeInstPtr::make(o);
+ } else if (klass->is_obj_array_klass()) {
+ // Element is an object array. Recursively call ourself.
+ const Type *etype =
+ TypeOopPtr::make_from_klass_raw(klass->as_obj_array_klass()->element_klass());
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length()));
+ // We used to pass NotNull in here, asserting that the sub-arrays
+ // are all not-null. This is not true in general, as code can
+ // slam NULLs down in the subarrays.
+ if (!o->has_encoding()) { // not a perm-space constant
+ // %%% remove this restriction by rewriting non-perm ConPNodes in a later phase
+ return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
+ }
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
+ return arr;
+ } else if (klass->is_type_array_klass()) {
+ // Element is a typeArray
+ const Type* etype =
+ (Type*)get_const_basic_type(klass->as_type_array_klass()->element_type());
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length()));
+ // We used to pass NotNull in here, asserting that the array pointer
+ // is not-null. That was not true in general.
+ if (!o->has_encoding()) { // not a perm-space constant
+ // %%% remove this restriction by rewriting non-perm ConPNodes in a later phase
+ return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
+ }
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
+ return arr;
+ }
+ }
+
+ ShouldNotReachHere();
+ return NULL;
+}
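+
+// Informally: a perm-space instance constant (o->has_encoding()) becomes a
+// Constant TypeInstPtr carrying the oop itself, while a non-perm constant
+// degrades to a NotNull, exact instance type until the restriction noted in
+// the %%% comments above is lifted.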
+
+//------------------------------get_con----------------------------------------
+intptr_t TypeOopPtr::get_con() const {
+ assert( _ptr == Null || _ptr == Constant, "" );
+ assert( _offset >= 0, "" );
+
+ if (_offset != 0) {
+ // After being ported to the compiler interface, the compiler no longer
+ // directly manipulates the addresses of oops. Rather, it only has a pointer
+ // to a handle at compile time. This handle is embedded in the generated
+ // code and dereferenced at the time the nmethod is made. Until that time,
+ // it is not reasonable to do arithmetic with the addresses of oops (we don't
+ // have access to the addresses!). This does not seem to currently happen,
+ // but this assertion here is to help prevent its occurrence.
+ tty->print_cr("Found oop constant with non-zero offset");
+ ShouldNotReachHere();
+ }
+
+ return (intptr_t)const_oop()->encoding();
+}
+
+
+//-----------------------------filter------------------------------------------
+// Do not allow interface-vs.-noninterface joins to collapse to top.
+const Type *TypeOopPtr::filter( const Type *kills ) const {
+
+ const Type* ft = join(kills);
+ const TypeInstPtr* ftip = ft->isa_instptr();
+ const TypeInstPtr* ktip = kills->isa_instptr();
+
+ if (ft->empty()) {
+ // Check for evil case of 'this' being a class and 'kills' expecting an
+ // interface. This can happen because the bytecodes do not contain
+ // enough type info to distinguish a Java-level interface variable
+ // from a Java-level object variable. If we meet 2 classes which
+ // both implement interface I, but their meet is at 'j/l/O' which
+ // doesn't implement I, we have no way to tell if the result should
+ // be 'I' or 'j/l/O'. Thus we'll pick 'j/l/O'. If this then flows
+ // into a Phi which "knows" it's an Interface type we'll have to
+ // uplift the type.
+ if (!empty() && ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface())
+ return kills; // Uplift to interface
+
+ return Type::TOP; // Canonical empty value
+ }
+
+ // If we have an interface-typed Phi or cast and we narrow to a class type,
+ // the join should report back the class. However, if we have a J/L/Object
+ // class-typed Phi and an interface flows in, it's possible that the meet &
+ // join report an interface back out. This should not be possible, but it happens
+ // because the type system doesn't interact well with interfaces.
+ if (ftip != NULL && ktip != NULL &&
+ ftip->is_loaded() && ftip->klass()->is_interface() &&
+ ktip->is_loaded() && !ktip->klass()->is_interface()) {
+ // Happens in a CTW of rt.jar, 320-341, no extra flags
+ return ktip->cast_to_ptr_type(ftip->ptr());
+ }
+
+ return ft;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeOopPtr::eq( const Type *t ) const {
+ const TypeOopPtr *a = (const TypeOopPtr*)t;
+ if (_klass_is_exact != a->_klass_is_exact ||
+ _instance_id != a->_instance_id) return false;
+ ciObject* one = const_oop();
+ ciObject* two = a->const_oop();
+ if (one == NULL || two == NULL) {
+ return (one == two) && TypePtr::eq(t);
+ } else {
+ return one->equals(two) && TypePtr::eq(t);
+ }
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeOopPtr::hash(void) const {
+ return
+ (const_oop() ? const_oop()->hash() : 0) +
+ _klass_is_exact +
+ _instance_id +
+ TypePtr::hash();
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeOopPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ st->print("oopptr:%s", ptr_msg[_ptr]);
+ if( _klass_is_exact ) st->print(":exact");
+ if( const_oop() ) st->print(INTPTR_FORMAT, const_oop());
+ switch( _offset ) {
+ case OffsetTop: st->print("+top"); break;
+ case OffsetBot: st->print("+any"); break;
+ case 0: break;
+ default: st->print("+%d",_offset); break;
+ }
+ if (_instance_id != UNKNOWN_INSTANCE)
+ st->print(",iid=%d",_instance_id);
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants
+bool TypeOopPtr::singleton(void) const {
+ // detune optimizer to not generate constant oop + constant offset as a constant!
+ // TopPTR, Null, AnyNull, Constant are all singletons
+ return (_offset == 0) && !below_centerline(_ptr);
+}
+
+//------------------------------xadd_offset------------------------------------
+int TypeOopPtr::xadd_offset( int offset ) const {
+ // Adding to 'TOP' offset? Return 'TOP'!
+ if( _offset == OffsetTop || offset == OffsetTop ) return OffsetTop;
+ // Adding to 'BOTTOM' offset? Return 'BOTTOM'!
+ if( _offset == OffsetBot || offset == OffsetBot ) return OffsetBot;
+
+ // assert( _offset >= 0 && _offset+offset >= 0, "" );
+ // It is possible to construct a negative offset during PhaseCCP
+
+ return _offset+offset; // Sum valid offsets
+}
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypeOopPtr::add_offset( int offset ) const {
+ return make( _ptr, xadd_offset(offset) );
+}
+
+int TypeOopPtr::meet_instance(int iid) const {
+ if (iid == 0) {
+ return (_instance_id < 0) ? _instance_id : UNKNOWN_INSTANCE;
+ } else if (_instance_id == UNKNOWN_INSTANCE) {
+ return (iid < 0) ? iid : UNKNOWN_INSTANCE;
+ } else {
+ return (_instance_id == iid) ? iid : UNKNOWN_INSTANCE;
+ }
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeInstPtr *TypeInstPtr::NOTNULL;
+const TypeInstPtr *TypeInstPtr::BOTTOM;
+const TypeInstPtr *TypeInstPtr::MIRROR;
+const TypeInstPtr *TypeInstPtr::MARK;
+const TypeInstPtr *TypeInstPtr::KLASS;
+
+//------------------------------TypeInstPtr-------------------------------------
+TypeInstPtr::TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int off, int instance_id)
+ : TypeOopPtr(InstPtr, ptr, k, xk, o, off, instance_id), _name(k->name()) {
+ assert(k != NULL &&
+ (k->is_loaded() || o == NULL),
+ "cannot have constants with non-loaded klass");
+};
+
+//------------------------------make-------------------------------------------
+const TypeInstPtr *TypeInstPtr::make(PTR ptr,
+ ciKlass* k,
+ bool xk,
+ ciObject* o,
+ int offset,
+ int instance_id) {
+ assert( !k->is_loaded() || k->is_instance_klass() ||
+ k->is_method_klass(), "Must be for instance or method");
+ // Either const_oop() is NULL or else ptr is Constant
+ assert( (!o && ptr != Constant) || (o && ptr == Constant),
+ "constant pointers must have a value supplied" );
+ // Ptr is never Null
+ assert( ptr != Null, "NULL pointers are not typed" );
+
+ if (instance_id != UNKNOWN_INSTANCE)
+ xk = true; // instances are always exactly typed
+ if (!UseExactTypes) xk = false;
+ if (ptr == Constant) {
+ // Note: This case includes meta-object constants, such as methods.
+ xk = true;
+ } else if (k->is_loaded()) {
+ ciInstanceKlass* ik = k->as_instance_klass();
+ if (!xk && ik->is_final()) xk = true; // no inexact final klass
+ if (xk && ik->is_interface()) xk = false; // no exact interface
+ }
+
+ // Now hash this baby
+ TypeInstPtr *result =
+ (TypeInstPtr*)(new TypeInstPtr(ptr, k, xk, o ,offset, instance_id))->hashcons();
+
+ return result;
+}
+
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeInstPtr::cast_to_ptr_type(PTR ptr) const {
+ if( ptr == _ptr ) return this;
+ // There is no need to reconstruct _sig info here; with later lazy
+ // construction, _sig will show up on demand.
+ return make(ptr, klass(), klass_is_exact(), const_oop(), _offset);
+}
+
+
+//-----------------------------cast_to_exactness-------------------------------
+const Type *TypeInstPtr::cast_to_exactness(bool klass_is_exact) const {
+ if( klass_is_exact == _klass_is_exact ) return this;
+ if (!UseExactTypes) return this;
+ if (!_klass->is_loaded()) return this;
+ ciInstanceKlass* ik = _klass->as_instance_klass();
+ if( (ik->is_final() || _const_oop) ) return this; // cannot clear xk
+ if( ik->is_interface() ) return this; // cannot set xk
+ return make(ptr(), klass(), klass_is_exact, const_oop(), _offset, _instance_id);
+}
+
+//-----------------------------cast_to_instance-------------------------------
+const TypeOopPtr *TypeInstPtr::cast_to_instance(int instance_id) const {
+ if( instance_id == _instance_id) return this;
+ bool exact = (instance_id == UNKNOWN_INSTANCE) ? _klass_is_exact : true;
+
+ return make(ptr(), klass(), exact, const_oop(), _offset, instance_id);
+}
+
+//------------------------------xmeet_unloaded---------------------------------
+// Compute the MEET of two InstPtrs when at least one is unloaded.
+// Assume classes are different since called after check for same name/class-loader
+const TypeInstPtr *TypeInstPtr::xmeet_unloaded(const TypeInstPtr *tinst) const {
+ int off = meet_offset(tinst->offset());
+ PTR ptr = meet_ptr(tinst->ptr());
+
+ const TypeInstPtr *loaded = is_loaded() ? this : tinst;
+ const TypeInstPtr *unloaded = is_loaded() ? tinst : this;
+ if( loaded->klass()->equals(ciEnv::current()->Object_klass()) ) {
+ //
+ // Meet unloaded class with java/lang/Object
+ //
+ // Meet
+ // | Unloaded Class
+ // Object | TOP | AnyNull | Constant | NotNull | BOTTOM |
+ // ===================================================================
+ // TOP | ..........................Unloaded......................|
+ // AnyNull | U-AN |................Unloaded......................|
+ // Constant | ... O-NN .................................. | O-BOT |
+ // NotNull | ... O-NN .................................. | O-BOT |
+ // BOTTOM | ........................Object-BOTTOM ..................|
+ //
+ assert(loaded->ptr() != TypePtr::Null, "insanity check");
+ //
+ if( loaded->ptr() == TypePtr::TopPTR ) { return unloaded; }
+ else if (loaded->ptr() == TypePtr::AnyNull) { return TypeInstPtr::make( ptr, unloaded->klass() ); }
+ else if (loaded->ptr() == TypePtr::BotPTR ) { return TypeInstPtr::BOTTOM; }
+ else if (loaded->ptr() == TypePtr::Constant || loaded->ptr() == TypePtr::NotNull) {
+ if (unloaded->ptr() == TypePtr::BotPTR ) { return TypeInstPtr::BOTTOM; }
+ else { return TypeInstPtr::NOTNULL; }
+ }
+ else if( unloaded->ptr() == TypePtr::TopPTR ) { return unloaded; }
+
+ return unloaded->cast_to_ptr_type(TypePtr::AnyNull)->is_instptr();
+ }
+
+ // Both are unloaded, not the same class, not Object
+ // Or meet unloaded with a different loaded class, not java/lang/Object
+ if( ptr != TypePtr::BotPTR ) {
+ return TypeInstPtr::NOTNULL;
+ }
+ return TypeInstPtr::BOTTOM;
+}
+
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeInstPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Pointer
+ switch (t->base()) { // switch on original type
+
+ case Int: // Mixing ints & oops happens when javac
+ case Long: // reuses local variables
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case RawPtr: return TypePtr::BOTTOM;
+
+ case AryPtr: { // All arrays inherit from Object class
+ const TypeAryPtr *tp = t->is_aryptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ int iid = meet_instance(tp->instance_id());
+ switch (ptr) {
+ case TopPTR:
+ case AnyNull: // Fall 'down' to dual of object klass
+ if (klass()->equals(ciEnv::current()->Object_klass())) {
+ return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, iid);
+ } else {
+ // cannot subclass, so the meet has to fall badly below the centerline
+ ptr = NotNull;
+ return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, iid);
+ }
+ case Constant:
+ case NotNull:
+ case BotPTR: // Fall down to object klass
+ // LCA is object_klass, but if we subclass from the top we can do better
+ if( above_centerline(_ptr) ) { // if( _ptr == TopPTR || _ptr == AnyNull )
+ // If 'this' (InstPtr) is above the centerline and it is Object class
+ // then we can subclass in the Java class hierarchy.
+ if (klass()->equals(ciEnv::current()->Object_klass())) {
+ // that is, tp's array type is a subtype of my klass
+ return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, iid);
+ }
+ }
+ // The other case cannot happen, since I cannot be a subtype of an array.
+ // The meet falls down to Object class below centerline.
+ if( ptr == Constant )
+ ptr = NotNull;
+ return make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, iid );
+ default: typerr(t);
+ }
+ }
+
+ case OopPtr: { // Meeting to OopPtrs
+ // Found an OopPtr type vs self-InstPtr type
+ const TypePtr *tp = t->is_oopptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ case AnyNull:
+ return make(ptr, klass(), klass_is_exact(),
+ (ptr == Constant ? const_oop() : NULL), offset);
+ case NotNull:
+ case BotPTR:
+ return TypeOopPtr::make(ptr, offset);
+ default: typerr(t);
+ }
+ }
+
+ case AnyPtr: { // Meeting to AnyPtrs
+ // Found an AnyPtr type vs self-InstPtr type
+ const TypePtr *tp = t->is_ptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case Null:
+ if( ptr == Null ) return TypePtr::make( AnyPtr, ptr, offset );
+ case TopPTR:
+ case AnyNull:
+ return make( ptr, klass(), klass_is_exact(),
+ (ptr == Constant ? const_oop() : NULL), offset );
+ case NotNull:
+ case BotPTR:
+ return TypePtr::make( AnyPtr, ptr, offset );
+ default: typerr(t);
+ }
+ }
+
+ /*
+ A-top }
+ / | \ } Tops
+ B-top A-any C-top }
+ | / | \ | } Any-nulls
+ B-any | C-any }
+ | | |
+ B-con A-con C-con } constants; not comparable across classes
+ | | |
+ B-not | C-not }
+ | \ | / | } not-nulls
+ B-bot A-not C-bot }
+ \ | / } Bottoms
+ A-bot }
+ */
+
+ case InstPtr: { // Meeting 2 Oops?
+ // Found an InstPtr sub-type vs self-InstPtr type
+ const TypeInstPtr *tinst = t->is_instptr();
+ int off = meet_offset( tinst->offset() );
+ PTR ptr = meet_ptr( tinst->ptr() );
+ int instance_id = meet_instance(tinst->instance_id());
+
+ // Check for easy case; klasses are equal (and perhaps not loaded!)
+ // If we have constants, then we created oops so classes are loaded
+ // and we can handle the constants further down. This case handles
+ // both-not-loaded or both-loaded classes
+ if (ptr != Constant && klass()->equals(tinst->klass()) && klass_is_exact() == tinst->klass_is_exact()) {
+ return make( ptr, klass(), klass_is_exact(), NULL, off, instance_id );
+ }
+
+ // Classes require inspection in the Java klass hierarchy. Must be loaded.
+ ciKlass* tinst_klass = tinst->klass();
+ ciKlass* this_klass = this->klass();
+ bool tinst_xk = tinst->klass_is_exact();
+ bool this_xk = this->klass_is_exact();
+ if (!tinst_klass->is_loaded() || !this_klass->is_loaded() ) {
+ // One of these classes has not been loaded
+ const TypeInstPtr *unloaded_meet = xmeet_unloaded(tinst);
+#ifndef PRODUCT
+ if( PrintOpto && Verbose ) {
+ tty->print("meet of unloaded classes resulted in: "); unloaded_meet->dump(); tty->cr();
+ tty->print(" this == "); this->dump(); tty->cr();
+ tty->print(" tinst == "); tinst->dump(); tty->cr();
+ }
+#endif
+ return unloaded_meet;
+ }
+
+ // Handle mixing oops and interfaces first.
+ if( this_klass->is_interface() && !tinst_klass->is_interface() ) {
+ ciKlass *tmp = tinst_klass; // Swap interface around
+ tinst_klass = this_klass;
+ this_klass = tmp;
+ bool tmp2 = tinst_xk;
+ tinst_xk = this_xk;
+ this_xk = tmp2;
+ }
+ if (tinst_klass->is_interface() &&
+ !(this_klass->is_interface() ||
+ // Treat java/lang/Object as an honorary interface,
+ // because we need a bottom for the interface hierarchy.
+ this_klass == ciEnv::current()->Object_klass())) {
+ // Oop meets interface!
+
+ // See if the oop subtypes (implements) interface.
+ ciKlass *k;
+ bool xk;
+ if( this_klass->is_subtype_of( tinst_klass ) ) {
+ // Oop indeed subtypes. Now keep oop or interface depending
+ // on whether we are both above the centerline or either is
+ // below the centerline. If we are on the centerline
+ // (e.g., Constant vs. AnyNull interface), use the constant.
+ k = below_centerline(ptr) ? tinst_klass : this_klass;
+ // If we are keeping this_klass, keep its exactness too.
+ xk = below_centerline(ptr) ? tinst_xk : this_xk;
+ } else { // Does not implement, fall to Object
+ // Oop does not implement interface, so mixing falls to Object
+ // just like the verifier does (if both are above the
+ // centerline fall to interface)
+ k = above_centerline(ptr) ? tinst_klass : ciEnv::current()->Object_klass();
+ xk = above_centerline(ptr) ? tinst_xk : false;
+ // Watch out for Constant vs. AnyNull interface.
+ if (ptr == Constant) ptr = NotNull; // forget it was a constant
+ }
+ ciObject* o = NULL; // the Constant value, if any
+ if (ptr == Constant) {
+ // Find out which constant.
+ o = (this_klass == klass()) ? const_oop() : tinst->const_oop();
+ }
+ return make( ptr, k, xk, o, off );
+ }
+
+ // Either oop vs oop or interface vs interface or interface vs Object
+
+ // !!! Here's how the symmetry requirement breaks down into invariants:
+ // If we split one up & one down AND they subtype, take the down man.
+ // If we split one up & one down AND they do NOT subtype, "fall hard".
+ // If both are up and they subtype, take the subtype class.
+ // If both are up and they do NOT subtype, "fall hard".
+ // If both are down and they subtype, take the supertype class.
+ // If both are down and they do NOT subtype, "fall hard".
+ // Constants treated as down.
+
+ // Now, reorder the above list; observe that both-down+subtype is also
+ // "fall hard"; "fall hard" becomes the default case:
+ // If we split one up & one down AND they subtype, take the down man.
+ // If both are up and they subtype, take the subtype class.
+
+ // If both are down and they subtype, "fall hard".
+ // If both are down and they do NOT subtype, "fall hard".
+ // If both are up and they do NOT subtype, "fall hard".
+ // If we split one up & one down AND they do NOT subtype, "fall hard".
+
+ // If a proper subtype is exact, and we return it, we return it exactly.
+ // If a proper supertype is exact, there can be no subtyping relationship!
+ // If both types are equal to the subtype, exactness is and-ed below the
+ // centerline and or-ed above it. (N.B. Constants are always exact.)
+
+ // Check for subtyping:
+ ciKlass *subtype = NULL;
+ bool subtype_exact = false;
+ if( tinst_klass->equals(this_klass) ) {
+ subtype = this_klass;
+ subtype_exact = below_centerline(ptr) ? (this_xk & tinst_xk) : (this_xk | tinst_xk);
+ } else if( !tinst_xk && this_klass->is_subtype_of( tinst_klass ) ) {
+ subtype = this_klass; // Pick subtyping class
+ subtype_exact = this_xk;
+ } else if( !this_xk && tinst_klass->is_subtype_of( this_klass ) ) {
+ subtype = tinst_klass; // Pick subtyping class
+ subtype_exact = tinst_xk;
+ }
+
+ if( subtype ) {
+ if( above_centerline(ptr) ) { // both are up?
+ this_klass = tinst_klass = subtype;
+ this_xk = tinst_xk = subtype_exact;
+ } else if( above_centerline(this ->_ptr) && !above_centerline(tinst->_ptr) ) {
+ this_klass = tinst_klass; // tinst is down; keep down man
+ this_xk = tinst_xk;
+ } else if( above_centerline(tinst->_ptr) && !above_centerline(this ->_ptr) ) {
+ tinst_klass = this_klass; // this is down; keep down man
+ tinst_xk = this_xk;
+ } else {
+ this_xk = subtype_exact; // either they are equal, or we'll do an LCA
+ }
+ }
+
+ // Check for classes now being equal
+ if (tinst_klass->equals(this_klass)) {
+ // If the klasses are equal, the constants may still differ. Fall to
+ // NotNull if they do (neither constant is NULL; that is a special case
+ // handled elsewhere).
+ ciObject* o = NULL; // Assume not constant when done
+ ciObject* this_oop = const_oop();
+ ciObject* tinst_oop = tinst->const_oop();
+ if( ptr == Constant ) {
+ if (this_oop != NULL && tinst_oop != NULL &&
+ this_oop->equals(tinst_oop) )
+ o = this_oop;
+ else if (above_centerline(this ->_ptr))
+ o = tinst_oop;
+ else if (above_centerline(tinst ->_ptr))
+ o = this_oop;
+ else
+ ptr = NotNull;
+ }
+ return make( ptr, this_klass, this_xk, o, off, instance_id );
+ } // Else classes are not equal
+
+ // Since klasses are different, we require a LCA in the Java
+ // class hierarchy - which means we have to fall to at least NotNull.
+ if( ptr == TopPTR || ptr == AnyNull || ptr == Constant )
+ ptr = NotNull;
+
+ // Now we find the LCA of Java classes
+ ciKlass* k = this_klass->least_common_ancestor(tinst_klass);
+ return make( ptr, k, false, NULL, off );
+ } // End of case InstPtr
+
+ case KlassPtr:
+ return TypeInstPtr::BOTTOM;
+
+ } // End of switch
+ return this;
+}
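+
+// Informal examples of the InstPtr-vs-InstPtr case above (assuming loaded,
+// non-interface classes): meeting NotNull references to two unrelated
+// classes falls to NotNull of their least common ancestor (ultimately
+// java/lang/Object); meeting a class with one of its subclasses keeps the
+// subclass when both inputs are above the centerline, and the superclass
+// when both are below it.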
+
+
+//------------------------java_mirror_type--------------------------------------
+ciType* TypeInstPtr::java_mirror_type() const {
+ // must be a singleton type
+ if( const_oop() == NULL ) return NULL;
+
+ // must be of type java.lang.Class
+ if( klass() != ciEnv::current()->Class_klass() ) return NULL;
+
+ return const_oop()->as_instance()->java_mirror_type();
+}
+
+
+//------------------------------xdual------------------------------------------
+// Dual: do NOT dual on klasses. This means I do NOT understand the Java
+// inheritance mechanism.
+const Type *TypeInstPtr::xdual() const {
+ return new TypeInstPtr( dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance() );
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeInstPtr::eq( const Type *t ) const {
+ const TypeInstPtr *p = t->is_instptr();
+ return
+ klass()->equals(p->klass()) &&
+ TypeOopPtr::eq(p); // Check sub-type stuff
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeInstPtr::hash(void) const {
+ int hash = klass()->hash() + TypeOopPtr::hash();
+ return hash;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump oop Type
+#ifndef PRODUCT
+void TypeInstPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ // Print the name of the klass.
+ klass()->print_name_on(st);
+
+ switch( _ptr ) {
+ case Constant:
+ // TO DO: Make CI print the hex address of the underlying oop.
+ if (WizardMode || Verbose) {
+ const_oop()->print_oop(st);
+ }
+ case BotPTR:
+ if (!WizardMode && !Verbose) {
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+ case TopPTR:
+ case AnyNull:
+ case NotNull:
+ st->print(":%s", ptr_msg[_ptr]);
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+
+ if( _offset ) { // Dump offset, if any
+ if( _offset == OffsetBot ) st->print("+any");
+ else if( _offset == OffsetTop ) st->print("+unknown");
+ else st->print("+%d", _offset);
+ }
+
+ st->print(" *");
+ if (_instance_id != UNKNOWN_INSTANCE)
+ st->print(",iid=%d",_instance_id);
+}
+#endif
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypeInstPtr::add_offset( int offset ) const {
+ return make( _ptr, klass(), klass_is_exact(), const_oop(), xadd_offset(offset), _instance_id );
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeAryPtr *TypeAryPtr::RANGE;
+const TypeAryPtr *TypeAryPtr::OOPS;
+const TypeAryPtr *TypeAryPtr::BYTES;
+const TypeAryPtr *TypeAryPtr::SHORTS;
+const TypeAryPtr *TypeAryPtr::CHARS;
+const TypeAryPtr *TypeAryPtr::INTS;
+const TypeAryPtr *TypeAryPtr::LONGS;
+const TypeAryPtr *TypeAryPtr::FLOATS;
+const TypeAryPtr *TypeAryPtr::DOUBLES;
+
+//------------------------------make-------------------------------------------
+const TypeAryPtr *TypeAryPtr::make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) {
+ assert(!(k == NULL && ary->_elem->isa_int()),
+ "integral arrays must be pre-equipped with a class");
+ if (!xk) xk = ary->ary_must_be_exact();
+ if (instance_id != UNKNOWN_INSTANCE)
+ xk = true; // instances are always exactly typed
+ if (!UseExactTypes) xk = (ptr == Constant);
+ return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id))->hashcons();
+}
+
+//------------------------------make-------------------------------------------
+const TypeAryPtr *TypeAryPtr::make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) {
+ assert(!(k == NULL && ary->_elem->isa_int()),
+ "integral arrays must be pre-equipped with a class");
+ assert( (ptr==Constant && o) || (ptr!=Constant && !o), "" );
+ if (!xk) xk = (o != NULL) || ary->ary_must_be_exact();
+ if (instance_id != UNKNOWN_INSTANCE)
+ xk = true; // instances are always exactly typed
+ if (!UseExactTypes) xk = (ptr == Constant);
+ return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id))->hashcons();
+}
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeAryPtr::cast_to_ptr_type(PTR ptr) const {
+ if( ptr == _ptr ) return this;
+ return make(ptr, const_oop(), _ary, klass(), klass_is_exact(), _offset);
+}
+
+
+//-----------------------------cast_to_exactness-------------------------------
+const Type *TypeAryPtr::cast_to_exactness(bool klass_is_exact) const {
+ if( klass_is_exact == _klass_is_exact ) return this;
+ if (!UseExactTypes) return this;
+ if (_ary->ary_must_be_exact()) return this; // cannot clear xk
+ return make(ptr(), const_oop(), _ary, klass(), klass_is_exact, _offset, _instance_id);
+}
+
+//-----------------------------cast_to_instance-------------------------------
+const TypeOopPtr *TypeAryPtr::cast_to_instance(int instance_id) const {
+ if( instance_id == _instance_id) return this;
+ bool exact = (instance_id == UNKNOWN_INSTANCE) ? _klass_is_exact : true;
+ return make(ptr(), const_oop(), _ary, klass(), exact, _offset, instance_id);
+}
+
+//-----------------------------narrow_size_type-------------------------------
+// Local cache for arrayOopDesc::max_array_length(etype),
+// which is kind of slow (and cached elsewhere by other users).
+static jint max_array_length_cache[T_CONFLICT+1];
+static jint max_array_length(BasicType etype) {
+ jint& cache = max_array_length_cache[etype];
+ jint res = cache;
+ if (res == 0) {
+ switch (etype) {
+ case T_CONFLICT:
+ case T_ILLEGAL:
+ case T_VOID:
+ etype = T_BYTE; // will produce conservatively high value
+ }
+ cache = res = arrayOopDesc::max_array_length(etype);
+ }
+ return res;
+}
+
+// Narrow the given size type to the index range for the given array base type.
+// Return NULL if the resulting int type becomes empty.
+const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) {
+ jint hi = size->_hi;
+ jint lo = size->_lo;
+ jint min_lo = 0;
+ jint max_hi = max_array_length(elem);
+ //if (index_not_size) --max_hi; // type of a valid array index, FTR
+ bool chg = false;
+ if (lo < min_lo) { lo = min_lo; chg = true; }
+ if (hi > max_hi) { hi = max_hi; chg = true; }
+ if (lo > hi)
+ return NULL;
+ if (!chg)
+ return size;
+ return TypeInt::make(lo, hi, Type::WidenMin);
+}
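+
+// Editor's note: an illustrative, standalone restatement (not part of the original
+// sources) of the clamping that narrow_size_type performs: the size interval is pushed
+// into [0, max_array_length] and an empty result corresponds to the NULL return above.
+// ClampedRange and clamp_size_range are invented names using plain ints.
+struct ClampedRange { bool empty; int lo; int hi; };
+static ClampedRange clamp_size_range(int lo, int hi, int max_len) {
+  ClampedRange r = { false, lo, hi };
+  if (r.lo < 0)       r.lo = 0;        // an array size can never be negative
+  if (r.hi > max_len) r.hi = max_len;  // nor can it exceed the maximum array length
+  if (r.lo > r.hi)    r.empty = true;  // bounds crossed: the range is empty
+  return r;
+}
+// e.g. clamp_size_range(-5, 10, 8) yields [0,8]; clamp_size_range(9, 4, 8) is empty.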
+
+//-------------------------------cast_to_size----------------------------------
+const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const {
+ assert(new_size != NULL, "");
+ new_size = narrow_size_type(new_size, elem()->basic_type());
+ if (new_size == NULL) // Negative length arrays will produce weird
+ new_size = TypeInt::ZERO; // intermediate dead fast-path goo
+ if (new_size == size()) return this;
+ const TypeAry* new_ary = TypeAry::make(elem(), new_size);
+ return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset);
+}
+
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeAryPtr::eq( const Type *t ) const {
+ const TypeAryPtr *p = t->is_aryptr();
+ return
+ _ary == p->_ary && // Check array
+ TypeOopPtr::eq(p); // Check sub-parts
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeAryPtr::hash(void) const {
+ return (intptr_t)_ary + TypeOopPtr::hash();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeAryPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+ // Current "this->_base" is Pointer
+ switch (t->base()) { // switch on original type
+
+ // Mixing ints & oops happens when javac reuses local variables
+ case Int:
+ case Long:
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case OopPtr: { // Meeting to OopPtrs
+ // Found an OopPtr type vs self-AryPtr type
+ const TypePtr *tp = t->is_oopptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ case AnyNull:
+ return make(ptr, (ptr == Constant ? const_oop() : NULL), _ary, _klass, _klass_is_exact, offset);
+ case BotPTR:
+ case NotNull:
+ return TypeOopPtr::make(ptr, offset);
+ default: ShouldNotReachHere();
+ }
+ }
+
+ case AnyPtr: { // Meeting two AnyPtrs
+ // Found an AnyPtr type vs self-AryPtr type
+ const TypePtr *tp = t->is_ptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ return this;
+ case BotPTR:
+ case NotNull:
+ return TypePtr::make(AnyPtr, ptr, offset);
+ case Null:
+ if( ptr == Null ) return TypePtr::make(AnyPtr, ptr, offset);
+ case AnyNull:
+ return make( ptr, (ptr == Constant ? const_oop() : NULL), _ary, _klass, _klass_is_exact, offset );
+ default: ShouldNotReachHere();
+ }
+ }
+
+ case RawPtr: return TypePtr::BOTTOM;
+
+ case AryPtr: { // Meeting 2 references?
+ const TypeAryPtr *tap = t->is_aryptr();
+ int off = meet_offset(tap->offset());
+ const TypeAry *tary = _ary->meet(tap->_ary)->is_ary();
+ PTR ptr = meet_ptr(tap->ptr());
+ int iid = meet_instance(tap->instance_id());
+ ciKlass* lazy_klass = NULL;
+ if (tary->_elem->isa_int()) {
+ // Integral array element types have irrelevant lattice relations.
+ // It is the klass that determines array layout, not the element type.
+ if (_klass == NULL)
+ lazy_klass = tap->_klass;
+ else if (tap->_klass == NULL || tap->_klass == _klass) {
+ lazy_klass = _klass;
+ } else {
+ // Something like byte[int+] meets char[int+].
+ // This must fall to bottom, not (int[-128..65535])[int+].
+ tary = TypeAry::make(Type::BOTTOM, tary->_size);
+ }
+ }
+ bool xk;
+ switch (tap->ptr()) {
+ case AnyNull:
+ case TopPTR:
+ // Compute new klass on demand, do not use tap->_klass
+ xk = (tap->_klass_is_exact | this->_klass_is_exact);
+ return make( ptr, const_oop(), tary, lazy_klass, xk, off );
+ case Constant: {
+ ciObject* o = const_oop();
+ if( _ptr == Constant ) {
+ if( tap->const_oop() != NULL && !o->equals(tap->const_oop()) ) {
+ ptr = NotNull;
+ o = NULL;
+ }
+ } else if( above_centerline(_ptr) ) {
+ o = tap->const_oop();
+ }
+ xk = true;
+ return TypeAryPtr::make( ptr, o, tary, tap->_klass, xk, off );
+ }
+ case NotNull:
+ case BotPTR:
+ // Compute new klass on demand, do not use tap->_klass
+ if (above_centerline(this->_ptr))
+ xk = tap->_klass_is_exact;
+ else if (above_centerline(tap->_ptr))
+ xk = this->_klass_is_exact;
+ else xk = (tap->_klass_is_exact & this->_klass_is_exact) &&
+ (klass() == tap->klass()); // Only precise for identical arrays
+ return TypeAryPtr::make( ptr, NULL, tary, lazy_klass, xk, off, iid );
+ default: ShouldNotReachHere();
+ }
+ }
+
+ // All arrays inherit from Object class
+ case InstPtr: {
+ const TypeInstPtr *tp = t->is_instptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ int iid = meet_instance(tp->instance_id());
+ switch (ptr) {
+ case TopPTR:
+ case AnyNull: // Fall 'down' to dual of object klass
+ if( tp->klass()->equals(ciEnv::current()->Object_klass()) ) {
+ return TypeAryPtr::make( ptr, _ary, _klass, _klass_is_exact, offset, iid );
+ } else {
+ // cannot subclass, so the meet has to fall badly below the centerline
+ ptr = NotNull;
+ return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL,offset, iid);
+ }
+ case Constant:
+ case NotNull:
+ case BotPTR: // Fall down to object klass
+ // LCA is object_klass, but if we subclass from the top we can do better
+ if (above_centerline(tp->ptr())) {
+ // If 'tp' is above the centerline and it is Object class
+ // then we can subclass in the Java class hierarchy.
+ if( tp->klass()->equals(ciEnv::current()->Object_klass()) ) {
+ // that is, my array type is a subtype of 'tp' klass
+ return make( ptr, _ary, _klass, _klass_is_exact, offset, iid );
+ }
+ }
+ // The other case cannot happen, since t cannot be a subtype of an array.
+ // The meet falls down to Object class below centerline.
+ if( ptr == Constant )
+ ptr = NotNull;
+ return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL,offset, iid);
+ default: typerr(t);
+ }
+ }
+
+ case KlassPtr:
+ return TypeInstPtr::BOTTOM;
+
+ }
+ return this; // Lint noise
+}
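+
+// Editor's note: an illustrative, self-contained sketch (not part of the original
+// sources) of the exactness merge used in the NotNull/BotPTR arm above. An array type
+// stays exact across a meet only if both inputs were exact and named the same klass,
+// unless one input is above the centerline and therefore contributes nothing.
+// merge_exactness and its parameters are invented names.
+static bool merge_exactness(bool this_above, bool that_above,
+                            bool this_exact, bool that_exact,
+                            bool same_klass) {
+  if (this_above) return that_exact;              // vacuous left input: keep the right answer
+  if (that_above) return this_exact;              // vacuous right input: keep the left answer
+  return this_exact && that_exact && same_klass;  // only identical exact arrays stay exact
+}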
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeAryPtr::xdual() const {
+ return new TypeAryPtr( dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance() );
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeAryPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ _ary->dump2(d,depth,st);
+ switch( _ptr ) {
+ case Constant:
+ const_oop()->print(st);
+ break;
+ case BotPTR:
+ if (!WizardMode && !Verbose) {
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+ case TopPTR:
+ case AnyNull:
+ case NotNull:
+ st->print(":%s", ptr_msg[_ptr]);
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+
+ st->print("*");
+ if (_instance_id != UNKNOWN_INSTANCE)
+ st->print(",iid=%d",_instance_id);
+ if( !_offset ) return;
+ if( _offset == OffsetTop ) st->print("+undefined");
+ else if( _offset == OffsetBot ) st->print("+any");
+ else if( _offset < 12 ) st->print("+%d",_offset);
+ else st->print("[%d]", (_offset-12)/4 );
+}
+#endif
+
+bool TypeAryPtr::empty(void) const {
+ if (_ary->empty()) return true;
+ return TypeOopPtr::empty();
+}
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypeAryPtr::add_offset( int offset ) const {
+ return make( _ptr, _const_oop, _ary, _klass, _klass_is_exact, xadd_offset(offset), _instance_id );
+}
+
+
+//=============================================================================
+// Convenience common pre-built types.
+
+// Not-null object klass or below
+const TypeKlassPtr *TypeKlassPtr::OBJECT;
+const TypeKlassPtr *TypeKlassPtr::OBJECT_OR_NULL;
+
+//------------------------------TypeKlassPtr-----------------------------------
+TypeKlassPtr::TypeKlassPtr( PTR ptr, ciKlass* klass, int offset )
+ : TypeOopPtr(KlassPtr, ptr, klass, (ptr==Constant), (ptr==Constant ? klass : NULL), offset, 0) {
+}
+
+//------------------------------make-------------------------------------------
+// ptr to klass 'k', if Constant, or possibly to a sub-klass if not a Constant
+const TypeKlassPtr *TypeKlassPtr::make( PTR ptr, ciKlass* k, int offset ) {
+ assert( k != NULL, "Expect a non-NULL klass");
+ assert(k->is_instance_klass() || k->is_array_klass() ||
+ k->is_method_klass(), "Incorrect type of klass oop");
+ TypeKlassPtr *r =
+ (TypeKlassPtr*)(new TypeKlassPtr(ptr, k, offset))->hashcons();
+
+ return r;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeKlassPtr::eq( const Type *t ) const {
+ const TypeKlassPtr *p = t->is_klassptr();
+ return
+ klass()->equals(p->klass()) &&
+ TypeOopPtr::eq(p);
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeKlassPtr::hash(void) const {
+ return klass()->hash() + TypeOopPtr::hash();
+}
+
+
+//------------------------------klass------------------------------------------
+// Return the defining klass for this class
+ciKlass* TypeAryPtr::klass() const {
+ if( _klass ) return _klass; // Return cached value, if possible
+
+ // Oops, need to compute _klass and cache it
+ ciKlass* k_ary = NULL;
+ const TypeInstPtr *tinst;
+ const TypeAryPtr *tary;
+ // Get element klass
+ if ((tinst = elem()->isa_instptr()) != NULL) {
+ // Compute array klass from element klass
+ k_ary = ciObjArrayKlass::make(tinst->klass());
+ } else if ((tary = elem()->isa_aryptr()) != NULL) {
+ // Compute array klass from element klass
+ ciKlass* k_elem = tary->klass();
+ // If element type is something like bottom[], k_elem will be null.
+ if (k_elem != NULL)
+ k_ary = ciObjArrayKlass::make(k_elem);
+ } else if ((elem()->base() == Type::Top) ||
+ (elem()->base() == Type::Bottom)) {
+ // element type of Bottom occurs from meet of basic type
+ // and object; Top occurs when doing join on Bottom.
+ // Leave k_ary at NULL.
+ } else {
+ // Cannot compute array klass directly from basic type,
+ // since subtypes of TypeInt all have basic type T_INT.
+ assert(!elem()->isa_int(),
+ "integral arrays must be pre-equipped with a class");
+ // Compute array klass directly from basic type
+ k_ary = ciTypeArrayKlass::make(elem()->basic_type());
+ }
+
+ if( this != TypeAryPtr::OOPS )
+ // The _klass field acts as a cache of the underlying
+ // ciKlass for this array type. In order to set the field,
+ // we need to cast away const-ness.
+ //
+ // IMPORTANT NOTE: we *never* set the _klass field for the
+ // type TypeAryPtr::OOPS. This Type is shared between all
+ // active compilations. However, the ciKlass which represents
+ // this Type is *not* shared between compilations, so caching
+ // this value would result in fetching a dangling pointer.
+ //
+ // Recomputing the underlying ciKlass for each request is
+ // a bit less efficient than caching, but calls to
+ // TypeAryPtr::OOPS->klass() are not common enough to matter.
+ ((TypeAryPtr*)this)->_klass = k_ary;
+ return k_ary;
+}
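+
+// Editor's note: an illustrative, standalone model (not part of the original sources)
+// of the lazy-caching pattern used by TypeAryPtr::klass() above: compute on first use,
+// cache by casting away const-ness, and never cache into the one instance that is
+// shared across compilations. LazyVal and its members are invented names.
+struct LazyVal {
+  int  _cache;                        // 0 means "not computed yet" (like _klass == NULL)
+  bool _is_shared;                    // true for the instance shared across all users
+  int  compute() const { return 42; } // stands in for building the array klass
+  int  get() const {
+    if (_cache != 0) return _cache;   // return cached value, if possible
+    int v = compute();
+    if (!_is_shared)                  // never cache into the shared instance
+      ((LazyVal*)this)->_cache = v;   // cast away const-ness, as the code above does
+    return v;                         // shared instance: recompute on every request
+  }
+};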
+
+
+//------------------------------add_offset-------------------------------------
+// Access internals of klass object
+const TypePtr *TypeKlassPtr::add_offset( int offset ) const {
+ return make( _ptr, klass(), xadd_offset(offset) );
+}
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeKlassPtr::cast_to_ptr_type(PTR ptr) const {
+ assert(_base == OopPtr, "subclass must override cast_to_ptr_type");
+ if( ptr == _ptr ) return this;
+ return make(ptr, _klass, _offset);
+}
+
+
+//-----------------------------cast_to_exactness-------------------------------
+const Type *TypeKlassPtr::cast_to_exactness(bool klass_is_exact) const {
+ if( klass_is_exact == _klass_is_exact ) return this;
+ if (!UseExactTypes) return this;
+ return make(klass_is_exact ? Constant : NotNull, _klass, _offset);
+}
+
+
+//-----------------------------as_instance_type--------------------------------
+// Corresponding type for an instance of the given class.
+// It will be NotNull, and exact if and only if the klass type is exact.
+const TypeOopPtr* TypeKlassPtr::as_instance_type() const {
+ ciKlass* k = klass();
+ bool xk = klass_is_exact();
+ //return TypeInstPtr::make(TypePtr::NotNull, k, xk, NULL, 0);
+ const TypeOopPtr* toop = TypeOopPtr::make_from_klass_raw(k);
+ toop = toop->cast_to_ptr_type(TypePtr::NotNull)->is_oopptr();
+ return toop->cast_to_exactness(xk)->is_oopptr();
+}
+
+
+//------------------------------xmeet------------------------------------------
+// Compute the MEET of two types, return a new Type object.
+const Type *TypeKlassPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Pointer
+ switch (t->base()) { // switch on original type
+
+ case Int: // Mixing ints & oops happens when javac
+ case Long: // reuses local variables
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case RawPtr: return TypePtr::BOTTOM;
+
+ case OopPtr: { // Meeting to OopPtrs
+ // Found an OopPtr type vs self-KlassPtr type
+ const TypePtr *tp = t->is_oopptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ case AnyNull:
+ return make(ptr, klass(), offset);
+ case BotPTR:
+ case NotNull:
+ return TypePtr::make(AnyPtr, ptr, offset);
+ default: typerr(t);
+ }
+ }
+
+ case AnyPtr: { // Meeting to AnyPtrs
+ // Found an AnyPtr type vs self-KlassPtr type
+ const TypePtr *tp = t->is_ptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ return this;
+ case Null:
+ if( ptr == Null ) return TypePtr::make( AnyPtr, ptr, offset );
+ case AnyNull:
+ return make( ptr, klass(), offset );
+ case BotPTR:
+ case NotNull:
+ return TypePtr::make(AnyPtr, ptr, offset);
+ default: typerr(t);
+ }
+ }
+
+ case AryPtr: // Meet with AryPtr
+ case InstPtr: // Meet with InstPtr
+ return TypeInstPtr::BOTTOM;
+
+ //
+ // A-top }
+ // / | \ } Tops
+ // B-top A-any C-top }
+ // | / | \ | } Any-nulls
+ // B-any | C-any }
+ // | | |
+ // B-con A-con C-con } constants; not comparable across classes
+ // | | |
+ // B-not | C-not }
+ // | \ | / | } not-nulls
+ // B-bot A-not C-bot }
+ // \ | / } Bottoms
+ // A-bot }
+ //
+
+ case KlassPtr: { // Meet two KlassPtr types
+ const TypeKlassPtr *tkls = t->is_klassptr();
+ int off = meet_offset(tkls->offset());
+ PTR ptr = meet_ptr(tkls->ptr());
+
+ // Check for the easy case: the klasses are equal (and perhaps not loaded!).
+ // If we have constants, then we created oops, so the classes are loaded
+ // and we can handle the constants further down. This case handles
+ // not-loaded classes.
+ if( ptr != Constant && tkls->klass()->equals(klass()) ) {
+ return make( ptr, klass(), off );
+ }
+
+ // Classes require inspection in the Java klass hierarchy. Must be loaded.
+ ciKlass* tkls_klass = tkls->klass();
+ ciKlass* this_klass = this->klass();
+ assert( tkls_klass->is_loaded(), "This class should have been loaded.");
+ assert( this_klass->is_loaded(), "This class should have been loaded.");
+
+ // If 'this' type is above the centerline and is a superclass of the
+ // other, we can treat 'this' as having the same type as the other.
+ if ((above_centerline(this->ptr())) &&
+ tkls_klass->is_subtype_of(this_klass)) {
+ this_klass = tkls_klass;
+ }
+ // If 'tkls' type is above the centerline and is a superclass of the
+ // other, we can treat 'tkls' as having the same type as the other.
+ if ((above_centerline(tkls->ptr())) &&
+ this_klass->is_subtype_of(tkls_klass)) {
+ tkls_klass = this_klass;
+ }
+
+ // Check for classes now being equal
+ if (tkls_klass->equals(this_klass)) {
+ // If the klasses are equal, the constants may still differ. Fall to
+ // NotNull if they do (neither constant is NULL; that is a special case
+ // handled elsewhere).
+ ciObject* o = NULL; // Assume not constant when done
+ ciObject* this_oop = const_oop();
+ ciObject* tkls_oop = tkls->const_oop();
+ if( ptr == Constant ) {
+ if (this_oop != NULL && tkls_oop != NULL &&
+ this_oop->equals(tkls_oop) )
+ o = this_oop;
+ else if (above_centerline(this->ptr()))
+ o = tkls_oop;
+ else if (above_centerline(tkls->ptr()))
+ o = this_oop;
+ else
+ ptr = NotNull;
+ }
+ return make( ptr, this_klass, off );
+ } // Else classes are not equal
+
+ // Since klasses are different, we require the LCA in the Java
+ // class hierarchy - which means we have to fall to at least NotNull.
+ if( ptr == TopPTR || ptr == AnyNull || ptr == Constant )
+ ptr = NotNull;
+ // Now we find the LCA of Java classes
+ ciKlass* k = this_klass->least_common_ancestor(tkls_klass);
+ return make( ptr, k, off );
+ } // End of case KlassPtr
+
+ } // End of switch
+ return this; // Return the double constant
+}
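+
+// Editor's note: an illustrative, self-contained sketch (not part of the original
+// sources) of the least-common-ancestor fallback used above when the two klasses
+// differ: walk both up a singly-inherited hierarchy until they meet. ToyKlass and
+// toy_lca are invented; ciKlass::least_common_ancestor is analogous only in spirit,
+// and a common root (Object) is assumed so the walk terminates.
+struct ToyKlass { const ToyKlass* super; int depth; };
+static const ToyKlass* toy_lca(const ToyKlass* a, const ToyKlass* b) {
+  while (a->depth > b->depth) a = a->super;       // lift the deeper klass first
+  while (b->depth > a->depth) b = b->super;
+  while (a != b) { a = a->super; b = b->super; }  // climb in lock-step until they meet
+  return a;                                       // the meet pairs this with NotNull
+}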
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeKlassPtr::xdual() const {
+ return new TypeKlassPtr( dual_ptr(), klass(), dual_offset() );
+}
+
+//------------------------------dump2------------------------------------------
+// Dump Klass Type
+#ifndef PRODUCT
+void TypeKlassPtr::dump2( Dict & d, uint depth, outputStream *st ) const {
+ switch( _ptr ) {
+ case Constant:
+ st->print("precise ");
+ case NotNull:
+ {
+ const char *name = klass()->name()->as_utf8();
+ if( name ) {
+ st->print("klass %s: " INTPTR_FORMAT, name, klass());
+ } else {
+ ShouldNotReachHere();
+ }
+ }
+ case BotPTR:
+ if( !WizardMode && !Verbose && !_klass_is_exact ) break;
+ case TopPTR:
+ case AnyNull:
+ st->print(":%s", ptr_msg[_ptr]);
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+
+ if( _offset ) { // Dump offset, if any
+ if( _offset == OffsetBot ) { st->print("+any"); }
+ else if( _offset == OffsetTop ) { st->print("+unknown"); }
+ else { st->print("+%d", _offset); }
+ }
+
+ st->print(" *");
+}
+#endif
+
+
+
+//=============================================================================
+// Convenience common pre-built types.
+
+//------------------------------make-------------------------------------------
+const TypeFunc *TypeFunc::make( const TypeTuple *domain, const TypeTuple *range ) {
+ return (TypeFunc*)(new TypeFunc(domain,range))->hashcons();
+}
+
+//------------------------------make-------------------------------------------
+const TypeFunc *TypeFunc::make(ciMethod* method) {
+ Compile* C = Compile::current();
+ const TypeFunc* tf = C->last_tf(method); // check cache
+ if (tf != NULL) return tf; // The hit rate here is almost 50%.
+ const TypeTuple *domain;
+ if (method->flags().is_static()) {
+ domain = TypeTuple::make_domain(NULL, method->signature());
+ } else {
+ domain = TypeTuple::make_domain(method->holder(), method->signature());
+ }
+ const TypeTuple *range = TypeTuple::make_range(method->signature());
+ tf = TypeFunc::make(domain, range);
+ C->set_last_tf(method, tf); // fill cache
+ return tf;
+}
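+
+// Editor's note: an illustrative, standalone sketch (not part of the original sources)
+// of the one-entry cache consulted above via last_tf / set_last_tf: remember only the
+// most recent (key, value) pair and rebuild on a miss. build_signature and
+// cached_signature are invented names.
+static int build_signature(int key) { return key * 31 + 7; }  // stands in for building a TypeFunc
+static int cached_signature(int key) {
+  static int last_key = -1;               // key of the most recent request
+  static int last_val = 0;                // value built for that key
+  if (key == last_key) return last_val;   // cache hit (roughly every other call above)
+  last_val = build_signature(key);        // miss: build and remember for next time
+  last_key = key;
+  return last_val;
+}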
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeFunc::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Func
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case Top:
+ break;
+ }
+ return this; // Return the double constant
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeFunc::xdual() const {
+ return this;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeFunc::eq( const Type *t ) const {
+ const TypeFunc *a = (const TypeFunc*)t;
+ return _domain == a->_domain &&
+ _range == a->_range;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeFunc::hash(void) const {
+ return (intptr_t)_domain + (intptr_t)_range;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump Function Type
+#ifndef PRODUCT
+void TypeFunc::dump2( Dict &d, uint depth, outputStream *st ) const {
+ if( _range->_cnt <= Parms )
+ st->print("void");
+ else {
+ uint i;
+ for (i = Parms; i < _range->_cnt-1; i++) {
+ _range->field_at(i)->dump2(d,depth,st);
+ st->print("/");
+ }
+ _range->field_at(i)->dump2(d,depth,st);
+ }
+ st->print(" ");
+ st->print("( ");
+ if( !depth || d[this] ) { // Check for recursive dump
+ st->print("...)");
+ return;
+ }
+ d.Insert((void*)this,(void*)this); // Stop recursion
+ if (Parms < _domain->_cnt)
+ _domain->field_at(Parms)->dump2(d,depth-1,st);
+ for (uint i = Parms+1; i < _domain->_cnt; i++) {
+ st->print(", ");
+ _domain->field_at(i)->dump2(d,depth-1,st);
+ }
+ st->print(" )");
+}
+
+//------------------------------print_flattened--------------------------------
+// Print a 'flattened' signature
+static const char * const flat_type_msg[Type::lastype] = {
+ "bad","control","top","int","long","_",
+ "tuple:", "array:",
+ "ptr", "rawptr", "ptr", "ptr", "ptr", "ptr",
+ "func", "abIO", "return_address", "mem",
+ "float_top", "ftcon:", "flt",
+ "double_top", "dblcon:", "dbl",
+ "bottom"
+};
+
+void TypeFunc::print_flattened() const {
+ if( _range->_cnt <= Parms )
+ tty->print("void");
+ else {
+ uint i;
+ for (i = Parms; i < _range->_cnt-1; i++)
+ tty->print("%s/",flat_type_msg[_range->field_at(i)->base()]);
+ tty->print("%s",flat_type_msg[_range->field_at(i)->base()]);
+ }
+ tty->print(" ( ");
+ if (Parms < _domain->_cnt)
+ tty->print("%s",flat_type_msg[_domain->field_at(Parms)->base()]);
+ for (uint i = Parms+1; i < _domain->_cnt; i++)
+ tty->print(", %s",flat_type_msg[_domain->field_at(i)->base()]);
+ tty->print(" )");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeFunc::singleton(void) const {
+ return false; // Never a singleton
+}
+
+bool TypeFunc::empty(void) const {
+ return false; // Never empty
+}
+
+
+BasicType TypeFunc::return_type() const {
+ if (range()->cnt() == TypeFunc::Parms) {
+ return T_VOID;
+ }
+ return range()->field_at(TypeFunc::Parms)->basic_type();
+}
diff --git a/src/share/vm/opto/type.hpp b/src/share/vm/opto/type.hpp
new file mode 100644
index 000000000..cca1e6404
--- /dev/null
+++ b/src/share/vm/opto/type.hpp
@@ -0,0 +1,1124 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+
+// This class defines a Type lattice. The lattice is used in the constant
+// propagation algorithms, and for some type-checking of the iloc code.
+// Basic types include RSD's (lower bound, upper bound, stride for integers),
+// float & double precision constants, sets of data-labels and code-labels.
+// The complete lattice is described below. Subtypes have no relationship to
+// up or down in the lattice; that is entirely determined by the behavior of
+// the MEET/JOIN functions.
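+
+// Editor's note: an illustrative, self-contained toy lattice (not part of the original
+// sources) to make the remark above concrete: four elements (TOP above two incomparable
+// constants above BOTTOM), with MEET deciding what is "up" and "down", DUAL reflecting
+// around the centerline, and JOIN derived from MEET exactly as Type::join() does below.
+// ToyType, toy_meet, toy_dual and toy_join are invented names.
+enum ToyType { TOY_TOP, TOY_CON_A, TOY_CON_B, TOY_BOTTOM };
+static ToyType toy_meet(ToyType a, ToyType b) {
+  if (a == TOY_TOP) return b;          // TOP is the identity element of MEET
+  if (b == TOY_TOP) return a;
+  if (a == b)       return a;          // meeting a type with itself changes nothing
+  return TOY_BOTTOM;                   // two different constants fall to BOTTOM
+}
+static ToyType toy_dual(ToyType t) {
+  if (t == TOY_TOP)    return TOY_BOTTOM;   // reflect the extremes around the centerline
+  if (t == TOY_BOTTOM) return TOY_TOP;
+  return t;                                 // constants sit on the centerline, self-dual
+}
+static ToyType toy_join(ToyType a, ToyType b) {
+  return toy_dual(toy_meet(toy_dual(a), toy_dual(b)));   // JOIN via dual-meet-dual
+}
+// e.g. toy_meet(TOY_CON_A, TOY_CON_B) == TOY_BOTTOM, while toy_join of the same pair
+// climbs to TOY_TOP, the least element above both constants.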
+
+class Dict;
+class Type;
+class TypeD;
+class TypeF;
+class TypeInt;
+class TypeLong;
+class TypeAry;
+class TypeTuple;
+class TypePtr;
+class TypeRawPtr;
+class TypeOopPtr;
+class TypeInstPtr;
+class TypeAryPtr;
+class TypeKlassPtr;
+
+//------------------------------Type-------------------------------------------
+// Basic Type object, represents a set of primitive Values.
+// Types are hash-cons'd into a private class dictionary, so only one of each
+// different kind of Type exists. Types are never modified after creation, so
+// all their interesting fields are constant.
+class Type {
+public:
+ enum TYPES {
+ Bad=0, // Type check
+ Control, // Control of code (not in lattice)
+ Top, // Top of the lattice
+ Int, // Integer range (lo-hi)
+ Long, // Long integer range (lo-hi)
+ Half, // Placeholder half of doubleword
+
+ Tuple, // Method signature or object layout
+ Array, // Array types
+
+ AnyPtr, // Any old raw, klass, inst, or array pointer
+ RawPtr, // Raw (non-oop) pointers
+ OopPtr, // Any and all Java heap entities
+ InstPtr, // Instance pointers (non-array objects)
+ AryPtr, // Array pointers
+ KlassPtr, // Klass pointers
+ // (Ptr order matters: See is_ptr, isa_ptr, is_oopptr, isa_oopptr.)
+
+ Function, // Function signature
+ Abio, // Abstract I/O
+ Return_Address, // Subroutine return address
+ Memory, // Abstract store
+ FloatTop, // No float value
+ FloatCon, // Floating point constant
+ FloatBot, // Any float value
+ DoubleTop, // No double value
+ DoubleCon, // Double precision constant
+ DoubleBot, // Any double value
+ Bottom, // Bottom of lattice
+ lastype // Bogus ending type (not in lattice)
+ };
+
+ // Signal values for offsets from a base pointer
+ enum OFFSET_SIGNALS {
+ OffsetTop = -2000000000, // undefined offset
+ OffsetBot = -2000000001 // any possible offset
+ };
+
+ // Min and max WIDEN values.
+ enum WIDEN {
+ WidenMin = 0,
+ WidenMax = 3
+ };
+
+private:
+ // Dictionary of types shared among compilations.
+ static Dict* _shared_type_dict;
+
+ static int uhash( const Type *const t );
+ // Structural equality check. Assumes that cmp() has already compared
+ // the _base types and thus knows it can cast 't' appropriately.
+ virtual bool eq( const Type *t ) const;
+
+ // Top-level hash-table of types
+ static Dict *type_dict() {
+ return Compile::current()->type_dict();
+ }
+
+ // DUAL operation: reflect around lattice centerline. Used instead of
+ // join to ensure my lattice is symmetric up and down. Dual is computed
+ // lazily, on demand, and cached in _dual.
+ const Type *_dual; // Cached dual value
+ // Table for efficient dualing of base types
+ static const TYPES dual_type[lastype];
+
+protected:
+ // Each class of type is also identified by its base.
+ const TYPES _base; // Enum of Types type
+
+ Type( TYPES t ) : _dual(NULL), _base(t) {} // Simple types
+ // ~Type(); // Use fast deallocation
+ const Type *hashcons(); // Hash-cons the type
+
+public:
+
+ inline void* operator new( size_t x ) {
+ Compile* compile = Compile::current();
+ compile->set_type_last_size(x);
+ void *temp = compile->type_arena()->Amalloc_D(x);
+ compile->set_type_hwm(temp);
+ return temp;
+ }
+ inline void operator delete( void* ptr ) {
+ Compile* compile = Compile::current();
+ compile->type_arena()->Afree(ptr,compile->type_last_size());
+ }
+
+ // Initialize the type system for a particular compilation.
+ static void Initialize(Compile* compile);
+
+ // Initialize the types shared by all compilations.
+ static void Initialize_shared(Compile* compile);
+
+ TYPES base() const {
+ assert(_base > Bad && _base < lastype, "sanity");
+ return _base;
+ }
+
+ // Create a new hash-consd type
+ static const Type *make(enum TYPES);
+ // Test for equivalence of types
+ static int cmp( const Type *const t1, const Type *const t2 );
+ // Test for higher or equal in lattice
+ int higher_equal( const Type *t ) const { return !cmp(meet(t),t); }
+
+ // MEET operation; lower in lattice.
+ const Type *meet( const Type *t ) const;
+ // WIDEN: 'widens' for Ints and other range types
+ virtual const Type *widen( const Type *old ) const { return this; }
+ // NARROW: complement for widen, used by pessimistic phases
+ virtual const Type *narrow( const Type *old ) const { return this; }
+
+ // DUAL operation: reflect around lattice centerline. Used instead of
+ // join to ensure my lattice is symmetric up and down.
+ const Type *dual() const { return _dual; }
+
+ // Compute meet dependent on base type
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // JOIN operation; higher in lattice. Done by finding the dual of the
+ // meet of the dual of the 2 inputs.
+ const Type *join( const Type *t ) const {
+ return dual()->meet(t->dual())->dual(); }
+
+ // Modified version of JOIN adapted to the needs of Node::Value.
+ // Normalizes all empty values to TOP. Does not kill _widen bits.
+ // Currently, it also works around limitations involving interface types.
+ virtual const Type *filter( const Type *kills ) const;
+
+ // Convenience access
+ float getf() const;
+ double getd() const;
+
+ const TypeInt *is_int() const;
+ const TypeInt *isa_int() const; // Returns NULL if not an Int
+ const TypeLong *is_long() const;
+ const TypeLong *isa_long() const; // Returns NULL if not a Long
+ const TypeD *is_double_constant() const; // Asserts it is a DoubleCon
+ const TypeD *isa_double_constant() const; // Returns NULL if not a DoubleCon
+ const TypeF *is_float_constant() const; // Asserts it is a FloatCon
+ const TypeF *isa_float_constant() const; // Returns NULL if not a FloatCon
+ const TypeTuple *is_tuple() const; // Collection of fields, NOT a pointer
+ const TypeAry *is_ary() const; // Array, NOT array pointer
+ const TypePtr *is_ptr() const; // Asserts it is a ptr type
+ const TypePtr *isa_ptr() const; // Returns NULL if not ptr type
+ const TypeRawPtr *is_rawptr() const; // NOT Java oop
+ const TypeOopPtr *isa_oopptr() const; // Returns NULL if not ptr type
+ const TypeKlassPtr *isa_klassptr() const; // Returns NULL if not KlassPtr
+ const TypeKlassPtr *is_klassptr() const; // assert if not KlassPtr
+ const TypeOopPtr *is_oopptr() const; // Java-style GC'd pointer
+ const TypeInstPtr *isa_instptr() const; // Returns NULL if not InstPtr
+ const TypeInstPtr *is_instptr() const; // Instance
+ const TypeAryPtr *isa_aryptr() const; // Returns NULL if not AryPtr
+ const TypeAryPtr *is_aryptr() const; // Array oop
+ virtual bool is_finite() const; // Has a finite value
+ virtual bool is_nan() const; // Is not a number (NaN)
+
+ // Special test for register pressure heuristic
+ bool is_floatingpoint() const; // True if Float or Double base type
+
+ // Do you have memory, directly or through a tuple?
+ bool has_memory( ) const;
+
+ // Are you a pointer type or not?
+ bool isa_oop_ptr() const;
+
+ // TRUE if type is a singleton
+ virtual bool singleton(void) const;
+
+ // TRUE if type is above the lattice centerline, and is therefore vacuous
+ virtual bool empty(void) const;
+
+ // Return a hash for this type. The hash function is public so ConNode
+ // (constants) can hash on their constant, which is represented by a Type.
+ virtual int hash() const;
+
+ // Map ideal registers (machine types) to ideal types
+ static const Type *mreg2type[];
+
+ // Printing, statistics
+ static const char * const msg[lastype]; // Printable strings
+#ifndef PRODUCT
+ void dump_on(outputStream *st) const;
+ void dump() const {
+ dump_on(tty);
+ }
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+ static void dump_stats();
+ static void verify_lastype(); // Check that arrays match type enum
+#endif
+ void typerr(const Type *t) const; // Mixing types error
+
+ // Create basic type
+ static const Type* get_const_basic_type(BasicType type) {
+ assert((uint)type <= T_CONFLICT && _const_basic_type[type] != NULL, "bad type");
+ return _const_basic_type[type];
+ }
+
+ // Mapping to the array element's basic type.
+ BasicType array_element_basic_type() const;
+
+ // Create standard type for a ciType:
+ static const Type* get_const_type(ciType* type);
+
+ // Create standard zero value:
+ static const Type* get_zero_type(BasicType type) {
+ assert((uint)type <= T_CONFLICT && _zero_type[type] != NULL, "bad type");
+ return _zero_type[type];
+ }
+
+ // Report if this is a zero value (not top).
+ bool is_zero_type() const {
+ BasicType type = basic_type();
+ if (type == T_VOID || type >= T_CONFLICT)
+ return false;
+ else
+ return (this == _zero_type[type]);
+ }
+
+ // Convenience common pre-built types.
+ static const Type *ABIO;
+ static const Type *BOTTOM;
+ static const Type *CONTROL;
+ static const Type *DOUBLE;
+ static const Type *FLOAT;
+ static const Type *HALF;
+ static const Type *MEMORY;
+ static const Type *MULTI;
+ static const Type *RETURN_ADDRESS;
+ static const Type *TOP;
+
+ // Mapping from compiler type to VM BasicType
+ BasicType basic_type() const { return _basic_type[_base]; }
+
+ // Mapping from CI type system to compiler type:
+ static const Type* get_typeflow_type(ciType* type);
+
+private:
+ // support arrays
+ static const BasicType _basic_type[];
+ static const Type* _zero_type[T_CONFLICT+1];
+ static const Type* _const_basic_type[T_CONFLICT+1];
+};
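+
+// Editor's note: an illustrative, self-contained model (not part of the original
+// sources) of hash-consing as performed by Type::hashcons() above: every structurally
+// equal value is canonicalized to one shared object, so pointer equality afterwards
+// implies structural equality. Interned, intern() and the tiny fixed-size table are
+// invented; HotSpot uses its own Dict keyed by hash() and eq().
+struct Interned { int payload; const Interned* next; };
+static const Interned* intern(int payload) {
+  static const Interned* buckets[31];               // tiny open hash table, keyed by value
+  unsigned idx = (unsigned)payload % 31;
+  for (const Interned* p = buckets[idx]; p != 0; p = p->next)
+    if (p->payload == payload) return p;            // structural hit: reuse the old object
+  Interned* n = new Interned();                     // miss: this becomes the canonical copy
+  n->payload = payload;
+  n->next    = buckets[idx];
+  buckets[idx] = n;
+  return n;
+}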
+
+//------------------------------TypeF------------------------------------------
+// Class of Float-Constant Types.
+class TypeF : public Type {
+ TypeF( float f ) : Type(FloatCon), _f(f) {};
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ const float _f; // Float constant
+
+ static const TypeF *make(float f);
+
+ virtual bool is_finite() const; // Has a finite value
+ virtual bool is_nan() const; // Is not a number (NaN)
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ // Convenience common pre-built types.
+ static const TypeF *ZERO; // positive zero only
+ static const TypeF *ONE;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+//------------------------------TypeD------------------------------------------
+// Class of Double-Constant Types.
+class TypeD : public Type {
+ TypeD( double d ) : Type(DoubleCon), _d(d) {};
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ const double _d; // Double constant
+
+ static const TypeD *make(double d);
+
+ virtual bool is_finite() const; // Has a finite value
+ virtual bool is_nan() const; // Is not a number (NaN)
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ // Convenience common pre-built types.
+ static const TypeD *ZERO; // positive zero only
+ static const TypeD *ONE;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+//------------------------------TypeInt----------------------------------------
+// Class of integer ranges, the set of integers between a lower bound and an
+// upper bound, inclusive.
+class TypeInt : public Type {
+ TypeInt( jint lo, jint hi, int w );
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ const jint _lo, _hi; // Lower bound, upper bound
+ const short _widen; // Limit on times we widen this sucker
+
+ static const TypeInt *make(jint lo);
+ // must always specify w
+ static const TypeInt *make(jint lo, jint hi, int w);
+
+ // Check for single integer
+ int is_con() const { return _lo==_hi; }
+ bool is_con(int i) const { return is_con() && _lo == i; }
+ jint get_con() const { assert( is_con(), "" ); return _lo; }
+
+ virtual bool is_finite() const; // Has a finite value
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ virtual const Type *widen( const Type *t ) const;
+ virtual const Type *narrow( const Type *t ) const;
+ // Do not kill _widen bits.
+ virtual const Type *filter( const Type *kills ) const;
+ // Convenience common pre-built types.
+ static const TypeInt *MINUS_1;
+ static const TypeInt *ZERO;
+ static const TypeInt *ONE;
+ static const TypeInt *BOOL;
+ static const TypeInt *CC;
+ static const TypeInt *CC_LT; // [-1] == MINUS_1
+ static const TypeInt *CC_GT; // [1] == ONE
+ static const TypeInt *CC_EQ; // [0] == ZERO
+ static const TypeInt *CC_LE; // [-1,0]
+ static const TypeInt *CC_GE; // [0,1] == BOOL (!)
+ static const TypeInt *BYTE;
+ static const TypeInt *CHAR;
+ static const TypeInt *SHORT;
+ static const TypeInt *POS;
+ static const TypeInt *POS1;
+ static const TypeInt *INT;
+ static const TypeInt *SYMINT; // symmetric range [-max_jint..max_jint]
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+
+//------------------------------TypeLong---------------------------------------
+// Class of long integer ranges, the set of integers between a lower bound and
+// an upper bound, inclusive.
+class TypeLong : public Type {
+ TypeLong( jlong lo, jlong hi, int w );
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ const jlong _lo, _hi; // Lower bound, upper bound
+ const short _widen; // Limit on times we widen this sucker
+
+ static const TypeLong *make(jlong lo);
+ // must always specify w
+ static const TypeLong *make(jlong lo, jlong hi, int w);
+
+ // Check for single integer
+ int is_con() const { return _lo==_hi; }
+ jlong get_con() const { assert( is_con(), "" ); return _lo; }
+
+ virtual bool is_finite() const; // Has a finite value
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ virtual const Type *widen( const Type *t ) const;
+ virtual const Type *narrow( const Type *t ) const;
+ // Do not kill _widen bits.
+ virtual const Type *filter( const Type *kills ) const;
+ // Convenience common pre-built types.
+ static const TypeLong *MINUS_1;
+ static const TypeLong *ZERO;
+ static const TypeLong *ONE;
+ static const TypeLong *POS;
+ static const TypeLong *LONG;
+ static const TypeLong *INT; // 32-bit subrange [min_jint..max_jint]
+ static const TypeLong *UINT; // 32-bit unsigned [0..max_juint]
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint, outputStream *st ) const;// Specialized per-Type dumping
+#endif
+};
+
+//------------------------------TypeTuple--------------------------------------
+// Class of Tuple Types, essentially type collections for function signatures
+// and class layouts. It happens to also be a fast cache for the HotSpot
+// signature types.
+class TypeTuple : public Type {
+ TypeTuple( uint cnt, const Type **fields ) : Type(Tuple), _cnt(cnt), _fields(fields) { }
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+
+public:
+ const uint _cnt; // Count of fields
+ const Type ** const _fields; // Array of field types
+
+ // Accessors:
+ uint cnt() const { return _cnt; }
+ const Type* field_at(uint i) const {
+ assert(i < _cnt, "oob");
+ return _fields[i];
+ }
+ void set_field_at(uint i, const Type* t) {
+ assert(i < _cnt, "oob");
+ _fields[i] = t;
+ }
+
+ static const TypeTuple *make( uint cnt, const Type **fields );
+ static const TypeTuple *make_range(ciSignature *sig);
+ static const TypeTuple *make_domain(ciInstanceKlass* recv, ciSignature *sig);
+
+ // Subroutine call type with space allocated for argument types
+ static const Type **fields( uint arg_cnt );
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ // Convenience common pre-built types.
+ static const TypeTuple *IFBOTH;
+ static const TypeTuple *IFFALSE;
+ static const TypeTuple *IFTRUE;
+ static const TypeTuple *IFNEITHER;
+ static const TypeTuple *LOOPBODY;
+ static const TypeTuple *MEMBAR;
+ static const TypeTuple *STORECONDITIONAL;
+ static const TypeTuple *START_I2C;
+ static const TypeTuple *INT_PAIR;
+ static const TypeTuple *LONG_PAIR;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
+
+//------------------------------TypeAry----------------------------------------
+// Class of Array Types
+class TypeAry : public Type {
+ TypeAry( const Type *elem, const TypeInt *size) : Type(Array),
+ _elem(elem), _size(size) {}
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+
+private:
+ const Type *_elem; // Element type of array
+ const TypeInt *_size; // Elements in array
+ friend class TypeAryPtr;
+
+public:
+ static const TypeAry *make( const Type *elem, const TypeInt *size);
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ bool ary_must_be_exact() const; // true if arrays of such are never generic
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
+
+//------------------------------TypePtr----------------------------------------
+// Class of machine Pointer Types: raw data, instances or arrays.
+// If the _base enum is AnyPtr, then this refers to all of the above.
+// Otherwise the _base will indicate which subset of pointers is affected,
+// and the class will be inherited from.
+class TypePtr : public Type {
+public:
+ enum PTR { TopPTR, AnyNull, Constant, Null, NotNull, BotPTR, lastPTR };
+protected:
+ TypePtr( TYPES t, PTR ptr, int offset ) : Type(t), _ptr(ptr), _offset(offset) {}
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ static const PTR ptr_meet[lastPTR][lastPTR];
+ static const PTR ptr_dual[lastPTR];
+ static const char * const ptr_msg[lastPTR];
+
+public:
+ const int _offset; // Offset into oop, with TOP & BOT
+ const PTR _ptr; // Pointer equivalence class
+
+ const int offset() const { return _offset; }
+ const PTR ptr() const { return _ptr; }
+
+ static const TypePtr *make( TYPES t, PTR ptr, int offset );
+
+ // Return a 'ptr' version of this type
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual intptr_t get_con() const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+ virtual const Type *xmeet( const Type *t ) const;
+ int meet_offset( int offset ) const;
+ int dual_offset( ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // meet, dual and join over pointer equivalence sets
+ PTR meet_ptr( const PTR in_ptr ) const { return ptr_meet[in_ptr][ptr()]; }
+ PTR dual_ptr() const { return ptr_dual[ptr()]; }
+
+ // This is textually confusing unless one recalls that
+ // join(t) == dual()->meet(t->dual())->dual().
+ PTR join_ptr( const PTR in_ptr ) const {
+ return ptr_dual[ ptr_meet[ ptr_dual[in_ptr] ] [ dual_ptr() ] ];
+ }
+
+ // Tests for relation to centerline of type lattice:
+ static bool above_centerline(PTR ptr) { return (ptr <= AnyNull); }
+ static bool below_centerline(PTR ptr) { return (ptr >= NotNull); }
+ // Convenience common pre-built types.
+ static const TypePtr *NULL_PTR;
+ static const TypePtr *NOTNULL;
+ static const TypePtr *BOTTOM;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
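+
+// Editor's note: an illustrative, reduced pointer lattice (not part of the original
+// sources) showing the table-driven MEET and the centerline tests used by TypePtr
+// above. Only three of the six real states are modelled (MINI_TOP ~ TopPTR,
+// MINI_NOTNULL ~ NotNull, MINI_BOT ~ BotPTR); in the full lattice TopPTR and AnyNull
+// are above the centerline, Constant and Null sit on it, NotNull and BotPTR are below.
+// All Mini* names are invented.
+enum MiniPTR { MINI_TOP, MINI_NOTNULL, MINI_BOT, lastMiniPTR };
+static const MiniPTR mini_ptr_meet[lastMiniPTR][lastMiniPTR] = {
+  // meeting with:    MINI_TOP      MINI_NOTNULL  MINI_BOT
+  /* MINI_TOP     */{ MINI_TOP,     MINI_NOTNULL, MINI_BOT },
+  /* MINI_NOTNULL */{ MINI_NOTNULL, MINI_NOTNULL, MINI_BOT },
+  /* MINI_BOT     */{ MINI_BOT,     MINI_BOT,     MINI_BOT },
+};
+static MiniPTR mini_meet_ptr(MiniPTR a, MiniPTR b) { return mini_ptr_meet[a][b]; }
+static bool    mini_above_centerline(MiniPTR p)    { return p == MINI_TOP; }
+// e.g. mini_meet_ptr(MINI_TOP, MINI_NOTNULL) == MINI_NOTNULL: TOP is the identity,
+// and every other combination falls toward MINI_BOT, just as meet_ptr() falls to BotPTR.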
+
+//------------------------------TypeRawPtr-------------------------------------
+// Class of raw pointers, pointers to things other than Oops. Examples
+// include the stack pointer, top of heap, card-marking area, handles, etc.
+class TypeRawPtr : public TypePtr {
+protected:
+ TypeRawPtr( PTR ptr, address bits ) : TypePtr(RawPtr,ptr,0), _bits(bits){}
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+
+ const address _bits; // Constant value, if applicable
+
+ static const TypeRawPtr *make( PTR ptr );
+ static const TypeRawPtr *make( address bits );
+
+ // Return a 'ptr' version of this type
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual intptr_t get_con() const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ // Convenience common pre-built types.
+ static const TypeRawPtr *BOTTOM;
+ static const TypeRawPtr *NOTNULL;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+//------------------------------TypeOopPtr-------------------------------------
+// Some kind of oop (Java pointer), either klass or instance or array.
+class TypeOopPtr : public TypePtr {
+protected:
+ TypeOopPtr( TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id ) : TypePtr(t, ptr, offset), _const_oop(o), _klass(k), _klass_is_exact(xk), _instance_id(instance_id) { }
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ enum {
+ UNKNOWN_INSTANCE = 0
+ };
+protected:
+
+ int xadd_offset( int offset ) const;
+ // Oop is NULL, unless this is a constant oop.
+ ciObject* _const_oop; // Constant oop
+ // If _klass is NULL, then so is _sig. This is an unloaded klass.
+ ciKlass* _klass; // Klass object
+ // Does the type exclude subclasses of the klass? (Inexact == polymorphic.)
+ bool _klass_is_exact;
+
+ int _instance_id; // if not UNKNOWN_INSTANCE, indicates that this is a particular instance
+ // of this type which is distinct. This is the node index of the
+ // node creating this instance.
+
+ static const TypeOopPtr* make_from_klass_common(ciKlass* klass, bool klass_change, bool try_for_exact);
+
+ int dual_instance() const { return -_instance_id; }
+ int meet_instance(int uid) const;
+
+public:
+ // Creates a type given a klass. Correctly handles multi-dimensional arrays
+ // Respects UseUniqueSubclasses.
+ // If the klass is final, the resulting type will be exact.
+ static const TypeOopPtr* make_from_klass(ciKlass* klass) {
+ return make_from_klass_common(klass, true, false);
+ }
+ // Same as before, but will produce an exact type, even if
+ // the klass is not final, as long as it has exactly one implementation.
+ static const TypeOopPtr* make_from_klass_unique(ciKlass* klass) {
+ return make_from_klass_common(klass, true, true);
+ }
+ // Same as before, but does not respect UseUniqueSubclasses.
+ // Use this only for creating array element types.
+ static const TypeOopPtr* make_from_klass_raw(ciKlass* klass) {
+ return make_from_klass_common(klass, false, false);
+ }
+ // Creates a singleton type given an object.
+ static const TypeOopPtr* make_from_constant(ciObject* o);
+
+ // Make a generic (unclassed) pointer to an oop.
+ static const TypeOopPtr* make(PTR ptr, int offset);
+
+ ciObject* const_oop() const { return _const_oop; }
+ virtual ciKlass* klass() const { return _klass; }
+ bool klass_is_exact() const { return _klass_is_exact; }
+ bool is_instance() const { return _instance_id != UNKNOWN_INSTANCE; }
+ uint instance_id() const { return _instance_id; }
+
+ virtual intptr_t get_con() const;
+
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual const Type *cast_to_exactness(bool klass_is_exact) const;
+
+ virtual const TypeOopPtr *cast_to_instance(int instance_id) const;
+
+ // corresponding pointer to klass, for a given instance
+ const TypeKlassPtr* as_klass_type() const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // Do not allow interface-vs.-noninterface joins to collapse to top.
+ virtual const Type *filter( const Type *kills ) const;
+
+ // Convenience common pre-built type.
+ static const TypeOopPtr *BOTTOM;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+//------------------------------TypeInstPtr------------------------------------
+// Class of Java object pointers, pointing either to non-array Java instances
+// or to a klassOop (including array klasses).
+class TypeInstPtr : public TypeOopPtr {
+ TypeInstPtr( PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id );
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+
+ ciSymbol* _name; // class name
+
+ public:
+ ciSymbol* name() const { return _name; }
+
+ bool is_loaded() const { return _klass->is_loaded(); }
+
+ // Make a pointer to a constant oop.
+ static const TypeInstPtr *make(ciObject* o) {
+ return make(TypePtr::Constant, o->klass(), true, o, 0);
+ }
+
+ // Make a pointer to a constant oop with offset.
+ static const TypeInstPtr *make(ciObject* o, int offset) {
+ return make(TypePtr::Constant, o->klass(), true, o, offset);
+ }
+
+ // Make a pointer to some value of type klass.
+ static const TypeInstPtr *make(PTR ptr, ciKlass* klass) {
+ return make(ptr, klass, false, NULL, 0);
+ }
+
+ // Make a pointer to some non-polymorphic value of exactly type klass.
+ static const TypeInstPtr *make_exact(PTR ptr, ciKlass* klass) {
+ return make(ptr, klass, true, NULL, 0);
+ }
+
+ // Make a pointer to some value of type klass with offset.
+ static const TypeInstPtr *make(PTR ptr, ciKlass* klass, int offset) {
+ return make(ptr, klass, false, NULL, offset);
+ }
+
+ // Make a pointer to an oop.
+ static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = 0 );
+
+ // If this is a java.lang.Class constant, return the type for it or NULL.
+ // Pass to Type::get_const_type to turn it to a type, which will usually
+ // be a TypeInstPtr, but may also be a TypeInt::INT for int.class, etc.
+ ciType* java_mirror_type() const;
+
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual const Type *cast_to_exactness(bool klass_is_exact) const;
+
+ virtual const TypeOopPtr *cast_to_instance(int instance_id) const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const TypeInstPtr *xmeet_unloaded( const TypeInstPtr *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // Convenience common pre-built types.
+ static const TypeInstPtr *NOTNULL;
+ static const TypeInstPtr *BOTTOM;
+ static const TypeInstPtr *MIRROR;
+ static const TypeInstPtr *MARK;
+ static const TypeInstPtr *KLASS;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
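
The make() overloads above cover the common cases; roughly, with hypothetical `obj`/`klass` inputs and the public meet() entry point declared earlier in this header:

  const Type* inst_ptr_examples(ciObject* obj, ciKlass* klass) {
    const TypeInstPtr* t_con   = TypeInstPtr::make(obj);                           // constant oop, offset 0
    const TypeInstPtr* t_some  = TypeInstPtr::make(TypePtr::NotNull, klass);       // some instance of klass or a subclass
    const TypeInstPtr* t_exact = TypeInstPtr::make_exact(TypePtr::NotNull, klass); // exactly klass, no subclasses
    return t_exact->meet(t_con->meet(t_some));                                     // lattice meets of the three
  }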
+
+//------------------------------TypeAryPtr-------------------------------------
+// Class of Java array pointers
+class TypeAryPtr : public TypeOopPtr {
+ TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id), _ary(ary) {};
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ const TypeAry *_ary; // Array we point into
+
+public:
+ // Accessors
+ ciKlass* klass() const;
+ const TypeAry* ary() const { return _ary; }
+ const Type* elem() const { return _ary->_elem; }
+ const TypeInt* size() const { return _ary->_size; }
+
+ static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = 0);
+ // Constant pointer to array
+ static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = 0);
+
+ // Convenience
+ static const TypeAryPtr *make(ciObject* o);
+
+ // Return a 'ptr' version of this type
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual const Type *cast_to_exactness(bool klass_is_exact) const;
+
+ virtual const TypeOopPtr *cast_to_instance(int instance_id) const;
+
+ virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const;
+
+ virtual bool empty(void) const; // TRUE if type is vacuous
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // Convenience common pre-built types.
+ static const TypeAryPtr *RANGE;
+ static const TypeAryPtr *OOPS;
+ static const TypeAryPtr *BYTES;
+ static const TypeAryPtr *SHORTS;
+ static const TypeAryPtr *CHARS;
+ static const TypeAryPtr *INTS;
+ static const TypeAryPtr *LONGS;
+ static const TypeAryPtr *FLOATS;
+ static const TypeAryPtr *DOUBLES;
+ // selects one of the above:
+ static const TypeAryPtr *get_array_body_type(BasicType elem) {
+ assert((uint)elem <= T_CONFLICT && _array_body_type[elem] != NULL, "bad elem type");
+ return _array_body_type[elem];
+ }
+ static const TypeAryPtr *_array_body_type[T_CONFLICT+1];
+ // sharpen the type of an int which is used as an array size
+ static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem);
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
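
get_array_body_type() is the usual way to pick one of the pre-built array body types; a minimal sketch:

  const TypeInt* int_array_size_type() {
    const TypeAryPtr* body = TypeAryPtr::get_array_body_type(T_INT);  // same table entry as TypeAryPtr::INTS
    return body->size();                                              // TypeInt bounds on the array length
  }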
+
+//------------------------------TypeKlassPtr-----------------------------------
+// Class of Java Klass pointers
+class TypeKlassPtr : public TypeOopPtr {
+ TypeKlassPtr( PTR ptr, ciKlass* klass, int offset );
+
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+
+public:
+ ciSymbol* name() const { return _klass->name(); }
+
+ // ptr to klass 'k'
+ static const TypeKlassPtr *make( ciKlass* k ) { return make( TypePtr::Constant, k, 0); }
+ // ptr to klass 'k' with offset
+ static const TypeKlassPtr *make( ciKlass* k, int offset ) { return make( TypePtr::Constant, k, offset); }
+ // ptr to klass 'k' or sub-klass
+ static const TypeKlassPtr *make( PTR ptr, ciKlass* k, int offset);
+
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual const Type *cast_to_exactness(bool klass_is_exact) const;
+
+ // corresponding pointer to instance, for a given class
+ const TypeOopPtr* as_instance_type() const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // Convenience common pre-built types.
+ static const TypeKlassPtr* OBJECT; // Not-null object klass or below
+ static const TypeKlassPtr* OBJECT_OR_NULL; // Maybe-null version of same
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
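
as_instance_type() here and as_klass_type() on TypeOopPtr convert between the two views of a class; a sketch with a hypothetical ciKlass `k`:

  const TypeOopPtr* instances_of(ciKlass* k) {
    const TypeKlassPtr* kp = TypeKlassPtr::make(k);   // constant pointer to the klass itself
    return kp->as_instance_type();                    // type of instances of that klass
  }                                                   // as_klass_type() goes the other way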
+
+//------------------------------TypeFunc---------------------------------------
+// Class of function types (call signatures: domain tuple to range tuple)
+class TypeFunc : public Type {
+ TypeFunc( const TypeTuple *domain, const TypeTuple *range ) : Type(Function), _domain(domain), _range(range) {}
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ // Constants are shared among ADLC and VM
+ enum { Control = AdlcVMDeps::Control,
+ I_O = AdlcVMDeps::I_O,
+ Memory = AdlcVMDeps::Memory,
+ FramePtr = AdlcVMDeps::FramePtr,
+ ReturnAdr = AdlcVMDeps::ReturnAdr,
+ Parms = AdlcVMDeps::Parms
+ };
+
+ const TypeTuple* const _domain; // Domain of inputs
+ const TypeTuple* const _range; // Range of results
+
+ // Accessors:
+ const TypeTuple* domain() const { return _domain; }
+ const TypeTuple* range() const { return _range; }
+
+ static const TypeFunc *make(ciMethod* method);
+ static const TypeFunc *make(ciSignature signature, const Type* extra);
+ static const TypeFunc *make(const TypeTuple* domain, const TypeTuple* range);
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ BasicType return_type() const;
+
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
+ void print_flattened() const; // Print a 'flattened' signature
+#endif
+ // Convenience common pre-built types.
+};
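
A TypeFunc describes a call signature whose first Parms domain slots are the fixed control/I_O/memory/frame/return-address inputs shared with the ADLC. A rough sketch; `m` is a hypothetical ciMethod and cnt() is the TypeTuple accessor declared earlier in this header:

  BasicType signature_info(ciMethod* m, uint* arg_cnt) {
    const TypeFunc* tf = TypeFunc::make(m);
    *arg_cnt = tf->domain()->cnt() - TypeFunc::Parms;   // user-visible arguments follow the fixed slots
    return tf->return_type();                           // T_VOID, T_INT, T_OBJECT, ...
  }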
+
+//------------------------------accessors--------------------------------------
+inline float Type::getf() const {
+ assert( _base == FloatCon, "Not a FloatCon" );
+ return ((TypeF*)this)->_f;
+}
+
+inline double Type::getd() const {
+ assert( _base == DoubleCon, "Not a DoubleCon" );
+ return ((TypeD*)this)->_d;
+}
+
+inline const TypeF *Type::is_float_constant() const {
+ assert( _base == FloatCon, "Not a Float" );
+ return (TypeF*)this;
+}
+
+inline const TypeF *Type::isa_float_constant() const {
+ return ( _base == FloatCon ? (TypeF*)this : NULL);
+}
+
+inline const TypeD *Type::is_double_constant() const {
+ assert( _base == DoubleCon, "Not a Double" );
+ return (TypeD*)this;
+}
+
+inline const TypeD *Type::isa_double_constant() const {
+ return ( _base == DoubleCon ? (TypeD*)this : NULL);
+}
+
+inline const TypeInt *Type::is_int() const {
+ assert( _base == Int, "Not an Int" );
+ return (TypeInt*)this;
+}
+
+inline const TypeInt *Type::isa_int() const {
+ return ( _base == Int ? (TypeInt*)this : NULL);
+}
+
+inline const TypeLong *Type::is_long() const {
+ assert( _base == Long, "Not a Long" );
+ return (TypeLong*)this;
+}
+
+inline const TypeLong *Type::isa_long() const {
+ return ( _base == Long ? (TypeLong*)this : NULL);
+}
+
+inline const TypeTuple *Type::is_tuple() const {
+ assert( _base == Tuple, "Not a Tuple" );
+ return (TypeTuple*)this;
+}
+
+inline const TypeAry *Type::is_ary() const {
+ assert( _base == Array , "Not an Array" );
+ return (TypeAry*)this;
+}
+
+inline const TypePtr *Type::is_ptr() const {
+ // AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between.
+ assert(_base >= AnyPtr && _base <= KlassPtr, "Not a pointer");
+ return (TypePtr*)this;
+}
+
+inline const TypePtr *Type::isa_ptr() const {
+ // AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between.
+ return (_base >= AnyPtr && _base <= KlassPtr) ? (TypePtr*)this : NULL;
+}
+
+inline const TypeOopPtr *Type::is_oopptr() const {
+ // OopPtr is the first and KlassPtr the last, with no non-oops between.
+ assert(_base >= OopPtr && _base <= KlassPtr, "Not a Java pointer" ) ;
+ return (TypeOopPtr*)this;
+}
+
+inline const TypeOopPtr *Type::isa_oopptr() const {
+ // OopPtr is the first and KlassPtr the last, with no non-oops between.
+ return (_base >= OopPtr && _base <= KlassPtr) ? (TypeOopPtr*)this : NULL;
+}
+
+inline const TypeRawPtr *Type::is_rawptr() const {
+ assert( _base == RawPtr, "Not a raw pointer" );
+ return (TypeRawPtr*)this;
+}
+
+inline const TypeInstPtr *Type::isa_instptr() const {
+ return (_base == InstPtr) ? (TypeInstPtr*)this : NULL;
+}
+
+inline const TypeInstPtr *Type::is_instptr() const {
+ assert( _base == InstPtr, "Not an object pointer" );
+ return (TypeInstPtr*)this;
+}
+
+inline const TypeAryPtr *Type::isa_aryptr() const {
+ return (_base == AryPtr) ? (TypeAryPtr*)this : NULL;
+}
+
+inline const TypeAryPtr *Type::is_aryptr() const {
+ assert( _base == AryPtr, "Not an array pointer" );
+ return (TypeAryPtr*)this;
+}
+
+inline const TypeKlassPtr *Type::isa_klassptr() const {
+ return (_base == KlassPtr) ? (TypeKlassPtr*)this : NULL;
+}
+
+inline const TypeKlassPtr *Type::is_klassptr() const {
+ assert( _base == KlassPtr, "Not a klass pointer" );
+ return (TypeKlassPtr*)this;
+}
+
+inline bool Type::is_floatingpoint() const {
+ if( (_base == FloatCon) || (_base == FloatBot) ||
+ (_base == DoubleCon) || (_base == DoubleBot) )
+ return true;
+ return false;
+}
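
The accessors above follow one convention: is_xxx() asserts and downcasts, while isa_xxx() returns NULL on a mismatch and so doubles as the test. A small sketch (the _lo field comes from the TypeInt declaration earlier in this header):

  int lower_bound_or_zero(const Type* t) {
    if (const TypeInt* ti = t->isa_int()) {   // NULL if t is not an Int
      return ti->_lo;                         // safe: ti is known to be a TypeInt
    }
    return 0;                                 // t->is_int() here would assert instead
  }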
+
+
+// ===============================================================
+// Things that need to be 64-bits in the 64-bit build but
+// 32-bits in the 32-bit build. Done this way to get full
+// optimization AND strong typing.
+#ifdef _LP64
+
+// For type queries and asserts
+#define is_intptr_t is_long
+#define isa_intptr_t isa_long
+#define find_intptr_t_type find_long_type
+#define find_intptr_t_con find_long_con
+#define TypeX TypeLong
+#define Type_X Type::Long
+#define TypeX_X TypeLong::LONG
+#define TypeX_ZERO TypeLong::ZERO
+// For 'ideal_reg' machine registers
+#define Op_RegX Op_RegL
+// For phase->intcon variants
+#define MakeConX longcon
+#define ConXNode ConLNode
+// For array index arithmetic
+#define MulXNode MulLNode
+#define AndXNode AndLNode
+#define OrXNode OrLNode
+#define CmpXNode CmpLNode
+#define SubXNode SubLNode
+#define LShiftXNode LShiftLNode
+// For object size computation:
+#define AddXNode AddLNode
+// For card marks and hashcodes
+#define URShiftXNode URShiftLNode
+// Opcodes
+#define Op_LShiftX Op_LShiftL
+#define Op_AndX Op_AndL
+#define Op_AddX Op_AddL
+#define Op_SubX Op_SubL
+// conversions
+#define ConvI2X(x) ConvI2L(x)
+#define ConvL2X(x) (x)
+#define ConvX2I(x) ConvL2I(x)
+#define ConvX2L(x) (x)
+
+#else
+
+// For type queries and asserts
+#define is_intptr_t is_int
+#define isa_intptr_t isa_int
+#define find_intptr_t_type find_int_type
+#define find_intptr_t_con find_int_con
+#define TypeX TypeInt
+#define Type_X Type::Int
+#define TypeX_X TypeInt::INT
+#define TypeX_ZERO TypeInt::ZERO
+// For 'ideal_reg' machine registers
+#define Op_RegX Op_RegI
+// For phase->intcon variants
+#define MakeConX intcon
+#define ConXNode ConINode
+// For array index arithmetic
+#define MulXNode MulINode
+#define AndXNode AndINode
+#define OrXNode OrINode
+#define CmpXNode CmpINode
+#define SubXNode SubINode
+#define LShiftXNode LShiftINode
+// For object size computation:
+#define AddXNode AddINode
+// For card marks and hashcodes
+#define URShiftXNode URShiftINode
+// Opcodes
+#define Op_LShiftX Op_LShiftI
+#define Op_AndX Op_AndI
+#define Op_AddX Op_AddI
+#define Op_SubX Op_SubI
+// conversions
+#define ConvI2X(x) (x)
+#define ConvL2X(x) ConvL2I(x)
+#define ConvX2I(x) (x)
+#define ConvX2L(x) ConvI2L(x)
+
+#endif
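
These X-suffixed names let shared code be written once for both word sizes; under _LP64 they expand to the long flavors, otherwise to the int flavors. A minimal sketch, assuming a PhaseGVN-style `phase` that provides the intcon()/longcon() factories referred to above:

  Node* pointer_width_constant(PhaseGVN* phase, intptr_t value) {
    return phase->MakeConX(value);   // longcon() -> ConLNode on 64-bit, intcon() -> ConINode on 32-bit
  }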
diff --git a/src/share/vm/opto/vectornode.cpp b/src/share/vm/opto/vectornode.cpp
new file mode 100644
index 000000000..f13751605
--- /dev/null
+++ b/src/share/vm/opto/vectornode.cpp
@@ -0,0 +1,478 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_vectornode.cpp.incl"
+
+//------------------------------VectorNode--------------------------------------
+
+// Return vector type for an element type and vector length.
+const Type* VectorNode::vect_type(BasicType elt_bt, uint len) {
+ assert(len <= VectorNode::max_vlen(elt_bt), "len in range");
+ switch(elt_bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ switch(len) {
+ case 2: return TypeInt::CHAR;
+ case 4: return TypeInt::INT;
+ case 8: return TypeLong::LONG;
+ }
+ break;
+ case T_CHAR:
+ case T_SHORT:
+ switch(len) {
+ case 2: return TypeInt::INT;
+ case 4: return TypeLong::LONG;
+ }
+ break;
+ case T_INT:
+ switch(len) {
+ case 2: return TypeLong::LONG;
+ }
+ break;
+ case T_LONG:
+ break;
+ case T_FLOAT:
+ switch(len) {
+ case 2: return Type::DOUBLE;
+ }
+ break;
+ case T_DOUBLE:
+ break;
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Scalar promotion
+VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ assert(vlen <= VectorNode::max_vlen(bt), "vlen in range");
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ if (vlen == 16) return new (C, 2) Replicate16BNode(s);
+ if (vlen == 8) return new (C, 2) Replicate8BNode(s);
+ if (vlen == 4) return new (C, 2) Replicate4BNode(s);
+ break;
+ case T_CHAR:
+ if (vlen == 8) return new (C, 2) Replicate8CNode(s);
+ if (vlen == 4) return new (C, 2) Replicate4CNode(s);
+ if (vlen == 2) return new (C, 2) Replicate2CNode(s);
+ break;
+ case T_SHORT:
+ if (vlen == 8) return new (C, 2) Replicate8SNode(s);
+ if (vlen == 4) return new (C, 2) Replicate4SNode(s);
+ if (vlen == 2) return new (C, 2) Replicate2SNode(s);
+ break;
+ case T_INT:
+ if (vlen == 4) return new (C, 2) Replicate4INode(s);
+ if (vlen == 2) return new (C, 2) Replicate2INode(s);
+ break;
+ case T_LONG:
+ if (vlen == 2) return new (C, 2) Replicate2LNode(s);
+ break;
+ case T_FLOAT:
+ if (vlen == 4) return new (C, 2) Replicate4FNode(s);
+ if (vlen == 2) return new (C, 2) Replicate2FNode(s);
+ break;
+ case T_DOUBLE:
+ if (vlen == 2) return new (C, 2) Replicate2DNode(s);
+ break;
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Return initial Pack node. Additional operands added with add_opd() calls.
+PackNode* PackNode::make(Compile* C, Node* s, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 2) PackBNode(s);
+ case T_CHAR:
+ return new (C, 2) PackCNode(s);
+ case T_SHORT:
+ return new (C, 2) PackSNode(s);
+ case T_INT:
+ return new (C, 2) PackINode(s);
+ case T_LONG:
+ return new (C, 2) PackLNode(s);
+ case T_FLOAT:
+ return new (C, 2) PackFNode(s);
+ case T_DOUBLE:
+ return new (C, 2) PackDNode(s);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Create a binary tree form for Packs. [lo, hi) (half-open) range
+Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
+ int ct = hi - lo;
+ assert(is_power_of_2(ct), "power of 2");
+ int mid = lo + ct/2;
+ Node* n1 = ct == 2 ? in(lo) : binaryTreePack(C, lo, mid);
+ Node* n2 = ct == 2 ? in(lo+1) : binaryTreePack(C, mid, hi );
+ int rslt_bsize = ct * type2aelembytes[elt_basic_type()];
+ if (bottom_type()->is_floatingpoint()) {
+ switch (rslt_bsize) {
+ case 8: return new (C, 3) PackFNode(n1, n2);
+ case 16: return new (C, 3) PackDNode(n1, n2);
+ }
+ } else {
+ assert(bottom_type()->isa_int() || bottom_type()->isa_long(), "int or long");
+ switch (rslt_bsize) {
+ case 2: return new (C, 3) Pack2x1BNode(n1, n2);
+ case 4: return new (C, 3) Pack2x2BNode(n1, n2);
+ case 8: return new (C, 3) PackINode(n1, n2);
+ case 16: return new (C, 3) PackLNode(n1, n2);
+ }
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Return the vector operator for the specified scalar operation
+// and vector length. One use is to check if the code generator
+// supports the vector operation.
+int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ if (!(is_power_of_2(vlen) && vlen <= max_vlen(bt)))
+ return 0; // unimplemented
+ switch (sopc) {
+ case Op_AddI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return Op_AddVB;
+ case T_CHAR: return Op_AddVC;
+ case T_SHORT: return Op_AddVS;
+ case T_INT: return Op_AddVI;
+ }
+ ShouldNotReachHere();
+ case Op_AddL:
+ assert(bt == T_LONG, "must be");
+ return Op_AddVL;
+ case Op_AddF:
+ assert(bt == T_FLOAT, "must be");
+ return Op_AddVF;
+ case Op_AddD:
+ assert(bt == T_DOUBLE, "must be");
+ return Op_AddVD;
+ case Op_SubI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return Op_SubVB;
+ case T_CHAR: return Op_SubVC;
+ case T_SHORT: return Op_SubVS;
+ case T_INT: return Op_SubVI;
+ }
+ ShouldNotReachHere();
+ case Op_SubL:
+ assert(bt == T_LONG, "must be");
+ return Op_SubVL;
+ case Op_SubF:
+ assert(bt == T_FLOAT, "must be");
+ return Op_SubVF;
+ case Op_SubD:
+ assert(bt == T_DOUBLE, "must be");
+ return Op_SubVD;
+ case Op_MulF:
+ assert(bt == T_FLOAT, "must be");
+ return Op_MulVF;
+ case Op_MulD:
+ assert(bt == T_DOUBLE, "must be");
+ return Op_MulVD;
+ case Op_DivF:
+ assert(bt == T_FLOAT, "must be");
+ return Op_DivVF;
+ case Op_DivD:
+ assert(bt == T_DOUBLE, "must be");
+ return Op_DivVD;
+ case Op_LShiftI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return Op_LShiftVB;
+ case T_CHAR: return Op_LShiftVC;
+ case T_SHORT: return Op_LShiftVS;
+ case T_INT: return Op_LShiftVI;
+ }
+ ShouldNotReachHere();
+ case Op_URShiftI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return Op_URShiftVB;
+ case T_CHAR: return Op_URShiftVC;
+ case T_SHORT: return Op_URShiftVS;
+ case T_INT: return Op_URShiftVI;
+ }
+ ShouldNotReachHere();
+ case Op_AndI:
+ case Op_AndL:
+ return Op_AndV;
+ case Op_OrI:
+ case Op_OrL:
+ return Op_OrV;
+ case Op_XorI:
+ case Op_XorL:
+ return Op_XorV;
+
+ case Op_LoadB:
+ case Op_LoadC:
+ case Op_LoadS:
+ case Op_LoadI:
+ case Op_LoadL:
+ case Op_LoadF:
+ case Op_LoadD:
+ return VectorLoadNode::opcode(sopc, vlen);
+
+ case Op_StoreB:
+ case Op_StoreC:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreF:
+ case Op_StoreD:
+ return VectorStoreNode::opcode(sopc, vlen);
+ }
+ return 0; // Unimplemented
+}
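
As the comment above notes, one use of opcode() is a pure capability probe; a sketch:

  bool add_int_vector_supported(uint vlen) {
    return VectorNode::opcode(Op_AddI, vlen, TypeInt::INT) != 0;   // 0 means unimplemented at this width
  }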
+
+// Helper for above.
+int VectorLoadNode::opcode(int sopc, uint vlen) {
+ switch (sopc) {
+ case Op_LoadB:
+ switch (vlen) {
+ case 2: return 0; // Unimplemented
+ case 4: return Op_Load4B;
+ case 8: return Op_Load8B;
+ case 16: return Op_Load16B;
+ }
+ break;
+ case Op_LoadC:
+ switch (vlen) {
+ case 2: return Op_Load2C;
+ case 4: return Op_Load4C;
+ case 8: return Op_Load8C;
+ }
+ break;
+ case Op_LoadS:
+ switch (vlen) {
+ case 2: return Op_Load2S;
+ case 4: return Op_Load4S;
+ case 8: return Op_Load8S;
+ }
+ break;
+ case Op_LoadI:
+ switch (vlen) {
+ case 2: return Op_Load2I;
+ case 4: return Op_Load4I;
+ }
+ break;
+ case Op_LoadL:
+ if (vlen == 2) return Op_Load2L;
+ break;
+ case Op_LoadF:
+ switch (vlen) {
+ case 2: return Op_Load2F;
+ case 4: return Op_Load4F;
+ }
+ break;
+ case Op_LoadD:
+ if (vlen == 2) return Op_Load2D;
+ break;
+ }
+ return 0; // Unimplemented
+}
+
+// Helper for above
+int VectorStoreNode::opcode(int sopc, uint vlen) {
+ switch (sopc) {
+ case Op_StoreB:
+ switch (vlen) {
+ case 2: return 0; // Unimplemented
+ case 4: return Op_Store4B;
+ case 8: return Op_Store8B;
+ case 16: return Op_Store16B;
+ }
+ break;
+ case Op_StoreC:
+ switch (vlen) {
+ case 2: return Op_Store2C;
+ case 4: return Op_Store4C;
+ case 8: return Op_Store8C;
+ }
+ break;
+ case Op_StoreI:
+ switch (vlen) {
+ case 2: return Op_Store2I;
+ case 4: return Op_Store4I;
+ }
+ break;
+ case Op_StoreL:
+ if (vlen == 2) return Op_Store2L;
+ break;
+ case Op_StoreF:
+ switch (vlen) {
+ case 2: return Op_Store2F;
+ case 4: return Op_Store4F;
+ }
+ break;
+ case Op_StoreD:
+ if (vlen == 2) return Op_Store2D;
+ break;
+ }
+ return 0; // Unimplemented
+}
+
+// Return the vector version of a scalar operation node.
+VectorNode* VectorNode::make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* opd_t) {
+ int vopc = opcode(sopc, vlen, opd_t);
+
+ switch (vopc) {
+ case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vlen);
+ case Op_AddVC: return new (C, 3) AddVCNode(n1, n2, vlen);
+ case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vlen);
+ case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vlen);
+ case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vlen);
+ case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vlen);
+ case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vlen);
+
+ case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vlen);
+ case Op_SubVC: return new (C, 3) SubVCNode(n1, n2, vlen);
+ case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vlen);
+ case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vlen);
+ case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vlen);
+ case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vlen);
+ case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vlen);
+
+ case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vlen);
+ case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vlen);
+
+ case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vlen);
+ case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vlen);
+
+ case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vlen);
+ case Op_LShiftVC: return new (C, 3) LShiftVCNode(n1, n2, vlen);
+ case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vlen);
+ case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vlen);
+
+ case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vlen);
+ case Op_URShiftVC: return new (C, 3) URShiftVCNode(n1, n2, vlen);
+ case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vlen);
+ case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vlen);
+
+ case Op_AndV: return new (C, 3) AndVNode(n1, n2, vlen, opd_t->array_element_basic_type());
+ case Op_OrV: return new (C, 3) OrVNode (n1, n2, vlen, opd_t->array_element_basic_type());
+ case Op_XorV: return new (C, 3) XorVNode(n1, n2, vlen, opd_t->array_element_basic_type());
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
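
Putting opcode() and make() together, a caller such as a vectorizing pass might widen a scalar AddI like this (a sketch; `a` and `b` are hypothetical nodes producing the vector operands):

  VectorNode* widen_add(Compile* C, Node* a, Node* b, uint vlen) {
    if (VectorNode::opcode(Op_AddI, vlen, TypeInt::INT) == 0)  return NULL;  // unsupported width
    return VectorNode::make(C, Op_AddI, a, b, vlen, TypeInt::INT);           // e.g. AddVINode for vlen == 4
  }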
+
+// Return the vector version of a scalar load node.
+VectorLoadNode* VectorLoadNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen) {
+ int vopc = opcode(opc, vlen);
+
+ switch(vopc) {
+ case Op_Load16B: return new (C, 3) Load16BNode(ctl, mem, adr, atyp);
+ case Op_Load8B: return new (C, 3) Load8BNode(ctl, mem, adr, atyp);
+ case Op_Load4B: return new (C, 3) Load4BNode(ctl, mem, adr, atyp);
+
+ case Op_Load8C: return new (C, 3) Load8CNode(ctl, mem, adr, atyp);
+ case Op_Load4C: return new (C, 3) Load4CNode(ctl, mem, adr, atyp);
+ case Op_Load2C: return new (C, 3) Load2CNode(ctl, mem, adr, atyp);
+
+ case Op_Load8S: return new (C, 3) Load8SNode(ctl, mem, adr, atyp);
+ case Op_Load4S: return new (C, 3) Load4SNode(ctl, mem, adr, atyp);
+ case Op_Load2S: return new (C, 3) Load2SNode(ctl, mem, adr, atyp);
+
+ case Op_Load4I: return new (C, 3) Load4INode(ctl, mem, adr, atyp);
+ case Op_Load2I: return new (C, 3) Load2INode(ctl, mem, adr, atyp);
+
+ case Op_Load2L: return new (C, 3) Load2LNode(ctl, mem, adr, atyp);
+
+ case Op_Load4F: return new (C, 3) Load4FNode(ctl, mem, adr, atyp);
+ case Op_Load2F: return new (C, 3) Load2FNode(ctl, mem, adr, atyp);
+
+ case Op_Load2D: return new (C, 3) Load2DNode(ctl, mem, adr, atyp);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Return the vector version of a scalar store node.
+VectorStoreNode* VectorStoreNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, VectorNode* val,
+ uint vlen) {
+ int vopc = opcode(opc, vlen);
+
+ switch(vopc) {
+ case Op_Store16B: return new (C, 4) Store16BNode(ctl, mem, adr, atyp, val);
+ case Op_Store8B: return new (C, 4) Store8BNode(ctl, mem, adr, atyp, val);
+ case Op_Store4B: return new (C, 4) Store4BNode(ctl, mem, adr, atyp, val);
+
+ case Op_Store8C: return new (C, 4) Store8CNode(ctl, mem, adr, atyp, val);
+ case Op_Store4C: return new (C, 4) Store4CNode(ctl, mem, adr, atyp, val);
+ case Op_Store2C: return new (C, 4) Store2CNode(ctl, mem, adr, atyp, val);
+
+ case Op_Store4I: return new (C, 4) Store4INode(ctl, mem, adr, atyp, val);
+ case Op_Store2I: return new (C, 4) Store2INode(ctl, mem, adr, atyp, val);
+
+ case Op_Store2L: return new (C, 4) Store2LNode(ctl, mem, adr, atyp, val);
+
+ case Op_Store4F: return new (C, 4) Store4FNode(ctl, mem, adr, atyp, val);
+ case Op_Store2F: return new (C, 4) Store2FNode(ctl, mem, adr, atyp, val);
+
+ case Op_Store2D: return new (C, 4) Store2DNode(ctl, mem, adr, atyp, val);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Extract a scalar element of vector.
+Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ assert(position < VectorNode::max_vlen(bt), "pos in range");
+ ConINode* pos = ConINode::make(C, (int)position);
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 3) ExtractBNode(v, pos);
+ case T_CHAR:
+ return new (C, 3) ExtractCNode(v, pos);
+ case T_SHORT:
+ return new (C, 3) ExtractSNode(v, pos);
+ case T_INT:
+ return new (C, 3) ExtractINode(v, pos);
+ case T_LONG:
+ return new (C, 3) ExtractLNode(v, pos);
+ case T_FLOAT:
+ return new (C, 3) ExtractFNode(v, pos);
+ case T_DOUBLE:
+ return new (C, 3) ExtractDNode(v, pos);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
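
And the reverse direction, pulling one lane back out of a vector (a sketch; `v` is a hypothetical vector-producing node):

  Node* first_int_lane(Compile* C, Node* v) {
    return ExtractNode::make(C, v, 0, TypeInt::INT);   // builds an ExtractINode for lane 0
  }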
diff --git a/src/share/vm/opto/vectornode.hpp b/src/share/vm/opto/vectornode.hpp
new file mode 100644
index 000000000..c06386777
--- /dev/null
+++ b/src/share/vm/opto/vectornode.hpp
@@ -0,0 +1,1134 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+//------------------------------VectorNode--------------------------------------
+// Vector Operation
+class VectorNode : public Node {
+ protected:
+ uint _length; // vector length
+ virtual BasicType elt_basic_type() const = 0; // Vector element basic type
+
+ static const Type* vect_type(BasicType elt_bt, uint len);
+ static const Type* vect_type(const Type* elt_type, uint len) {
+ return vect_type(elt_type->array_element_basic_type(), len);
+ }
+
+ public:
+ friend class VectorLoadNode; // For vect_type
+ friend class VectorStoreNode; // ditto.
+
+ VectorNode(Node* n1, uint vlen) : Node(NULL, n1), _length(vlen) {
+ init_flags(Flag_is_Vector);
+ }
+ VectorNode(Node* n1, Node* n2, uint vlen) : Node(NULL, n1, n2), _length(vlen) {
+ init_flags(Flag_is_Vector);
+ }
+ virtual int Opcode() const;
+
+ uint length() const { return _length; } // Vector length
+
+ static uint max_vlen(BasicType bt) { // max vector length
+ return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes[bt]);
+ }
+
+ // Element and vector type
+ const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
+ const Type* vect_type() const { return vect_type(elt_basic_type(), length()); }
+
+ virtual const Type *bottom_type() const { return vect_type(); }
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+
+ // Vector opcode from scalar opcode
+ static int opcode(int sopc, uint vlen, const Type* opd_t);
+
+ static VectorNode* scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t);
+
+ static VectorNode* make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* elt_t);
+
+};
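
max_vlen() ties the legal vector length to the machine vector width; a sketch of the usual legality check:

  bool fits_in_vector(BasicType bt, uint vlen) {
    return is_power_of_2(vlen) && vlen <= VectorNode::max_vlen(bt);
  }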
+
+//===========================Vector=ALU=Operations====================================
+
+//------------------------------AddVBNode---------------------------------------
+// Vector add byte
+class AddVBNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ AddVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVCNode---------------------------------------
+// Vector add char
+class AddVCNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ AddVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVSNode---------------------------------------
+// Vector add short
+class AddVSNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ AddVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVINode---------------------------------------
+// Vector add int
+class AddVINode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ AddVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVLNode---------------------------------------
+// Vector add long
+class AddVLNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ AddVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVFNode---------------------------------------
+// Vector add float
+class AddVFNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ AddVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVDNode---------------------------------------
+// Vector add double
+class AddVDNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ AddVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVBNode---------------------------------------
+// Vector subtract byte
+class SubVBNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ SubVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVCNode---------------------------------------
+// Vector subtract char
+class SubVCNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ SubVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVSNode---------------------------------------
+// Vector subtract short
+class SubVSNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ SubVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVINode---------------------------------------
+// Vector subtract int
+class SubVINode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ SubVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVLNode---------------------------------------
+// Vector subtract long
+class SubVLNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ SubVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVFNode---------------------------------------
+// Vector subtract float
+class SubVFNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ SubVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVDNode---------------------------------------
+// Vector subtract double
+class SubVDNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ SubVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------MulVFNode---------------------------------------
+// Vector multiply float
+class MulVFNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ MulVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------MulVDNode---------------------------------------
+// Vector multiply double
+class MulVDNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ MulVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------DivVFNode---------------------------------------
+// Vector divide float
+class DivVFNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ DivVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------DivVDNode---------------------------------------
+// Vector divide double
+class DivVDNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ DivVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LShiftVBNode---------------------------------------
+// Vector lshift byte
+class LShiftVBNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ LShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LShiftVCNode---------------------------------------
+// Vector lshift chars
+class LShiftVCNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ LShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LShiftVSNode---------------------------------------
+// Vector lshift shorts
+class LShiftVSNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ LShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LShiftVINode---------------------------------------
+// Vector lshift ints
+class LShiftVINode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ LShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------URShiftVBNode---------------------------------------
+// Vector urshift bytes
+class URShiftVBNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ URShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------URShiftVCNode---------------------------------------
+// Vector urshift chars
+class URShiftVCNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ URShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------URShiftVSNode---------------------------------------
+// Vector urshift shorts
+class URShiftVSNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ URShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------URShiftVINode---------------------------------------
+// Vector urshift ints
+class URShiftVINode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ URShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AndVNode---------------------------------------
+// Vector and
+class AndVNode : public VectorNode {
+ protected:
+ BasicType _bt;
+ virtual BasicType elt_basic_type() const { return _bt; }
+ public:
+ AndVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------OrVNode---------------------------------------
+// Vector or
+class OrVNode : public VectorNode {
+ protected:
+ BasicType _bt;
+ virtual BasicType elt_basic_type() const { return _bt; }
+ public:
+ OrVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------XorVNode---------------------------------------
+// Vector xor
+class XorVNode : public VectorNode {
+ protected:
+ BasicType _bt;
+ virtual BasicType elt_basic_type() const { return _bt; }
+ public:
+ XorVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ virtual int Opcode() const;
+};
+
+//================================= M E M O R Y ==================================
+
+
+//------------------------------VectorLoadNode--------------------------------------
+// Vector Load from memory
+class VectorLoadNode : public LoadNode {
+ virtual uint size_of() const { return sizeof(*this); }
+
+ protected:
+ virtual BasicType elt_basic_type() const = 0; // Vector element basic type
+ // For use in constructor
+ static const Type* vect_type(const Type* elt_type, uint len) {
+ return VectorNode::vect_type(elt_type, len);
+ }
+
+ public:
+ VectorLoadNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *rt)
+ : LoadNode(c,mem,adr,at,rt) {
+ init_flags(Flag_is_Vector);
+ }
+ virtual int Opcode() const;
+
+ virtual uint length() const = 0; // Vector length
+
+ // Element and vector type
+ const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
+ const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
+
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual BasicType memory_type() const { return T_VOID; }
+ virtual int memory_size() const { return length()*type2aelembytes[elt_basic_type()]; }
+
+ // Vector opcode from scalar opcode
+ static int opcode(int sopc, uint vlen);
+
+ static VectorLoadNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen);
+};
+
+//------------------------------Load16BNode--------------------------------------
+// Vector load of 16 bytes (8bits signed) from memory
+class Load16BNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Load16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,16)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store16B; }
+ virtual uint length() const { return 16; }
+};
+
+//------------------------------Load8BNode--------------------------------------
+// Vector load of 8 bytes (8bits signed) from memory
+class Load8BNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Load8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store8B; }
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Load4BNode--------------------------------------
+// Vector load of 4 bytes (8bits signed) from memory
+class Load4BNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Load4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4B; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load8CNode--------------------------------------
+// Vector load of 8 chars (16bits unsigned) from memory
+class Load8CNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Load8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store8C; }
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Load4CNode--------------------------------------
+// Vector load of 4 chars (16bits unsigned) from memory
+class Load4CNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Load4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4C; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load2CNode--------------------------------------
+// Vector load of 2 chars (16bits unsigned) from memory
+class Load2CNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Load2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2C; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load8SNode--------------------------------------
+// Vector load of 8 shorts (16bits signed) from memory
+class Load8SNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Load8SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store8C; }
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Load4SNode--------------------------------------
+// Vector load of 4 shorts (16bits signed) from memory
+class Load4SNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Load4SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4C; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load2SNode--------------------------------------
+// Vector load of 2 shorts (16bits signed) from memory
+class Load2SNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Load2SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2C; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load4INode--------------------------------------
+// Vector load of 4 integers (32bits signed) from memory
+class Load4INode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Load4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4I; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load2INode--------------------------------------
+// Vector load of 2 integers (32bits signed) from memory
+class Load2INode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Load2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2I; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load2LNode--------------------------------------
+// Vector load of 2 longs (64bits signed) from memory
+class Load2LNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ Load2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong *tl = TypeLong::LONG)
+ : VectorLoadNode(c,mem,adr,at,vect_type(tl,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2L; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load4FNode--------------------------------------
+// Vector load of 4 floats (32bits) from memory
+class Load4FNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Load4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(t,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4F; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load2FNode--------------------------------------
+// Vector load of 2 floats (32bits) from memory
+class Load2FNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Load2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2F; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load2DNode--------------------------------------
+// Vector load of 2 doubles (64bits) from memory
+class Load2DNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ Load2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::DOUBLE)
+ : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2D; }
+ virtual uint length() const { return 2; }
+};
+
+
+//------------------------------VectorStoreNode--------------------------------------
+// Vector Store to memory
+class VectorStoreNode : public StoreNode {
+ virtual uint size_of() const { return sizeof(*this); }
+
+ protected:
+ virtual BasicType elt_basic_type() const = 0; // Vector element basic type
+
+ public:
+ VectorStoreNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : StoreNode(c,mem,adr,at,val) {
+ init_flags(Flag_is_Vector);
+ }
+ virtual int Opcode() const;
+
+ virtual uint length() const = 0; // Vector length
+
+ // Element and vector type
+ const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
+ const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
+
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual BasicType memory_type() const { return T_VOID; }
+ virtual int memory_size() const { return length()*type2aelembytes[elt_basic_type()]; }
+
+ // Vector opcode from scalar opcode
+ static int opcode(int sopc, uint vlen);
+
+ static VectorStoreNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, VectorNode* val,
+ uint vlen);
+};
+
+//------------------------------Store16BNode--------------------------------------
+// Vector store of 16 bytes (8bits signed) to memory
+class Store16BNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Store16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 16; }
+};
+
+//------------------------------Store8BNode--------------------------------------
+// Vector store of 8 bytes (8bits signed) to memory
+class Store8BNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Store8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Store4BNode--------------------------------------
+// Vector store of 4 bytes (8bits signed) to memory
+class Store4BNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Store4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Store8CNode--------------------------------------
+// Vector store of 8 chars (16bits signed/unsigned) to memory
+class Store8CNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Store8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Store4CNode--------------------------------------
+// Vector store of 4 chars (16bits signed/unsigned) to memory
+class Store4CNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Store4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Store2CNode--------------------------------------
+// Vector store of 2 chars (16bits signed/unsigned) to memory
+class Store2CNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Store2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Store4INode--------------------------------------
+// Vector store of 4 integers (32bits signed) to memory
+class Store4INode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Store4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Store2INode--------------------------------------
+// Vector store of 2 integers (32bits signed) to memory
+class Store2INode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Store2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Store2LNode--------------------------------------
+// Vector store of 2 longs (64bits signed) to memory
+class Store2LNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ Store2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Store4FNode--------------------------------------
+// Vector store of 4 floats (32bits) to memory
+class Store4FNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Store4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Store2FNode--------------------------------------
+// Vector store of 2 floats (32bits) to memory
+class Store2FNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Store2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Store2DNode--------------------------------------
+// Vector store of 2 doubles (64bits) to memory
+class Store2DNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ Store2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//=========================Promote_Scalar_to_Vector====================================
+
+//------------------------------Replicate16BNode---------------------------------------
+// Replicate byte scalar to be vector of 16 bytes
+class Replicate16BNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Replicate16BNode(Node* in1) : VectorNode(in1, 16) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate8BNode---------------------------------------
+// Replicate byte scalar to be vector of 8 bytes
+class Replicate8BNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Replicate8BNode(Node* in1) : VectorNode(in1, 8) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4BNode---------------------------------------
+// Replicate byte scalar to be vector of 4 bytes
+class Replicate4BNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Replicate4BNode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate8CNode---------------------------------------
+// Replicate char scalar to be vector of 8 chars
+class Replicate8CNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Replicate8CNode(Node* in1) : VectorNode(in1, 8) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4CNode---------------------------------------
+// Replicate char scalar to be vector of 4 chars
+class Replicate4CNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Replicate4CNode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2CNode---------------------------------------
+// Replicate char scalar to be vector of 2 chars
+class Replicate2CNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Replicate2CNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate8SNode---------------------------------------
+// Replicate short scalar to be vector of 8 shorts
+class Replicate8SNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Replicate8SNode(Node* in1) : VectorNode(in1, 8) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4SNode---------------------------------------
+// Replicate short scalar to be vector of 4 shorts
+class Replicate4SNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Replicate4SNode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2SNode---------------------------------------
+// Replicate short scalar to be vector of 2 shorts
+class Replicate2SNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Replicate2SNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4INode---------------------------------------
+// Replicate int scalar to be vector of 4 ints
+class Replicate4INode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Replicate4INode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2INode---------------------------------------
+// Replicate int scalar to be vector of 2 ints
+class Replicate2INode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Replicate2INode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2LNode---------------------------------------
+// Replicate long scalar to be vector of 2 longs
+class Replicate2LNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ Replicate2LNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4FNode---------------------------------------
+// Replicate float scalar to be vector of 4 floats
+class Replicate4FNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Replicate4FNode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2FNode---------------------------------------
+// Replicate float scalar to be vector of 2 floats
+class Replicate2FNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Replicate2FNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2DNode---------------------------------------
+// Replicate double scalar to be vector of 2 doubles
+class Replicate2DNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ Replicate2DNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
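+
+// Illustrative sketch only: a Replicate node takes the scalar as its one
+// data input and broadcasts it to every lane of the result.  Broadcasting
+// an int scalar 's' across four lanes would look roughly like
+//
+//   Node* r = new (C, 2) Replicate4INode(s);
+//
+// (the edge count of 2 assumes VectorNode's control-plus-one-input layout;
+// the element type and lane count are fixed by the concrete class,
+// T_INT and 4 in this case).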
+
+//========================Pack_Scalars_into_a_Vector==============================
+
+//------------------------------PackNode---------------------------------------
+// Pack parent class (not for code generation).
+class PackNode : public VectorNode {
+ public:
+ PackNode(Node* in1) : VectorNode(in1, 1) {}
+ PackNode(Node* in1, Node* n2) : VectorNode(in1, n2, 2) {}
+ virtual int Opcode() const;
+
+ void add_opd(Node* n) {
+ add_req(n);
+ _length++;
+ assert(_length == req() - 1, "vector length matches edge count");
+ }
+
+ // Create a binary tree form for Packs. [lo, hi) (half-open) range
+ Node* binaryTreePack(Compile* C, int lo, int hi);
+
+ static PackNode* make(Compile* C, Node* s, const Type* elt_t);
+};
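+
+// Illustrative sketch only: add_opd() appends one more operand edge and
+// keeps _length in step with req(), which is what the assert above checks.
+// binaryTreePack() then combines the operands in the half-open edge range
+// [lo, hi) pairwise, so a flat pack of four operands p0..p3 would come out
+// as a balanced tree of two-input packs:
+//
+//   pack(pack(p0, p1), pack(p2, p3))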
+
+//------------------------------PackBNode---------------------------------------
+// Pack byte scalars into a vector
+class PackBNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ PackBNode(Node* in1) : PackNode(in1) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackCNode---------------------------------------
+// Pack char scalars into a vector
+class PackCNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ PackCNode(Node* in1) : PackNode(in1) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackSNode---------------------------------------
+// Pack short scalars into a vector
+class PackSNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ PackSNode(Node* in1) : PackNode(in1) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackINode---------------------------------------
+// Pack integer scalars into a vector
+class PackINode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ PackINode(Node* in1) : PackNode(in1) {}
+ PackINode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackLNode---------------------------------------
+// Pack long scalars into a vector
+class PackLNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ PackLNode(Node* in1) : PackNode(in1) {}
+ PackLNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackFNode---------------------------------------
+// Pack float scalars into a vector
+class PackFNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ PackFNode(Node* in1) : PackNode(in1) {}
+ PackFNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackDNode---------------------------------------
+// Pack double scalars into a vector
+class PackDNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ PackDNode(Node* in1) : PackNode(in1) {}
+ PackDNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+};
+
+// The Pack2xN nodes assist code generation. They are created from
+// Pack4C, etc. nodes in final_graph_reshape in the form of a
+// balanced, binary tree.
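+// For example (illustrative only), a pack of four byte operands b0..b3
+// could come out of that reshape as
+//
+//   Pack2x2B( Pack2x1B(b0, b1), Pack2x1B(b2, b3) )
+//
+// where each inner node produces a 2-byte half and the outer node joins
+// the halves into the final 4-byte vector.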
+
+//------------------------------Pack2x1BNode-----------------------------------------
+// Pack 2 1-byte integers into a vector of 2 bytes
+class Pack2x1BNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Pack2x1BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------Pack2x2BNode---------------------------------------
+// Pack 2 2-byte integers into a vector of 4 bytes
+class Pack2x2BNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Pack2x2BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//========================Extract_Scalar_from_Vector===============================
+
+//------------------------------ExtractNode---------------------------------------
+// Extract a scalar from a vector at position "pos"
+class ExtractNode : public Node {
+ public:
+ ExtractNode(Node* src, ConINode* pos) : Node(NULL, src, (Node*)pos) {
+ assert(in(2)->get_int() >= 0, "non-negative position constant");
+ }
+ virtual int Opcode() const;
+ uint pos() const { return in(2)->get_int(); }
+
+ static Node* make(Compile* C, Node* v, uint position, const Type* opd_t);
+};
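+
+// Illustrative sketch only: used directly, the make() factory declared
+// above would pull lane 2 out of an int vector 'vec' roughly as
+//
+//   Node* e = ExtractNode::make(C, vec, 2, TypeInt::INT);
+//
+// which is expected to hand back an ExtractINode whose pos() is 2.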
+
+//------------------------------ExtractBNode---------------------------------------
+// Extract a byte from a vector at position "pos"
+class ExtractBNode : public ExtractNode {
+ public:
+ ExtractBNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ExtractCNode---------------------------------------
+// Extract a char from a vector at position "pos"
+class ExtractCNode : public ExtractNode {
+ public:
+ ExtractCNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ExtractSNode---------------------------------------
+// Extract a short from a vector at position "pos"
+class ExtractSNode : public ExtractNode {
+ public:
+ ExtractSNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ExtractINode---------------------------------------
+// Extract an int from a vector at position "pos"
+class ExtractINode : public ExtractNode {
+ public:
+ ExtractINode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ExtractLNode---------------------------------------
+// Extract a long from a vector at position "pos"
+class ExtractLNode : public ExtractNode {
+ public:
+ ExtractLNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ExtractFNode---------------------------------------
+// Extract a float from a vector at position "pos"
+class ExtractFNode : public ExtractNode {
+ public:
+ ExtractFNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ExtractDNode---------------------------------------
+// Extract a double from a vector at position "pos"
+class ExtractDNode : public ExtractNode {
+ public:
+ ExtractDNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};