/* The analysis "engine". Copyright (C) 2019-2022 Free Software Foundation, Inc. Contributed by David Malcolm . This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ #include "config.h" #define INCLUDE_MEMORY #include "system.h" #include "coretypes.h" #include "tree.h" #include "fold-const.h" #include "gcc-rich-location.h" #include "alloc-pool.h" #include "fibonacci_heap.h" #include "shortest-paths.h" #include "diagnostic-core.h" #include "diagnostic-event-id.h" #include "diagnostic-path.h" #include "function.h" #include "pretty-print.h" #include "sbitmap.h" #include "bitmap.h" #include "tristate.h" #include "ordered-hash-map.h" #include "selftest.h" #include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "analyzer/call-string.h" #include "analyzer/program-point.h" #include "analyzer/store.h" #include "analyzer/region-model.h" #include "analyzer/constraint-manager.h" #include "analyzer/sm.h" #include "analyzer/pending-diagnostic.h" #include "analyzer/diagnostic-manager.h" #include "cfg.h" #include "basic-block.h" #include "gimple.h" #include "gimple-iterator.h" #include "gimple-pretty-print.h" #include "cgraph.h" #include "digraph.h" #include "analyzer/supergraph.h" #include "analyzer/program-state.h" #include "analyzer/exploded-graph.h" #include "analyzer/analysis-plan.h" #include "analyzer/checker-path.h" #include "analyzer/state-purge.h" #include "analyzer/bar-chart.h" #include "analyzer/call-info.h" #include #include "plugin.h" #include "target.h" #include #include "stringpool.h" #include "attribs.h" #include "tree-dfa.h" /* For an overview, see gcc/doc/analyzer.texi. */ #if ENABLE_ANALYZER namespace ana { /* class impl_region_model_context : public region_model_context. */ impl_region_model_context:: impl_region_model_context (exploded_graph &eg, exploded_node *enode_for_diag, const program_state *old_state, program_state *new_state, uncertainty_t *uncertainty, path_context *path_ctxt, const gimple *stmt, stmt_finder *stmt_finder) : m_eg (&eg), m_logger (eg.get_logger ()), m_enode_for_diag (enode_for_diag), m_old_state (old_state), m_new_state (new_state), m_stmt (stmt), m_stmt_finder (stmt_finder), m_ext_state (eg.get_ext_state ()), m_uncertainty (uncertainty), m_path_ctxt (path_ctxt) { } impl_region_model_context:: impl_region_model_context (program_state *state, const extrinsic_state &ext_state, uncertainty_t *uncertainty, logger *logger) : m_eg (NULL), m_logger (logger), m_enode_for_diag (NULL), m_old_state (NULL), m_new_state (state), m_stmt (NULL), m_stmt_finder (NULL), m_ext_state (ext_state), m_uncertainty (uncertainty), m_path_ctxt (NULL) { } bool impl_region_model_context::warn (pending_diagnostic *d) { LOG_FUNC (get_logger ()); if (m_stmt == NULL && m_stmt_finder == NULL) { if (get_logger ()) get_logger ()->log ("rejecting diagnostic: no stmt"); delete d; return false; } if (m_eg) return m_eg->get_diagnostic_manager ().add_diagnostic (m_enode_for_diag, m_enode_for_diag->get_supernode (), m_stmt, m_stmt_finder, d); else { delete d; return false; } } void impl_region_model_context::add_note (pending_note *pn) { LOG_FUNC (get_logger ()); if (m_eg) m_eg->get_diagnostic_manager ().add_note (pn); else delete pn; } void impl_region_model_context::on_svalue_leak (const svalue *sval) { for (sm_state_map *smap : m_new_state->m_checker_states) smap->on_svalue_leak (sval, this); } void impl_region_model_context:: on_liveness_change (const svalue_set &live_svalues, const region_model *model) { for (sm_state_map *smap : m_new_state->m_checker_states) smap->on_liveness_change (live_svalues, model, this); } void impl_region_model_context::on_unknown_change (const svalue *sval, bool is_mutable) { for (sm_state_map *smap : m_new_state->m_checker_states) smap->on_unknown_change (sval, is_mutable, m_ext_state); } void impl_region_model_context::on_escaped_function (tree fndecl) { m_eg->on_escaped_function (fndecl); } uncertainty_t * impl_region_model_context::get_uncertainty () { return m_uncertainty; } /* Purge state involving SVAL. The region_model has already been purged, so we only need to purge other state in the program_state: the sm-state. */ void impl_region_model_context::purge_state_involving (const svalue *sval) { int i; sm_state_map *smap; FOR_EACH_VEC_ELT (m_new_state->m_checker_states, i, smap) smap->purge_state_involving (sval, m_ext_state); } void impl_region_model_context::bifurcate (custom_edge_info *info) { if (m_path_ctxt) m_path_ctxt->bifurcate (info); else delete info; } void impl_region_model_context::terminate_path () { if (m_path_ctxt) return m_path_ctxt->terminate_path (); } bool impl_region_model_context::get_malloc_map (sm_state_map **out_smap, const state_machine **out_sm, unsigned *out_sm_idx) { unsigned malloc_sm_idx; if (!m_ext_state.get_sm_idx_by_name ("malloc", &malloc_sm_idx)) return false; *out_smap = m_new_state->m_checker_states[malloc_sm_idx]; *out_sm = &m_ext_state.get_sm (malloc_sm_idx); *out_sm_idx = malloc_sm_idx; return true; } bool impl_region_model_context::get_taint_map (sm_state_map **out_smap, const state_machine **out_sm, unsigned *out_sm_idx) { if (!m_new_state) return false; unsigned taint_sm_idx; if (!m_ext_state.get_sm_idx_by_name ("taint", &taint_sm_idx)) return false; *out_smap = m_new_state->m_checker_states[taint_sm_idx]; *out_sm = &m_ext_state.get_sm (taint_sm_idx); *out_sm_idx = taint_sm_idx; return true; } /* struct setjmp_record. */ int setjmp_record::cmp (const setjmp_record &rec1, const setjmp_record &rec2) { if (int cmp_enode = rec1.m_enode->m_index - rec2.m_enode->m_index) return cmp_enode; gcc_assert (&rec1 == &rec2); return 0; } /* class setjmp_svalue : public svalue. */ /* Implementation of svalue::accept vfunc for setjmp_svalue. */ void setjmp_svalue::accept (visitor *v) const { v->visit_setjmp_svalue (this); } /* Implementation of svalue::dump_to_pp vfunc for setjmp_svalue. */ void setjmp_svalue::dump_to_pp (pretty_printer *pp, bool simple) const { if (simple) pp_printf (pp, "SETJMP(EN: %i)", get_enode_index ()); else pp_printf (pp, "setjmp_svalue(EN%i)", get_enode_index ()); } /* Get the index of the stored exploded_node. */ int setjmp_svalue::get_enode_index () const { return m_setjmp_record.m_enode->m_index; } /* Concrete implementation of sm_context, wiring it up to the rest of this file. */ class impl_sm_context : public sm_context { public: impl_sm_context (exploded_graph &eg, int sm_idx, const state_machine &sm, exploded_node *enode_for_diag, const program_state *old_state, program_state *new_state, const sm_state_map *old_smap, sm_state_map *new_smap, path_context *path_ctxt, stmt_finder *stmt_finder = NULL, bool unknown_side_effects = false) : sm_context (sm_idx, sm), m_logger (eg.get_logger ()), m_eg (eg), m_enode_for_diag (enode_for_diag), m_old_state (old_state), m_new_state (new_state), m_old_smap (old_smap), m_new_smap (new_smap), m_path_ctxt (path_ctxt), m_stmt_finder (stmt_finder), m_unknown_side_effects (unknown_side_effects) { } logger *get_logger () const { return m_logger.get_logger (); } tree get_fndecl_for_call (const gcall *call) FINAL OVERRIDE { impl_region_model_context old_ctxt (m_eg, m_enode_for_diag, NULL, NULL, NULL/*m_enode->get_state ()*/, NULL, call); region_model *model = m_new_state->m_region_model; return model->get_fndecl_for_call (call, &old_ctxt); } state_machine::state_t get_state (const gimple *stmt ATTRIBUTE_UNUSED, tree var) { logger * const logger = get_logger (); LOG_FUNC (logger); /* Use NULL ctxt on this get_rvalue call to avoid triggering uninitialized value warnings. */ const svalue *var_old_sval = m_old_state->m_region_model->get_rvalue (var, NULL); state_machine::state_t current = m_old_smap->get_state (var_old_sval, m_eg.get_ext_state ()); return current; } state_machine::state_t get_state (const gimple *stmt ATTRIBUTE_UNUSED, const svalue *sval) { logger * const logger = get_logger (); LOG_FUNC (logger); state_machine::state_t current = m_old_smap->get_state (sval, m_eg.get_ext_state ()); return current; } void set_next_state (const gimple *stmt, tree var, state_machine::state_t to, tree origin) { logger * const logger = get_logger (); LOG_FUNC (logger); impl_region_model_context new_ctxt (m_eg, m_enode_for_diag, m_old_state, m_new_state, NULL, NULL, stmt); const svalue *var_new_sval = m_new_state->m_region_model->get_rvalue (var, &new_ctxt); const svalue *origin_new_sval = m_new_state->m_region_model->get_rvalue (origin, &new_ctxt); /* We use the new sval here to avoid issues with uninitialized values. */ state_machine::state_t current = m_old_smap->get_state (var_new_sval, m_eg.get_ext_state ()); if (logger) logger->log ("%s: state transition of %qE: %s -> %s", m_sm.get_name (), var, current->get_name (), to->get_name ()); m_new_smap->set_state (m_new_state->m_region_model, var_new_sval, to, origin_new_sval, m_eg.get_ext_state ()); } void set_next_state (const gimple *stmt, const svalue *sval, state_machine::state_t to, tree origin) { logger * const logger = get_logger (); LOG_FUNC (logger); impl_region_model_context old_ctxt (m_eg, m_enode_for_diag, NULL, NULL, NULL/*m_enode->get_state ()*/, NULL, stmt); impl_region_model_context new_ctxt (m_eg, m_enode_for_diag, m_old_state, m_new_state, NULL, NULL, stmt); const svalue *origin_new_sval = m_new_state->m_region_model->get_rvalue (origin, &new_ctxt); state_machine::state_t current = m_old_smap->get_state (sval, m_eg.get_ext_state ()); if (logger) { logger->start_log_line (); logger->log_partial ("%s: state transition of ", m_sm.get_name ()); sval->dump_to_pp (logger->get_printer (), true); logger->log_partial (": %s -> %s", current->get_name (), to->get_name ()); logger->end_log_line (); } m_new_smap->set_state (m_new_state->m_region_model, sval, to, origin_new_sval, m_eg.get_ext_state ()); } void warn (const supernode *snode, const gimple *stmt, tree var, pending_diagnostic *d) FINAL OVERRIDE { LOG_FUNC (get_logger ()); gcc_assert (d); // take ownership impl_region_model_context old_ctxt (m_eg, m_enode_for_diag, m_old_state, m_new_state, NULL, NULL, NULL); const svalue *var_old_sval = m_old_state->m_region_model->get_rvalue (var, &old_ctxt); state_machine::state_t current = (var ? m_old_smap->get_state (var_old_sval, m_eg.get_ext_state ()) : m_old_smap->get_global_state ()); m_eg.get_diagnostic_manager ().add_diagnostic (&m_sm, m_enode_for_diag, snode, stmt, m_stmt_finder, var, var_old_sval, current, d); } /* Hook for picking more readable trees for SSA names of temporaries, so that rather than e.g. "double-free of ''" we can print: "double-free of 'inbuf.data'". */ tree get_diagnostic_tree (tree expr) FINAL OVERRIDE { /* Only for SSA_NAMEs of temporaries; otherwise, return EXPR, as it's likely to be the least surprising tree to report. */ if (TREE_CODE (expr) != SSA_NAME) return expr; if (SSA_NAME_VAR (expr) != NULL) return expr; gcc_assert (m_new_state); const svalue *sval = m_new_state->m_region_model->get_rvalue (expr, NULL); /* Find trees for all regions storing the value. */ if (tree t = m_new_state->m_region_model->get_representative_tree (sval)) return t; else return expr; } tree get_diagnostic_tree (const svalue *sval) FINAL OVERRIDE { return m_new_state->m_region_model->get_representative_tree (sval); } state_machine::state_t get_global_state () const FINAL OVERRIDE { return m_old_state->m_checker_states[m_sm_idx]->get_global_state (); } void set_global_state (state_machine::state_t state) FINAL OVERRIDE { m_new_state->m_checker_states[m_sm_idx]->set_global_state (state); } void on_custom_transition (custom_transition *transition) FINAL OVERRIDE { transition->impl_transition (&m_eg, const_cast (m_enode_for_diag), m_sm_idx); } tree is_zero_assignment (const gimple *stmt) FINAL OVERRIDE { const gassign *assign_stmt = dyn_cast (stmt); if (!assign_stmt) return NULL_TREE; impl_region_model_context old_ctxt (m_eg, m_enode_for_diag, m_old_state, m_new_state, NULL, NULL, stmt); if (const svalue *sval = m_new_state->m_region_model->get_gassign_result (assign_stmt, &old_ctxt)) if (tree cst = sval->maybe_get_constant ()) if (::zerop(cst)) return gimple_assign_lhs (assign_stmt); return NULL_TREE; } path_context *get_path_context () const FINAL OVERRIDE { return m_path_ctxt; } bool unknown_side_effects_p () const FINAL OVERRIDE { return m_unknown_side_effects; } const program_state *get_old_program_state () const FINAL OVERRIDE { return m_old_state; } log_user m_logger; exploded_graph &m_eg; exploded_node *m_enode_for_diag; const program_state *m_old_state; program_state *m_new_state; const sm_state_map *m_old_smap; sm_state_map *m_new_smap; path_context *m_path_ctxt; stmt_finder *m_stmt_finder; /* Are we handling an external function with unknown side effects? */ bool m_unknown_side_effects; }; /* Subclass of stmt_finder for finding the best stmt to report the leak at, given the emission path. */ class leak_stmt_finder : public stmt_finder { public: leak_stmt_finder (const exploded_graph &eg, tree var) : m_eg (eg), m_var (var) {} stmt_finder *clone () const FINAL OVERRIDE { return new leak_stmt_finder (m_eg, m_var); } const gimple *find_stmt (const exploded_path &epath) FINAL OVERRIDE { logger * const logger = m_eg.get_logger (); LOG_FUNC (logger); if (m_var && TREE_CODE (m_var) == SSA_NAME) { /* Locate the final write to this SSA name in the path. */ const gimple *def_stmt = SSA_NAME_DEF_STMT (m_var); int idx_of_def_stmt; bool found = epath.find_stmt_backwards (def_stmt, &idx_of_def_stmt); if (!found) goto not_found; /* What was the next write to the underlying var after the SSA name was set? (if any). */ for (unsigned idx = idx_of_def_stmt + 1; idx < epath.m_edges.length (); ++idx) { const exploded_edge *eedge = epath.m_edges[idx]; if (logger) logger->log ("eedge[%i]: EN %i -> EN %i", idx, eedge->m_src->m_index, eedge->m_dest->m_index); const exploded_node *dst_node = eedge->m_dest; const program_point &dst_point = dst_node->get_point (); const gimple *stmt = dst_point.get_stmt (); if (!stmt) continue; if (const gassign *assign = dyn_cast (stmt)) { tree lhs = gimple_assign_lhs (assign); if (TREE_CODE (lhs) == SSA_NAME && SSA_NAME_VAR (lhs) == SSA_NAME_VAR (m_var)) return assign; } } } not_found: /* Look backwards for the first statement with a location. */ int i; const exploded_edge *eedge; FOR_EACH_VEC_ELT_REVERSE (epath.m_edges, i, eedge) { if (logger) logger->log ("eedge[%i]: EN %i -> EN %i", i, eedge->m_src->m_index, eedge->m_dest->m_index); const exploded_node *dst_node = eedge->m_dest; const program_point &dst_point = dst_node->get_point (); const gimple *stmt = dst_point.get_stmt (); if (stmt) if (get_pure_location (stmt->location) != UNKNOWN_LOCATION) return stmt; } gcc_unreachable (); return NULL; } private: const exploded_graph &m_eg; tree m_var; }; /* A measurement of how good EXPR is for presenting to the user, so that e.g. we can say prefer printing "leak of 'tmp.m_ptr'" over: "leak of ''". */ static int readability (const_tree expr) { /* Arbitrarily-chosen "high readability" value. */ const int HIGH_READABILITY = 65536; gcc_assert (expr); switch (TREE_CODE (expr)) { case COMPONENT_REF: case MEM_REF: /* Impose a slight readability penalty relative to that of operand 0. */ return readability (TREE_OPERAND (expr, 0)) - 16; case SSA_NAME: { if (tree var = SSA_NAME_VAR (expr)) { if (DECL_ARTIFICIAL (var)) { /* If we have an SSA name for an artificial var, only use it if it has a debug expr associated with it that fixup_tree_for_diagnostic can use. */ if (VAR_P (var) && DECL_HAS_DEBUG_EXPR_P (var)) return readability (DECL_DEBUG_EXPR (var)) - 1; } else { /* Slightly favor the underlying var over the SSA name to avoid having them compare equal. */ return readability (var) - 1; } } /* Avoid printing '' for SSA names for temporaries. */ return -1; } break; case PARM_DECL: case VAR_DECL: if (DECL_NAME (expr)) return HIGH_READABILITY; else /* We don't want to print temporaries. For example, the C FE prints them as e.g. "" where "xxxx" is the low 16 bits of the tree pointer (see pp_c_tree_decl_identifier). */ return -1; case RESULT_DECL: /* Printing "" isn't ideal, but is less awful than trying to print a temporary. */ return HIGH_READABILITY / 2; case NOP_EXPR: { /* Impose a moderate readability penalty for casts. */ const int CAST_PENALTY = 32; return readability (TREE_OPERAND (expr, 0)) - CAST_PENALTY; } case INTEGER_CST: return HIGH_READABILITY; default: return 0; } return 0; } /* A qsort comparator for trees to sort them into most user-readable to least user-readable. */ int readability_comparator (const void *p1, const void *p2) { path_var pv1 = *(path_var const *)p1; path_var pv2 = *(path_var const *)p2; const int tree_r1 = readability (pv1.m_tree); const int tree_r2 = readability (pv2.m_tree); /* Favor items that are deeper on the stack and hence more recent; this also favors locals over globals. */ const int COST_PER_FRAME = 64; const int depth_r1 = pv1.m_stack_depth * COST_PER_FRAME; const int depth_r2 = pv2.m_stack_depth * COST_PER_FRAME; /* Combine the scores from the tree and from the stack depth. This e.g. lets us have a slightly penalized cast in the most recent stack frame "beat" an uncast value in an older stack frame. */ const int sum_r1 = tree_r1 + depth_r1; const int sum_r2 = tree_r2 + depth_r2; if (int cmp = sum_r2 - sum_r1) return cmp; /* Otherwise, more readable trees win. */ if (int cmp = tree_r2 - tree_r1) return cmp; /* Otherwise, if they have the same readability, then impose an arbitrary deterministic ordering on them. */ if (int cmp = TREE_CODE (pv1.m_tree) - TREE_CODE (pv2.m_tree)) return cmp; switch (TREE_CODE (pv1.m_tree)) { default: break; case SSA_NAME: if (int cmp = (SSA_NAME_VERSION (pv1.m_tree) - SSA_NAME_VERSION (pv2.m_tree))) return cmp; break; case PARM_DECL: case VAR_DECL: case RESULT_DECL: if (int cmp = DECL_UID (pv1.m_tree) - DECL_UID (pv2.m_tree)) return cmp; break; } /* TODO: We ought to find ways of sorting such cases. */ return 0; } /* Return true is SNODE is the EXIT node of a function, or is one of the final snodes within its function. Specifically, handle the final supernodes before the EXIT node, for the case of clobbers that happen immediately before exiting. We need a run of snodes leading to the return_p snode, where all edges are intraprocedural, and every snode has just one successor. We use this when suppressing leak reports at the end of "main". */ static bool returning_from_function_p (const supernode *snode) { if (!snode) return false; unsigned count = 0; const supernode *iter = snode; while (true) { if (iter->return_p ()) return true; if (iter->m_succs.length () != 1) return false; const superedge *sedge = iter->m_succs[0]; if (sedge->get_kind () != SUPEREDGE_CFG_EDGE) return false; iter = sedge->m_dest; /* Impose a limit to ensure we terminate for pathological cases. We only care about the final 3 nodes, due to cases like: BB: (clobber causing leak) BB: