diff options
author | Chris Matthews <cmatthews5@apple.com> | 2018-12-11 18:47:42 +0000 |
---|---|---|
committer | Chris Matthews <cmatthews5@apple.com> | 2018-12-11 18:47:42 +0000 |
commit | 08fa3c3a1ab7806f1732c6738d666e7a5b773795 (patch) | |
tree | 5f6ad3150da8dfbe11718c6546b234cf0e0889cc /lnt/server | |
parent | a9f05c8f991479fff6f9b8ef801a28770a9b5a4c (diff) |
Keep a lookup table to avoid querying for FieldChange existence
On large data imports, this per-test, per-field existence check is very expensive. In my
testing, querying for the existing FieldChanges once up front and then checking that in-memory
lookup table saved ~20k queries and cut about 1/3 of the runtime of regenerate_fieldchanges_for_run.
git-svn-id: https://llvm.org/svn/llvm-project/lnt/trunk@348876 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lnt/server')
-rw-r--r-- | lnt/server/db/fieldchange.py | 30 |
1 file changed, 25 insertions, 5 deletions
diff --git a/lnt/server/db/fieldchange.py b/lnt/server/db/fieldchange.py index e6d993a..9282ad7 100644 --- a/lnt/server/db/fieldchange.py +++ b/lnt/server/db/fieldchange.py @@ -102,6 +102,24 @@ def regenerate_fieldchanges_for_run(session, ts, run_id): # Only store fieldchanges for "metric" samples like execution time; # not for fields with other data, e.g. hash of a binary + field_ids = [x.id for x in ts.Sample.get_metric_fields()] + + # We need to make sure if a field change already exists we use it. + # Since querying on every possible test*field is expensive, get the + # interesting locations ahead of time, and keep them in memory to + # check if we should actually query. + changes_of_interest = set(session.query(ts.FieldChange.start_order_id, + ts.FieldChange.end_order_id, + ts.FieldChange.test_id, + ts.FieldChange.machine_id, + ts.FieldChange.field_id) + .filter(ts.FieldChange.start_order == start_order) + .filter(ts.FieldChange.end_order == end_order) + .filter(ts.FieldChange.test_id.in_(runinfo.test_ids)) + .filter(ts.FieldChange.machine == run.machine) + .filter(ts.FieldChange.field_id.in_(field_ids)) + .all()) + for field in list(ts.Sample.get_metric_fields()): for test_id in runinfo.test_ids: f = None @@ -109,17 +127,19 @@ def regenerate_fieldchanges_for_run(session, ts, run_id): runs, previous_runs, test_id, field, ts.Sample.get_hash_of_binary_field()) # Try and find a matching FC and update, else create one. 
- try: + target = (start_order.id, run.order.id, run.machine.id, test_id, field.id) + should_search = target in changes_of_interest + + if should_search: f = session.query(ts.FieldChange) \ .filter(ts.FieldChange.start_order == start_order) \ .filter(ts.FieldChange.end_order == end_order) \ .filter(ts.FieldChange.test_id == test_id) \ .filter(ts.FieldChange.machine == run.machine) \ .filter(ts.FieldChange.field_id == field.id) \ - .one() - except sqlalchemy.orm.exc.NoResultFound: - f = None - + .one_or_none() + if not f: + logger.warning("Fell back to field lookup. Should not happen.") if not result.is_result_performance_change() and f: # With more data, its not a regression. Kill it! logger.info("Removing field change: {}".format(f.id)) |