diff options
author | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2024-04-01 12:26:44 +0000 |
---|---|---|
committer | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2024-04-03 15:09:15 +0000 |
commit | c001402337a0e44bccc146b8aef4c8d24c64f10e (patch) | |
tree | ca468048449350533e0a71ca6cd85d395f157a11 | |
parent | c025364e2d0e83fdb2e6c52cd0f384b8ae18b13d (diff) |
[contrib] validate_failures.py: Add statistics for test results
It is useful to know the aggregate statistics for test results
like the number of stable fails, flaky tests, total number of tests,
etc. Some of these statistics are reasonably easy to calculate
with the current logic, while others would be quite difficult.
This patch series adds 3 statistics:
- fails: number of stable (non-flaky) failed tests;
- flaky: number of flaky entries in the manifest;
- total: total number of tests in the results including
flaky passes and fails.
This patch adds the new class ResultsStats and associated
logic to handle "failed" statistic. Subsequent patches will
add calculation of "flaky" and "total" statistics.
Note, it is difficult to calculate the number of passed tests,
since we can't easily distinguish flaky tests that happened
to pass from the genuine passed tests.
Similarly, we can't easily calculate the number of flaky tests
encountered in the current run (again, it's non-trivial
to detect flaky tests that passed), so instead we report
the number of flaky entries in the manifest.
Change-Id: I6d26d177e32fca9a63796e69faa486fc6d018c05
-rwxr-xr-x | contrib/testsuite-management/validate_failures.py | 30 |
1 files changed, 29 insertions, 1 deletions
diff --git a/contrib/testsuite-management/validate_failures.py b/contrib/testsuite-management/validate_failures.py index d7d0c91..1d69376 100755 --- a/contrib/testsuite-management/validate_failures.py +++ b/contrib/testsuite-management/validate_failures.py @@ -252,7 +252,6 @@ class ResultSet(set): outfile.write(_EXP_LINE_FORMAT % (current_tool, current_exp)) outfile.write('%s\n' % result) - outfile.write(_SUMMARY_LINE_FORMAT % 'Results') # Check if testsuite of expected_result is present in current results. # This is used to compare partial test results against a full manifest. @@ -466,6 +465,28 @@ def GetResults(sum_files, build_results = None): build_results.update(ParseSummary(sum_fname)) return build_results +class ResultsStats: + """Describes statistics of DejaGNU test results. + + Attributes: + fails: Number of non-flaky failed tests in the results. + flaky: Number of flaky entries in the manifest. + total: Total number of tests in the results, including flaky passes and + fails. + """ + + def __init__(self): + self.fails = 0 + self.flaky = 0 + self.total = 0 + + def Print(self, outfile=sys.stdout): + outfile.write(_SUMMARY_LINE_FORMAT % 'Results') + outfile.write(f'\n') + outfile.write(f'# of stable fails\t\t{self.fails}\n') + outfile.write(f'# of flaky entries\t\t{self.flaky}\n') + outfile.write(f'# of all tests\t\t\t{self.total}\n') + def CompareResults(manifest, actual): """Compare sets of results and return two lists: @@ -552,6 +573,9 @@ def GetSumFiles(results, build_dir): def PerformComparison(expected, actual): + stats = ResultsStats() + stats.fails = len(actual) + actual_vs_expected, expected_vs_actual = CompareResults(expected, actual) if _OPTIONS.inverse_match: @@ -560,6 +584,7 @@ def PerformComparison(expected, actual): # but PASSed in actual set. 
actual_vs_expected, expected_vs_actual \ = expected_vs_actual, actual_vs_expected + stats = None tests_ok = True if len(actual_vs_expected) > 0: @@ -569,6 +594,9 @@ def PerformComparison(expected, actual): PrintSummary(actual_vs_expected) tests_ok = False + if _OPTIONS.verbosity >= 1 and stats: + stats.Print() + if _OPTIONS.verbosity >= 2 and len(expected_vs_actual) > 0: print('\n\nExpected results not present in this build (fixed tests)' '\n\nNOTE: This is not a failure. It just means that these ' |