author    Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>   2024-04-01 12:26:44 +0000
committer Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>   2024-04-03 15:09:15 +0000
commit    c001402337a0e44bccc146b8aef4c8d24c64f10e (patch)
tree      ca468048449350533e0a71ca6cd85d395f157a11
parent    c025364e2d0e83fdb2e6c52cd0f384b8ae18b13d (diff)
[contrib] validate_failures.py: Add statistics for test results
It is useful to know the aggregate statistics for test results, such as the number of stable fails, the number of flaky tests, and the total number of tests. Some of these statistics are reasonably easy to calculate with the current logic, while others would be quite difficult.

This patch series adds 3 statistics:
- fails: number of stable (non-flaky) failed tests;
- flaky: number of flaky entries in the manifest;
- total: total number of tests in the results, including flaky passes and fails.

This patch adds the new class ResultsStats and the associated logic to handle the "fails" statistic. Subsequent patches will add calculation of the "flaky" and "total" statistics.

Note that it is difficult to calculate the number of passed tests, since we can't easily distinguish flaky tests that happened to pass from genuinely passing tests. Similarly, we can't easily count the flaky tests encountered in the current run (again, it is non-trivial to detect flaky tests that passed), so we report the number of flaky entries in the manifest instead.

Change-Id: I6d26d177e32fca9a63796e69faa486fc6d018c05
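For reference, below is a minimal standalone sketch of the new class as introduced by this patch, with a small driver showing how the summary is printed. The value of _SUMMARY_LINE_FORMAT and the sample counts are stand-ins for illustration; the real format string and the real counts come from elsewhere in validate_failures.py.

    import sys

    # Stand-in for the script's real summary format string; the exact
    # value used by validate_failures.py may differ.
    _SUMMARY_LINE_FORMAT = '\n\t\t=== %s ===\n'

    class ResultsStats:
      """Describes statistics of DejaGNU test results."""

      def __init__(self):
        self.fails = 0   # Number of non-flaky failed tests in the results.
        self.flaky = 0   # Number of flaky entries in the manifest.
        self.total = 0   # Total number of tests, incl. flaky passes and fails.

      def Print(self, outfile=sys.stdout):
        # Emit a short summary block after the per-test report.
        outfile.write(_SUMMARY_LINE_FORMAT % 'Results')
        outfile.write('\n')
        outfile.write(f'# of stable fails\t\t{self.fails}\n')
        outfile.write(f'# of flaky entries\t\t{self.flaky}\n')
        outfile.write(f'# of all tests\t\t\t{self.total}\n')

    if __name__ == '__main__':
      # Illustrative values only; in the script, `fails` is taken from the
      # size of the actual result set in PerformComparison().
      stats = ResultsStats()
      stats.fails = 3
      stats.total = 120000
      stats.Print()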
-rwxr-xr-x  contrib/testsuite-management/validate_failures.py  30
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/contrib/testsuite-management/validate_failures.py b/contrib/testsuite-management/validate_failures.py
index d7d0c91..1d69376 100755
--- a/contrib/testsuite-management/validate_failures.py
+++ b/contrib/testsuite-management/validate_failures.py
@@ -252,7 +252,6 @@ class ResultSet(set):
outfile.write(_EXP_LINE_FORMAT % (current_tool, current_exp))
outfile.write('%s\n' % result)
- outfile.write(_SUMMARY_LINE_FORMAT % 'Results')
# Check if testsuite of expected_result is present in current results.
# This is used to compare partial test results against a full manifest.
@@ -466,6 +465,28 @@ def GetResults(sum_files, build_results = None):
build_results.update(ParseSummary(sum_fname))
return build_results
+class ResultsStats:
+ """Describes statistics of DejaGNU test results.
+
+ Attributes:
+ fails: Number of non-flaky failed tests in the results.
+ flaky: Number of flaky entries in the manifest.
+ total: Total number of tests in the results, including flaky passes and
+ fails.
+ """
+
+ def __init__(self):
+ self.fails = 0
+ self.flaky = 0
+ self.total = 0
+
+ def Print(self, outfile=sys.stdout):
+ outfile.write(_SUMMARY_LINE_FORMAT % 'Results')
+ outfile.write(f'\n')
+ outfile.write(f'# of stable fails\t\t{self.fails}\n')
+ outfile.write(f'# of flaky entries\t\t{self.flaky}\n')
+ outfile.write(f'# of all tests\t\t\t{self.total}\n')
+
def CompareResults(manifest, actual):
"""Compare sets of results and return two lists:
@@ -552,6 +573,9 @@ def GetSumFiles(results, build_dir):
def PerformComparison(expected, actual):
+ stats = ResultsStats()
+ stats.fails = len(actual)
+
actual_vs_expected, expected_vs_actual = CompareResults(expected, actual)
if _OPTIONS.inverse_match:
@@ -560,6 +584,7 @@ def PerformComparison(expected, actual):
# but PASSed in actual set.
actual_vs_expected, expected_vs_actual \
= expected_vs_actual, actual_vs_expected
+ stats = None
tests_ok = True
if len(actual_vs_expected) > 0:
@@ -569,6 +594,9 @@ def PerformComparison(expected, actual):
PrintSummary(actual_vs_expected)
tests_ok = False
+ if _OPTIONS.verbosity >= 1 and stats:
+ stats.Print()
+
if _OPTIONS.verbosity >= 2 and len(expected_vs_actual) > 0:
print('\n\nExpected results not present in this build (fixed tests)'
'\n\nNOTE: This is not a failure. It just means that these '