author    Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>   2024-04-01 12:26:44 +0000
committer Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>   2024-04-03 15:09:15 +0000
commit    c001402337a0e44bccc146b8aef4c8d24c64f10e (patch)
tree      ca468048449350533e0a71ca6cd85d395f157a11
parent    c025364e2d0e83fdb2e6c52cd0f384b8ae18b13d (diff)
[contrib] validate_failures.py: Add statistics for test results
It is useful to know the aggregate statistics for test results, such as the number of stable fails, the number of flaky tests, and the total number of tests. Some of these statistics are reasonably easy to calculate with the current logic, while others would be quite difficult.

This patch series adds 3 statistics:
- fails: number of stable (non-flaky) failed tests;
- flaky: number of flaky entries in the manifest;
- total: total number of tests in the results, including flaky passes and fails.

This patch adds the new class ResultsStats and the associated logic to handle the "fails" statistic. Subsequent patches will add calculation of the "flaky" and "total" statistics.

Note that it is difficult to calculate the number of passed tests, since we can't easily distinguish flaky tests that happened to pass from genuinely passing tests. Similarly, we can't easily count the flaky tests encountered in the current run (again, it is non-trivial to detect flaky tests that passed), so we report the number of flaky entries in the manifest instead.

Change-Id: I6d26d177e32fca9a63796e69faa486fc6d018c05
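For reference, below is a minimal standalone sketch of the new class as introduced by this patch, with a small driver showing how the summary is printed. The value of _SUMMARY_LINE_FORMAT and the sample counts are stand-ins for illustration; the real format string and the real counts come from elsewhere in validate_failures.py.

    import sys

    # Stand-in for the script's real summary format string; the exact
    # value used by validate_failures.py may differ.
    _SUMMARY_LINE_FORMAT = '\n\t\t=== %s ===\n'

    class ResultsStats:
      """Describes statistics of DejaGNU test results."""

      def __init__(self):
        self.fails = 0   # Number of non-flaky failed tests in the results.
        self.flaky = 0   # Number of flaky entries in the manifest.
        self.total = 0   # Total number of tests, incl. flaky passes and fails.

      def Print(self, outfile=sys.stdout):
        # Emit a short summary block after the per-test report.
        outfile.write(_SUMMARY_LINE_FORMAT % 'Results')
        outfile.write('\n')
        outfile.write(f'# of stable fails\t\t{self.fails}\n')
        outfile.write(f'# of flaky entries\t\t{self.flaky}\n')
        outfile.write(f'# of all tests\t\t\t{self.total}\n')

    if __name__ == '__main__':
      # Illustrative values only; in the script, `fails` is taken from the
      # size of the actual result set in PerformComparison().
      stats = ResultsStats()
      stats.fails = 3
      stats.total = 120000
      stats.Print()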
-rwxr-xr-x  contrib/testsuite-management/validate_failures.py  30
1 file changed, 29 insertions(+), 1 deletion(-)
diff --git a/contrib/testsuite-management/validate_failures.py b/contrib/testsuite-management/validate_failures.py
index d7d0c91..1d69376 100755
--- a/contrib/testsuite-management/validate_failures.py
+++ b/contrib/testsuite-management/validate_failures.py
@@ -252,7 +252,6 @@ class ResultSet(set):
outfile.write(_EXP_LINE_FORMAT % (current_tool, current_exp))
outfile.write('%s\n' % result)
- outfile.write(_SUMMARY_LINE_FORMAT % 'Results')
# Check if testsuite of expected_result is present in current results.
# This is used to compare partial test results against a full manifest.
@@ -466,6 +465,28 @@ def GetResults(sum_files, build_results = None):
build_results.update(ParseSummary(sum_fname))
return build_results
+class ResultsStats:
+ """Describes statistics of DejaGNU test results.
+
+ Attributes:
+ fails: Number of non-flaky failed tests in the results.
+ flaky: Number of flaky entries in the manifest.
+ total: Total number of tests in the results, including flaky passes and
+ fails.
+ """
+
+ def __init__(self):
+ self.fails = 0
+ self.flaky = 0
+ self.total = 0
+
+ def Print(self, outfile=sys.stdout):
+ outfile.write(_SUMMARY_LINE_FORMAT % 'Results')
+ outfile.write(f'\n')
+ outfile.write(f'# of stable fails\t\t{self.fails}\n')
+ outfile.write(f'# of flaky entries\t\t{self.flaky}\n')
+ outfile.write(f'# of all tests\t\t\t{self.total}\n')
+
def CompareResults(manifest, actual):
"""Compare sets of results and return two lists:
@@ -552,6 +573,9 @@ def GetSumFiles(results, build_dir):
def PerformComparison(expected, actual):
+ stats = ResultsStats()
+ stats.fails = len(actual)
+
actual_vs_expected, expected_vs_actual = CompareResults(expected, actual)
if _OPTIONS.inverse_match:
@@ -560,6 +584,7 @@ def PerformComparison(expected, actual):
# but PASSed in actual set.
actual_vs_expected, expected_vs_actual \
= expected_vs_actual, actual_vs_expected
+ stats = None
tests_ok = True
if len(actual_vs_expected) > 0:
@@ -569,6 +594,9 @@ def PerformComparison(expected, actual):
PrintSummary(actual_vs_expected)
tests_ok = False
+ if _OPTIONS.verbosity >= 1 and stats:
+ stats.Print()
+
if _OPTIONS.verbosity >= 2 and len(expected_vs_actual) > 0:
print('\n\nExpected results not present in this build (fixed tests)'
'\n\nNOTE: This is not a failure. It just means that these '