author     Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>   2024-04-01 12:30:19 +0000
committer  Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>   2024-04-03 15:09:15 +0000
commit     eb482c708ec9ffe606f0add8e4a9a442897886ef (patch)
tree       7a1143f91ee91959bbe64d4ef80d7ccdee8fb392
parent     c001402337a0e44bccc146b8aef4c8d24c64f10e (diff)
[contrib] validate_failures.py: Add "flaky" statistic.
Calculate the number of flaky entries in the manifest, and remove flaky
tests from the set of failed tests, thus excluding flaky tests from the
"failed" count.

Change-Id: Id3ee1ce5ef60d570cbc0a534744d5197656b3442
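For orientation, below is a minimal sketch of the bookkeeping this patch
introduces. It is not the patch itself: Entry, its attrs field, and the
sample sets are hypothetical stand-ins for the script's ResultSet contents.

class Entry:
  """Hypothetical stand-in for one manifest/result entry."""
  def __init__(self, name, attrs=()):
    self.name = name
    self.attrs = set(attrs)
  def __hash__(self):
    return hash(self.name)
  def __eq__(self, other):
    return self.name == other.name

def discard_flaky(expected, actual):
  # Collect manifest entries marked flaky, then drop them from both sets
  # so they count neither as expected nor as actual failures.
  flaky = [e for e in expected if 'flaky' in e.attrs]
  for e in flaky:
    expected.remove(e)
    actual.discard(e)
  return len(flaky)

expected = {Entry('t1.c', ['flaky']), Entry('t2.c')}
actual = {Entry('t1.c'), Entry('t3.c')}
print(discard_flaky(expected, actual), len(actual))  # -> 1 1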
-rwxr-xr-x  contrib/testsuite-management/validate_failures.py | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/contrib/testsuite-management/validate_failures.py b/contrib/testsuite-management/validate_failures.py
index 1d69376..9b8d0f5 100755
--- a/contrib/testsuite-management/validate_failures.py
+++ b/contrib/testsuite-management/validate_failures.py
@@ -252,7 +252,6 @@ class ResultSet(set):
         outfile.write(_EXP_LINE_FORMAT % (current_tool, current_exp))
       outfile.write('%s\n' % result)
 
-
   # Check if testsuite of expected_result is present in current results.
   # This is used to compare partial test results against a full manifest.
   def HasTestsuite(self, expected_result):
@@ -506,9 +505,6 @@ def CompareResults(manifest, actual):
   # they are expected failures that are not failing anymore).
   manifest_vs_actual = ResultSet()
   for expected_result in manifest:
-    # Ignore tests marked flaky.
-    if 'flaky' in expected_result.attrs:
-      continue
     # We try to support comparing partial results vs full manifest
     # (e.g., manifest has failures for gcc, g++, gfortran, but we ran only
     # g++ testsuite). To achieve this we record encountered testsuites in
@@ -571,9 +567,25 @@ def GetSumFiles(results, build_dir):
     sum_files = results.split()
   return sum_files
 
+def DiscardFlaky(expected, actual):
+  flaky_list = []
+  for expected_result in expected:
+    if 'flaky' in expected_result.attrs:
+      flaky_list.append(expected_result)
+
+  for expected_result in flaky_list:
+    expected.remove(expected_result)
+    actual.discard(expected_result)
+
+  return len(flaky_list)
+
 def PerformComparison(expected, actual):
   stats = ResultsStats()
+  stats.total = actual.total
+  # We need to ignore flaky tests in comparison, so remove them now from
+  # both expected and actual sets.
+  stats.flaky = DiscardFlaky(expected, actual)
   stats.fails = len(actual)
   actual_vs_expected, expected_vs_actual = CompareResults(expected, actual)
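One detail worth noting in the hunk above: stats.total is read from
actual.total before DiscardFlaky runs, while stats.fails is computed after
it, so the total still reflects every gathered result but the failed count
excludes known-flaky tests. A tiny sketch of why that ordering matters (the
sets below are hypothetical stand-ins for the actual results, not the
script's ResultSet):

actual = {'flaky-test.c', 'real-fail.c'}   # failing tests; one is known flaky
flaky = {'flaky-test.c'}                   # manifest entries marked flaky

fails_if_counted_first = len(actual)       # 2: flaky inflates the count
actual -= flaky                            # what DiscardFlaky does
fails_after_discard = len(actual)          # 1: only the real failure remains
print(fails_if_counted_first, fails_after_discard)  # -> 2 1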