author     Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>   2024-04-01 12:30:19 +0000
committer  Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>   2024-04-03 15:09:15 +0000
commit     eb482c708ec9ffe606f0add8e4a9a442897886ef (patch)
tree       7a1143f91ee91959bbe64d4ef80d7ccdee8fb392
parent     c001402337a0e44bccc146b8aef4c8d24c64f10e (diff)
[contrib] validate_failures.py: Add "flaky" statistic.
Calculate the number of flaky entries in the manifest, and remove flaky
tests from the set of failed tests, thus excluding flaky tests from the
"failed" count.

Change-Id: Id3ee1ce5ef60d570cbc0a534744d5197656b3442
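For orientation, below is a minimal sketch of the bookkeeping this patch
introduces. It is not the patch itself: Entry, its attrs field, and the
sample sets are hypothetical stand-ins for the script's ResultSet contents.

class Entry:
  """Hypothetical stand-in for one manifest/result entry."""
  def __init__(self, name, attrs=()):
    self.name = name
    self.attrs = set(attrs)
  def __hash__(self):
    return hash(self.name)
  def __eq__(self, other):
    return self.name == other.name

def discard_flaky(expected, actual):
  # Collect manifest entries marked flaky, then drop them from both sets
  # so they count neither as expected nor as actual failures.
  flaky = [e for e in expected if 'flaky' in e.attrs]
  for e in flaky:
    expected.remove(e)
    actual.discard(e)
  return len(flaky)

expected = {Entry('t1.c', ['flaky']), Entry('t2.c')}
actual = {Entry('t1.c'), Entry('t3.c')}
print(discard_flaky(expected, actual), len(actual))  # -> 1 1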
-rwxr-xr-x  contrib/testsuite-management/validate_failures.py | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/contrib/testsuite-management/validate_failures.py b/contrib/testsuite-management/validate_failures.py
index 1d69376..9b8d0f5 100755
--- a/contrib/testsuite-management/validate_failures.py
+++ b/contrib/testsuite-management/validate_failures.py
@@ -252,7 +252,6 @@ class ResultSet(set):
         outfile.write(_EXP_LINE_FORMAT % (current_tool, current_exp))
       outfile.write('%s\n' % result)
 
-
   # Check if testsuite of expected_result is present in current results.
   # This is used to compare partial test results against a full manifest.
   def HasTestsuite(self, expected_result):
@@ -506,9 +505,6 @@ def CompareResults(manifest, actual):
   # they are expected failures that are not failing anymore).
   manifest_vs_actual = ResultSet()
   for expected_result in manifest:
-    # Ignore tests marked flaky.
-    if 'flaky' in expected_result.attrs:
-      continue
     # We try to support comparing partial results vs full manifest
     # (e.g., manifest has failures for gcc, g++, gfortran, but we ran only
     # g++ testsuite). To achieve this we record encountered testsuites in
@@ -571,9 +567,25 @@ def GetSumFiles(results, build_dir):
     sum_files = results.split()
   return sum_files
 
+def DiscardFlaky(expected, actual):
+  flaky_list = []
+  for expected_result in expected:
+    if 'flaky' in expected_result.attrs:
+      flaky_list.append(expected_result)
+
+  for expected_result in flaky_list:
+    expected.remove(expected_result)
+    actual.discard(expected_result)
+
+  return len(flaky_list)
+
 def PerformComparison(expected, actual):
   stats = ResultsStats()
+  stats.total = actual.total
+  # We need to ignore flaky tests in comparison, so remove them now from
+  # both expected and actual sets.
+  stats.flaky = DiscardFlaky(expected, actual)
   stats.fails = len(actual)
   actual_vs_expected, expected_vs_actual = CompareResults(expected, actual)
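One detail worth noting in the hunk above: stats.total is read from
actual.total before DiscardFlaky runs, while stats.fails is computed after
it, so the total still reflects every gathered result but the failed count
excludes known-flaky tests. A tiny sketch of why that ordering matters (the
sets below are hypothetical stand-ins for the actual results, not the
script's ResultSet):

actual = {'flaky-test.c', 'real-fail.c'}   # failing tests; one is known flaky
flaky = {'flaky-test.c'}                   # manifest entries marked flaky

fails_if_counted_first = len(actual)       # 2: flaky inflates the count
actual -= flaky                            # what DiscardFlaky does
fails_after_discard = len(actual)          # 1: only the real failure remains
print(fails_if_counted_first, fails_after_discard)  # -> 2 1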