diff options
author | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2024-04-01 12:30:19 +0000 |
---|---|---|
committer | Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org> | 2024-04-03 15:09:15 +0000 |
commit | eb482c708ec9ffe606f0add8e4a9a442897886ef (patch) | |
tree | 7a1143f91ee91959bbe64d4ef80d7ccdee8fb392 | |
parent | c001402337a0e44bccc146b8aef4c8d24c64f10e (diff) |
[contrib] validate_failures.py: Add "flaky" statistic.
Calculate number of flaky entries in the manifest, and
remove flaky tests from the set of failed tests, thus
excluding flaky tests from the "failed" count.
Change-Id: Id3ee1ce5ef60d570cbc0a534744d5197656b3442
-rwxr-xr-x | contrib/testsuite-management/validate_failures.py | 20 |
1 file changed, 16 insertions, 4 deletions
diff --git a/contrib/testsuite-management/validate_failures.py b/contrib/testsuite-management/validate_failures.py index 1d69376..9b8d0f5 100755 --- a/contrib/testsuite-management/validate_failures.py +++ b/contrib/testsuite-management/validate_failures.py @@ -252,7 +252,6 @@ class ResultSet(set): outfile.write(_EXP_LINE_FORMAT % (current_tool, current_exp)) outfile.write('%s\n' % result) - # Check if testsuite of expected_result is present in current results. # This is used to compare partial test results against a full manifest. def HasTestsuite(self, expected_result): @@ -506,9 +505,6 @@ def CompareResults(manifest, actual): # they are expected failures that are not failing anymore). manifest_vs_actual = ResultSet() for expected_result in manifest: - # Ignore tests marked flaky. - if 'flaky' in expected_result.attrs: - continue # We try to support comparing partial results vs full manifest # (e.g., manifest has failures for gcc, g++, gfortran, but we ran only # g++ testsuite). To achieve this we record encountered testsuites in @@ -571,9 +567,25 @@ def GetSumFiles(results, build_dir): sum_files = results.split() return sum_files +def DiscardFlaky(expected, actual): + flaky_list = [] + for expected_result in expected: + if 'flaky' in expected_result.attrs: + flaky_list.append(expected_result) + + for expected_result in flaky_list: + expected.remove(expected_result) + actual.discard(expected_result) + + return len(flaky_list) + def PerformComparison(expected, actual): stats = ResultsStats() + stats.total = actual.total + # We need to ignore flaky tests in comparison, so remove them now from + # both expected and actual sets. + stats.flaky = DiscardFlaky(expected, actual) stats.fails = len(actual) actual_vs_expected, expected_vs_actual = CompareResults(expected, actual) |