path: root/wa/output_processors/csvproc.py
#    Copyright 2018 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from devlib.utils.csvutil import csvwriter

from wa import OutputProcessor, Parameter
from wa.framework.exception import ConfigError
from wa.utils.types import list_of_strings


class CsvReportProcessor(OutputProcessor):

    name = 'csv'
    description = """
    Creates a ``results.csv`` in the output directory containing the results
    for all iterations in CSV format, one line per metric.
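
    For example, the first few lines of the file might look like this
    (the workload, job id, and values shown are hypothetical)::

        id,workload,iteration,metric,value,units
        wk1,dhrystone,1,score,123456,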

    """

    parameters = [
        Parameter('use_all_classifiers', kind=bool, default=False,
                  global_alias='use_all_classifiers',
                  description="""
                  If set to ``True``, this will add a column for every classifier
                  that features in at least one collected metric.

                  .. note:: This cannot be ``True`` if ``extra_columns`` is set.

                  """),
        Parameter('extra_columns', kind=list_of_strings,
                  description="""
                  List of classifiers to use as columns.

                  .. note:: This cannot be set if ``use_all_classifiers`` is
                            ``True``.

                  """),
    ]

    def validate(self):
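        """Check that mutually exclusive parameters have not both been set."""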
        super(CsvReportProcessor, self).validate()
        if self.use_all_classifiers and self.extra_columns:
            msg = 'extra_columns cannot be specified when '\
                  'use_all_classifiers is True'
            raise ConfigError(msg)

    def initialize(self):
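        """Set up state accumulated across all processed outputs."""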
        self.outputs_so_far = []  # pylint: disable=attribute-defined-outside-init
        self.artifact_added = False

    def process_job_output(self, output, target_info, run_output):
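        """
        Update the cumulative list of outputs and rewrite ``results.csv``.
        The file is regenerated after every job, so a current report is
        available even while the run is still in progress.
        """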
        self.outputs_so_far.append(output)
        self._write_outputs(self.outputs_so_far, run_output)
        if not self.artifact_added:
            run_output.add_artifact('run_result_csv', 'results.csv', 'export')
            self.artifact_added = True

    def process_run_output(self, output, target_info):
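        """
        Add the run-level output (i.e. metrics not attached to any job)
        and write the final version of ``results.csv``.
        """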
        self.outputs_so_far.append(output)
        self._write_outputs(self.outputs_so_far, output)
        if not self.artifact_added:
            output.add_artifact('run_result_csv', 'results.csv', 'export')
            self.artifact_added = True

    def _write_outputs(self, outputs, output):
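        """
        Write one row per metric from each output in ``outputs`` to
        ``results.csv``, with classifier values in the extra columns.
        """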
        if self.use_all_classifiers:
            classifiers = set()
            for out in outputs:
                for metric in out.metrics:
                    classifiers.update(metric.classifiers.keys())
            # Sort so that column order is stable between runs.
            extra_columns = sorted(classifiers)
        elif self.extra_columns:
            extra_columns = self.extra_columns
        else:
            extra_columns = []

        outfile = output.get_path('results.csv')
        with csvwriter(outfile) as writer:
            writer.writerow(['id', 'workload', 'iteration', 'metric'] +
                            extra_columns + ['value', 'units'])

            for o in outputs:
                if o.kind == 'job':
                    header = [o.id, o.label, o.iteration]
                elif o.kind == 'run':
                    # Should be a RunOutput. Run-level metrics aren't attached
                    # to any job so we leave 'id' and 'iteration' blank, and use
                    # the run name for the 'label' field.
                    header = [None, o.info.run_name, None]
                else:
                    raise RuntimeError(
                        'Output of kind "{}" unrecognised by csvproc'.format(o.kind))

                for metric in o.result.metrics:
                    row = (header + [metric.name] +
                           [str(metric.classifiers.get(c, ''))
                            for c in extra_columns] +
                           [str(metric.value), metric.units or ''])
                    writer.writerow(row)
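
# A minimal sketch of enabling this processor from an agenda, assuming the
# standard WA3 config layout (the workload shown is purely illustrative):
#
#     config:
#         augmentations: [csv]
#         csv:
#             extra_columns: [test]
#     workloads:
#         - dhrystone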