#    Copyright 2014-2015 ARM Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


#pylint: disable=E1101,W0201
import os
import re
import string
import tarfile
from collections import defaultdict

from wlauto import Workload, Parameter, Alias
from wlauto.exceptions import ConfigError, WorkloadError
from wlauto.common.resources import ExtensionAsset
from wlauto.utils.misc import get_cpu_mask
from wlauto.utils.types import boolean, list_or_string


class Spec2000(Workload):

    name = 'spec2000'
    description = """
    SPEC2000 benchmarks measuring processor, memory and compiler performance.

    http://www.spec.org/cpu2000/

    From the web site:

    SPEC CPU2000 is the next-generation industry-standardized CPU-intensive benchmark suite. SPEC
    designed CPU2000 to provide a comparative measure of compute intensive performance across the
    widest practical range of hardware. The implementation resulted in source code benchmarks
    developed from real user applications. These benchmarks measure the performance of the
    processor, memory and compiler on the tested system.

    .. note:: At the moment, this workload relies on pre-built SPEC binaries (included in an
              asset bundle). These binaries *must* be built according to rules outlined here::

                  http://www.spec.org/cpu2000/docs/runrules.html#toc_2.0

              in order for the results to be valid SPEC2000 results.

    .. note:: This workload does not attempt to generate results in an admissible SPEC format. No
              metadata is provided (though some, but not all, of the required metadata is collected
              by WA elsewhere). It is up to the user to post-process the results into a
              SPEC-admissible results file, if that is their intention.

    *base vs peak*

    SPEC2000 defines two build/test configurations: base and peak. Base is supposed to use a basic
    configuration (e.g. default compiler flags) with no tuning, while peak is specifically optimized
    for a system. Since this workload uses externally-built binaries, there is no way for WA to be
    sure which configuration is used -- the user is expected to keep track of that. Be aware that
    base/peak also come with specific requirements for the way workloads are run (e.g. how many
    instances on multi-core systems)::

        http://www.spec.org/cpu2000/docs/runrules.html#toc_3

    These are not enforced by WA, so it is again up to the user to ensure that the correct workload
    parameters are specified in the agenda, if they intend to collect "official" SPEC results. (Those
    interested in collecting official SPEC results should also note that setting runtime parameters
    would violate the SPEC run rules, which state that no configuration may be done to the platform
    after boot.)

    *bundle structure*

    This workload expects the actual benchmark binaries to be provided in a tarball "bundle" that has
    a very specific structure. At the top level of the tarball, there should be two directories: "fp"
    and "int" -- for each of the SPEC2000 categories. Under those, there is a sub-directory per benchmark.
    Each benchmark sub-directory contains three sub-sub-directorie:

    - "cpus" contains a subdirector for each supported cpu (e.g. a15) with a single executable binary
      for that cpu, in addition to a "generic" subdirectory that has not been optimized for a specific
      cpu and should run on any ARM system.
    - "data" contains all additional files (input, configuration, etc) that  the benchmark executable
      relies on.
    - "scripts" contains one or more one-liner shell scripts that invoke the benchmark binary with
      appropriate command line parameters. The name of the script must be in the format
      <benchmark name>[.<variant name>].sh, i.e. name of benchmark, optionally followed by variant
      name, followed by ".sh" extension. If there is more than one script, then all of them must
      have  a variant; if there is only one script the it should not cotain a variant.
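
    Each script is read as a Python ``string.Template``; it should reference the benchmark binary
    via a ``$binary`` placeholder, which WA substitutes with the on-device path of the installed
    executable. A (hypothetical) ``ammp.sh`` might therefore contain something like::

        $binary < ammp.in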

    A typical bundle may look like this::

        |- fp
        |  |-- ammp
        |  |   |-- cpus
        |  |   |   |-- generic
        |  |   |   |   |-- ammp
        |  |   |   |-- a15
        |  |   |   |   |-- ammp
        |  |   |   |-- a7
        |  |   |   |   |-- ammp
        |  |   |-- data
        |  |   |   |-- ammp.in
        |  |   |-- scripts
        |  |   |   |-- ammp.sh
        |  |-- applu
        .  .   .
        .  .   .
        .  .   .
        |- int
        .
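
    *example agenda entry*

    A hypothetical agenda entry running two of the benchmarks in 'rate' mode might look like this
    (assuming the usual WA agenda layout; the parameter names are as documented for this workload)::

        workloads:
            - name: spec2000
              workload_parameters:
                  benchmarks: [ammp, applu]
                  mode: rate
                  number_of_threads: 2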

    """

    # TODO: This is a bit of a hack. Need to re-think summary metric indication
    #      (also more than just summary/non-summary classification?)
    class _SPECSummaryMetrics(object):
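        # A metric is treated as a summary metric if its name ends in '_real', i.e. the
        # wall-clock ("real") timing extracted from the output of 'time'.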
        def __contains__(self, item):
            return item.endswith('_real')

    asset_file = 'spec2000-assets.tar.gz'

    aliases = [
        Alias('spec2k'),
    ]

    summary_metrics = _SPECSummaryMetrics()

    parameters = [
        Parameter('benchmarks', kind=list_or_string,
                  description='Specifies the SPEC benchmarks to run.'),
        Parameter('mode', kind=str, allowed_values=['speed', 'rate'], default='speed',
                  description='SPEC benchmarks can report either speed to execute or throughput/rate. '
                              'In the latter case, several "threads" will be spawned.'),
        Parameter('number_of_threads', kind=int, default=None,
                  description='Specify the number of "threads" to be used in \'rate\' mode. (Note: '
                              'on big.LITTLE systems this is the number of threads *for each cluster*).'),

        Parameter('force_extract_assets', kind=boolean, default=False,
                  description='If set to ``True``, assets will be extracted from the bundle even if they are '
                              'already extracted. Note: this option implies ``force_push_assets``.'),
        Parameter('force_push_assets', kind=boolean, default=False,
                  description='If set to ``True``, assets will be pushed to device even if they\'re already '
                              'present.'),
        Parameter('timeout', kind=int, default=20 * 60,
                  description='Timeout, in seconds, for the execution of a single SPEC test.'),
    ]

    speed_run_template = 'cd {datadir}; time ({launch_command})'
    rate_run_template = 'cd {datadir}; time ({loop}; wait)'
    loop_template = 'for i in $(busybox seq 1 {threads}); do {launch_command} 1>/dev/null 2>&1 & done'
    launch_template = 'busybox taskset {cpumask} {command} 1>/dev/null 2>&1'
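    # In 'speed' mode, a single instance is launched, pinned (via 'busybox taskset') to the CPUs
    # of the fastest core type; in 'rate' mode, loop_template spawns {threads} background copies
    # per cluster and 'wait' blocks until they all finish. A fully rendered speed-mode command
    # might look roughly like this (hypothetical paths and mask):
    #   cd /data/local/tmp/spec2000/ammp; time (busybox taskset 0xc ./ammp.a15 < ammp.in 1>/dev/null 2>&1)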

    timing_regex = re.compile(r'(?P<minutes>\d+)m(?P<seconds>[\d.]+)s\s+(?P<category>\w+)')
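    # timing_regex matches the per-category timings printed by the shell's 'time' builtin in
    # mksh-style format (as used by Android's shell), e.g. "0m12.34s real", capturing the
    # minutes, seconds and category (real/user/system).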

    def init_resources(self, context):
        self._load_spec_benchmarks(context)

    def setup(self, context):
        cpus = self.device.core_names
        if not cpus:
            raise WorkloadError('Device CPU core configuration has not been specified.')
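        # Group logical CPU ids by core name, e.g. core_names of ['a7', 'a7', 'a15', 'a15']
        # (hypothetical) would produce {'a7': [0, 1], 'a15': [2, 3]}.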
        cpumap = defaultdict(list)
        for i, cpu in enumerate(cpus):
            cpumap[cpu.lower()].append(i)
        for benchspec in self.benchmarks:
            commandspecs = self._verify_and_deploy_benchmark(benchspec, cpumap)
            self._build_command(benchspec, commandspecs)

    def run(self, context):
        for name, command in self.commands:
            self.timings[name] = self.device.execute(command, timeout=self.timeout)

    def update_result(self, context):
        for benchmark, output in self.timings.iteritems():
            matches = self.timing_regex.finditer(output)
            found = False
            for match in matches:
                category = match.group('category')
                mins = float(match.group('minutes'))
                secs = float(match.group('seconds'))
                total = secs + 60 * mins
                context.result.add_metric('_'.join([benchmark, category]),
                                          total, 'seconds',
                                          lower_is_better=True)
                found = True
            if not found:
                self.logger.error('Could not get timings for {}'.format(benchmark))

    def validate(self):
        if self.force_extract_assets:
            self.force_push_assets = True
        if self.benchmarks is None:  # pylint: disable=access-member-before-definition
            self.benchmarks = ['all']
        for benchname in self.benchmarks:
            if benchname == 'all':
                self.benchmarks = self.loaded_benchmarks.keys()
                break
            if benchname not in self.loaded_benchmarks:
                raise ConfigError('Unknown SPEC benchmark: {}'.format(benchname))
        if self.mode == 'speed':
            if self.number_of_threads is not None:
                raise ConfigError('number_of_threads cannot be specified in speed mode.')
        elif self.mode != 'rate':
            raise ValueError('Unexpected SPEC2000 mode: {}'.format(self.mode))  # Should never get here
        self.commands = []
        self.timings = {}

    def _load_spec_benchmarks(self, context):
        self.loaded_benchmarks = {}
        self.categories = set()
        if self.force_extract_assets or len(os.listdir(self.dependencies_directory)) < 2:
            bundle = context.resolver.get(ExtensionAsset(self, self.asset_file))
            with tarfile.open(bundle, 'r:gz') as tf:
                tf.extractall(self.dependencies_directory)
        for entry in os.listdir(self.dependencies_directory):
            entrypath = os.path.join(self.dependencies_directory, entry)
            if os.path.isdir(entrypath):
                for bench in os.listdir(entrypath):
                    self.categories.add(entry)
                    benchpath = os.path.join(entrypath, bench)
                    self._load_benchmark(benchpath, entry)

    def _load_benchmark(self, path, category):
        datafiles = []
        cpus = []
        for df in os.listdir(os.path.join(path, 'data')):
            datafiles.append(os.path.join(path, 'data', df))
        for cpu in os.listdir(os.path.join(path, 'cpus')):
            cpus.append(cpu)
        commandsdir = os.path.join(path, 'commands')
        for command in os.listdir(commandsdir):
            bench = SpecBenchmark()
            bench.name = os.path.splitext(command)[0]
            bench.path = path
            bench.category = category
            bench.datafiles = datafiles
            bench.cpus = cpus
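            # The script's contents are stored as a string.Template; its $binary placeholder is
            # substituted later with the on-device path of the installed executable (see
            # _verify_and_deploy_benchmark).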
            with open(os.path.join(commandsdir, command)) as fh:
                bench.command_template = string.Template(fh.read().strip())
            self.loaded_benchmarks[bench.name] = bench

    def _verify_and_deploy_benchmark(self, benchspec, cpumap):  # pylint: disable=R0914
        """Verifies that the supplied benchmark spec is valid and deploys the required assets
        to the device (if necessary). Returns a list of command specs (one for each CPU cluster)
        that can then be used to construct the final command."""
        bench = self.loaded_benchmarks[benchspec]
        basename = benchspec.split('.')[0]
        datadir = self.device.path.join(self.device.working_directory, self.name, basename)
        if self.force_push_assets or not self.device.file_exists(datadir):
            self.device.execute('mkdir -p {}'.format(datadir))
            for datafile in bench.datafiles:
                self.device.push_file(datafile, self.device.path.join(datadir, os.path.basename(datafile)))

        if self.mode == 'speed':
            cpus = [self._get_fastest_cpu().lower()]
        else:
            cpus = cpumap.keys()

        cmdspecs = []
        for cpu in cpus:
            try:
                host_bin_file = bench.get_binary(cpu)
            except ValueError, e:
                try:
                    msg = e.message
                    msg += ' Attempting to use generic binary instead.'
                    self.logger.debug(msg)
                    host_bin_file = bench.get_binary('generic')
                    cpu = 'generic'
                except ValueError, e:
                    raise ConfigError(e.message)  # re-raising as user error
            binname = os.path.basename(host_bin_file)
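            # Install the binary under a cpu-suffixed name (e.g. "ammp.a15") so that builds tuned
            # for different core types do not overwrite one another on the device.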
            binary = self.device.install(host_bin_file, with_name='.'.join([binname, cpu]))
            commandspec = CommandSpec()
            commandspec.command = bench.command_template.substitute({'binary': binary})
            commandspec.datadir = datadir
            commandspec.cpumask = get_cpu_mask(cpumap[cpu])
            # 'threads' is consumed by loop_template in 'rate' mode; when number_of_threads is not
            # set, fall back to one thread per CPU in the cluster (assumed default).
            commandspec.threads = self.number_of_threads or len(cpumap[cpu])
            cmdspecs.append(commandspec)
        return cmdspecs

    def _build_command(self, name, commandspecs):
        if self.mode == 'speed':
            if len(commandspecs) != 1:
                raise AssertionError('Must be exactly one command spec specified in speed mode.')
            spec = commandspecs[0]
            launch_command = self.launch_template.format(command=spec.command, cpumask=spec.cpumask)
            self.commands.append((name,
                                  self.speed_run_template.format(datadir=spec.datadir,
                                                                 launch_command=launch_command)))
        elif self.mode == 'rate':
            loops = []
            for spec in commandspecs:
                launch_command = self.launch_template.format(command=spec.command, cpumask=spec.cpumask)
                loops.append(self.loop_template.format(launch_command=launch_command, threads=spec.threads))
            self.commands.append((name,
                                  self.rate_run_template.format(datadir=spec.datadir,
                                                                loop='; '.join(loops))))
        else:
            raise ValueError('Unexpected SPEC2000 mode: {}'.format(self.mode))  # Should never get here

    def _get_fastest_cpu(self):
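        # Of the core types present on the device, pick the one whose online CPUs report the
        # highest maximum frequency; 'speed' mode runs are pinned to this cluster.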
        cpu_types = set(self.device.core_names)
        if len(cpu_types) == 1:
            return cpu_types.pop()
        fastest_cpu = None
        fastest_freq = 0
        for cpu_type in cpu_types:
            try:
                idx = self.device.get_core_online_cpu(cpu_type)
                freq = self.device.get_cpu_max_frequency(idx)
                if freq > fastest_freq:
                    fastest_freq = freq
                    fastest_cpu = cpu_type
            except ValueError:
                pass
        if not fastest_cpu:
            raise WorkloadError('No active CPUs found on device. Something is very wrong...')
        return fastest_cpu


class SpecBenchmark(object):

    def __init__(self):
        self.name = None
        self.path = None
        self.category = None
        self.command_template = None
        self.cpus = []
        self.datafiles = []

    def get_binary(self, cpu):
        if cpu not in self.cpus:
            raise ValueError('CPU {} is not supported by {}.'.format(cpu, self.name))
        binpath = os.path.join(self.path, 'cpus', cpu, self.name.split('.')[0])
        if not os.path.isfile(binpath):
            raise ValueError('CPU {} is not supported by {}.'.format(cpu, self.name))
        return binpath


class CommandSpec(object):

    def __init__(self):
        self.cpumask = None
        self.datadir = None
        self.command = None
        self.threads = None