aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergei Trofimov <sergei.trofimov@arm.com>2015-06-03 16:15:44 +0100
committerSergei Trofimov <sergei.trofimov@arm.com>2015-06-03 16:20:48 +0100
commit02af02f0cbf98ea2d54660187e788ae13c49438a (patch)
tree2d537b84a829cd972e26c481014cf1430e54cc62
parent9971041e453fd6d27625449cdac1a9bfb6ced9f2 (diff)
Adding cpustates result processor (and script)
-rw-r--r--scripts/cpustates17
-rw-r--r--wlauto/result_processors/cpustate.py186
-rw-r--r--wlauto/utils/power.py636
-rw-r--r--wlauto/utils/trace_cmd.py2
4 files changed, 840 insertions, 1 deletions
diff --git a/scripts/cpustates b/scripts/cpustates
new file mode 100644
index 00000000..1f36961c
--- /dev/null
+++ b/scripts/cpustates
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# Copyright 2015 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from wlauto.utils.power import main
+main()
diff --git a/wlauto/result_processors/cpustate.py b/wlauto/result_processors/cpustate.py
new file mode 100644
index 00000000..303a6ede
--- /dev/null
+++ b/wlauto/result_processors/cpustate.py
@@ -0,0 +1,186 @@
+# Copyright 2015 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import csv
+from collections import OrderedDict
+
+from wlauto import ResultProcessor, Parameter
+from wlauto.exceptions import ConfigError
+from wlauto.instrumentation import instrument_is_installed
+from wlauto.utils.power import report_power_stats
+
+
class CpuStatesProcessor(ResultProcessor):
    """
    Result processor that builds CPU parallelism and power state residency
    reports from the text trace produced by the ``trace-cmd`` instrument.

    Per-iteration reports are collected in ``process_iteration_result`` and
    merged into run-level ``parallel.csv`` and ``cpustate.csv`` files in
    ``process_run_result``.

    """

    name = 'cpustates'
    description = '''
    Process power ftrace to produce CPU state and parallelism stats.

    Parses trace-cmd output to extract power events and uses those to generate
    statistics about parallelism and frequency/idle core residency.

    .. note:: trace-cmd instrument must be enabled and configured to collect
              at least ``power:cpu_idle`` and ``power:cpu_frequency`` events.
              Reporting should also be enabled (it is by default) as
              ``cpustate`` parses the text version of the trace.

    This generates two reports for the run:

    *parallel.csv*

    Shows what percentage of time was spent with N cores active (for N
    from 0 to the total number of cores), for a cluster or for a system as
    a whole. It contain the following columns:

        :workload: The workload label
        :iteration: iteration that was run
        :cluster: The cluster for which statics are reported. The value of
                  ``"all"`` indicates that this row reports statistics for
                  the whole system.
        :number_of_cores: number of cores active. ``0`` indicates the cluster
                          was idle.
        :total_time: Total time spent in this state during workload execution
        :%time: Percentage of total workload execution time spent in this state
        :%running_time: Percentage of the time the cluster was active (i.e.
                        ignoring time the cluster was idling) spent in this
                        state.

    *cpustate.csv*

    Shows percentage of the time a core spent in a particular power state. The first
    column names the state is followed by a column for each core. Power states include
    available DVFS frequencies (for heterogeneous systems, this is the union of
    frequencies supported by different core types) and idle states. Some shallow
    states (e.g. ARM WFI) will consume different amount of power depending on the
    current OPP. For such states, there will be an entry for each opp. ``"unknown"``
    indicates the percentage of time for which a state could not be established from the
    trace. This is usually due to core state being unknown at the beginning of the trace,
    but may also be caused by dropped events in the middle of the trace.

    '''

    parameters = [
        Parameter('first_cluster_state', kind=int, default=2,
                  description="""
                  The first idle state which is common to a cluster.
                  """),
        Parameter('first_system_state', kind=int, default=3,
                  description="""
                  The first idle state which is common to all cores.
                  """),
        Parameter('write_iteration_reports', kind=bool, default=False,
                  description="""
                  By default, this instrument will generate reports for the entire run
                  in the overall output directory. Enabling this option will, in addition,
                  create reports in each iteration's output directory. The formats of these
                  reports will be similar to the overall report, except they won't mention
                  the workload name or iteration number (as that is implied by their location).
                  """),
        # This was declared as 'user_ratios', but the processor reads
        # ``self.use_ratios``; the declared name must match the attribute.
        Parameter('use_ratios', kind=bool, default=False,
                  description="""
                  By default proportional values will be reported as percentages, if this
                  flag is enabled, they will be reported as ratios instead.
                  """),
    ]

    def validate(self):
        """Raise ``ConfigError`` unless the ``trace-cmd`` instrument is installed."""
        if not instrument_is_installed('trace-cmd'):
            message = ('{} requires "trace-cmd" instrument to be installed and the '
                       'collection of at least "power:cpu_frequency" and '
                       '"power:cpu_idle" events to be enabled during workload '
                       'execution.')
            raise ConfigError(message.format(self.name))

    def initialize(self, context):
        """
        Cache the device topology and idle state names needed for processing.

        Raises ``ConfigError`` if the device lacks ``cpuidle`` capability or
        does not define ``core_names``.

        """
        # pylint: disable=attribute-defined-outside-init
        device = context.device
        if not device.has('cpuidle'):
            raise ConfigError('Device does not appear to have cpuidle capability; '
                              'is the right module installed?')
        if not device.core_names:
            message = '{} requires "core_names" and "core_clusters" to be specified for the device.'
            raise ConfigError(message.format(self.name))
        self.core_names = device.core_names
        self.core_clusters = device.core_clusters
        idle_states = {s.id: s.desc for s in device.get_cpuidle_states()}
        # Order the descriptions by state id so they can be looked up by index.
        self.idle_state_names = [idle_states[i] for i in sorted(idle_states)]
        self.num_idle_states = len(self.idle_state_names)
        # (workload label, iteration) -> (parallel report, power state report)
        self.iteration_reports = OrderedDict()

    def process_iteration_result(self, result, context):
        """
        Generate the reports for one iteration from its ``txttrace`` artifact.

        The reports are remembered for the run-level summary and, when
        ``write_iteration_reports`` is set, also written into the iteration's
        output directory. Iterations without a text trace, or whose trace
        yields no report, are skipped.

        """
        trace = context.get_artifact('txttrace')
        if not trace:
            self.logger.debug('Text trace does not appear to have been generated; skipping this iteration.')
            return
        self.logger.debug('Generating power state reports from trace...')
        parallel_report, powerstate_report = report_power_stats(
            trace_file=trace.path,
            idle_state_names=self.idle_state_names,
            core_names=self.core_names,
            core_clusters=self.core_clusters,
            num_idle_states=self.num_idle_states,
            first_cluster_state=self.first_cluster_state,
            first_system_state=self.first_system_state,
            use_ratios=self.use_ratios,
        )
        if parallel_report is None:
            self.logger.warning('No power state reports generated; are power '
                                'events enabled in the trace?')
            return
        self.logger.debug('Reports generated.')

        iteration_id = (context.spec.label, context.current_iteration)
        self.iteration_reports[iteration_id] = (parallel_report, powerstate_report)
        if self.write_iteration_reports:
            self.logger.debug('Writing iteration reports')
            parallel_report.write(os.path.join(context.output_directory, 'parallel.csv'))
            # Same file name as the run-level report and the documentation
            # (this used to be written as 'cpustates.csv').
            powerstate_report.write(os.path.join(context.output_directory, 'cpustate.csv'))

    def process_run_result(self, result, context):  # pylint: disable=too-many-locals
        """
        Merge all collected iteration reports into run-level ``parallel.csv``
        and ``cpustate.csv`` files, prefixing each row with the workload label
        and iteration number.

        """
        if not self.iteration_reports:
            self.logger.warning('No power state reports generated.')
            return

        parallel_rows = []
        powerstate_rows = []
        for (workload, iteration), reports in self.iteration_reports.items():
            parallel_report, powerstate_report = reports
            for record in parallel_report.values:
                parallel_rows.append([workload, iteration] + record)
            for state in sorted(powerstate_report.state_stats):
                stats = powerstate_report.state_stats[state]
                # ``None`` marks a core that never was in this state.
                powerstate_rows.append([workload, iteration, state] +
                                       ['{:.3f}'.format(s if s is not None else 0)
                                        for s in stats])

        with open(os.path.join(context.output_directory, 'parallel.csv'), 'w') as wfh:
            writer = csv.writer(wfh)
            writer.writerow(['workload', 'iteration', 'cluster',
                             'number_of_cores', 'total_time',
                             '%time', '%running_time'])
            writer.writerows(parallel_rows)

        with open(os.path.join(context.output_directory, 'cpustate.csv'), 'w') as wfh:
            writer = csv.writer(wfh)
            headers = ['workload', 'iteration', 'state']
            # Use the core names cached in initialize() (the same list that is
            # handed to report_power_stats) rather than a leaked loop variable.
            headers += ['{} CPU{}'.format(c, i)
                        for i, c in enumerate(self.core_names)]
            writer.writerow(headers)
            writer.writerows(powerstate_rows)
+
diff --git a/wlauto/utils/power.py b/wlauto/utils/power.py
new file mode 100644
index 00000000..cfee9617
--- /dev/null
+++ b/wlauto/utils/power.py
@@ -0,0 +1,636 @@
+# Copyright 2015 ARM Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import division
+import os
+import sys
+import csv
+import logging
+from ctypes import c_int32
+from collections import defaultdict
+import argparse
+
+from wlauto.utils.trace_cmd import TraceCmdTrace
+
+
+logger = logging.getLogger('power')
+
+
class CorePowerTransitionEvent(object):
    """
    A change of either the frequency or the idle state of a single cpu.

    Exactly one of ``frequency`` and ``idle_state`` must be given; passing
    both or neither raises ``ValueError``.

    """

    kind = 'transition'
    __slots__ = ['timestamp', 'cpu_id', 'frequency', 'idle_state']

    def __init__(self, timestamp, cpu_id, frequency=None, idle_state=None):
        has_frequency = frequency is not None
        has_idle_state = idle_state is not None
        if has_frequency == has_idle_state:
            raise ValueError('Power transition must specify a frequency or an idle_state, but not both.')
        self.timestamp = timestamp
        self.cpu_id = cpu_id
        self.frequency = frequency
        self.idle_state = idle_state

    def __str__(self):
        template = 'cpu {} @ {} -> freq: {} idle: {}'
        return template.format(self.cpu_id, self.timestamp,
                               self.frequency, self.idle_state)

    def __repr__(self):
        template = 'CPTE(c:{} t:{} f:{} i:{})'
        return template.format(self.cpu_id, self.timestamp,
                               self.frequency, self.idle_state)
+
+
class CorePowerDroppedEvents(object):
    """Marker event recording that trace events were dropped on a cpu."""

    kind = 'dropped_events'
    __slots__ = ['cpu_id']

    def __init__(self, cpu_id):
        self.cpu_id = cpu_id

    def __repr__(self):
        return 'DROPPED EVENTS on CPU{}'.format(self.cpu_id)

    # The string form and the debugging representation are the same text.
    __str__ = __repr__
+
+
class CpuPowerState(object):
    """
    Frequency and idle state of one cpu.

    An ``idle_state`` of ``-1`` means the cpu is active, a value ``>= 0`` means
    it is idling in that state, and ``None`` means the state is not known.

    """

    __slots__ = ['frequency', 'idle_state']

    def __init__(self, frequency=None, idle_state=None):
        self.frequency = frequency
        self.idle_state = idle_state

    @property
    def is_idling(self):
        state = self.idle_state
        if state is None:
            return False
        return state >= 0

    @property
    def is_active(self):
        return -1 == self.idle_state

    def __repr__(self):
        return 'CP(f:{} i:{})'.format(self.frequency, self.idle_state)

    __str__ = __repr__
+
+
class SystemPowerState(object):
    """
    Snapshot of the power state of every cpu at a point in time.

    :param num_cores: number of cpus to track; each starts as a default
                      ``CpuPowerState()`` (no frequency, no idle state).

    """

    __slots__ = ['timestamp', 'cpus']

    @property
    def num_cores(self):
        return len(self.cpus)

    def __init__(self, num_cores):
        self.timestamp = None
        # ``range`` (rather than the Python 2-only ``xrange``) keeps this
        # usable on both Python 2 and Python 3.
        self.cpus = [CpuPowerState() for _ in range(num_cores)]

    def copy(self):
        """Return an independent copy, with new per-cpu state objects."""
        new = SystemPowerState(self.num_cores)
        new.timestamp = self.timestamp
        for new_cpu, cpu in zip(new.cpus, self.cpus):
            new_cpu.frequency = cpu.frequency
            new_cpu.idle_state = cpu.idle_state
        return new

    def __str__(self):
        return 'SP(t:{} Cs:{})'.format(self.timestamp, self.cpus)

    __repr__ = __str__
+
+
class PowerStateProcessor(object):
    """
    This takes a stream of power transition events and yields a timeline stream
    of system power states.

    A single ``SystemPowerState`` is kept and mutated as events arrive; a copy
    of it is handed out after each event.

    :param core_clusters: cluster ID of each core; the length of this list
                          is the number of cpus tracked.
    :param num_idle_states: total number of idle states on the device.
    :param first_cluster_state: ID of the first idle state shared by all cores
                                in a cluster. The default (``sys.maxsize``) is
                                larger than any idle state ID.
    :param first_system_state: ID of the first idle state shared by all cores.
                               The default (``sys.maxsize``) is larger than any
                               idle state ID.

    """

    @property
    def cpu_states(self):
        return self.power_state.cpus

    @property
    def current_time(self):
        return self.power_state.timestamp

    @current_time.setter
    def current_time(self, value):
        self.power_state.timestamp = value

    # ``sys.maxsize`` replaces the Python 2-only ``sys.maxint``; it exists on
    # both Python 2 and Python 3.
    def __init__(self, core_clusters, num_idle_states,
                 first_cluster_state=sys.maxsize, first_system_state=sys.maxsize):
        self.power_state = SystemPowerState(len(core_clusters))
        self.requested_states = defaultdict(lambda: -1)  # cpu_id -> requested state

        idle_state_domains = build_idle_domains(core_clusters,
                                                num_states=num_idle_states,
                                                first_cluster_state=first_cluster_state,
                                                first_system_state=first_system_state)
        # This tells us what other cpus we need to update when we see an idle
        # state transition event
        self.idle_related_cpus = defaultdict(list)  # (cpu, idle_state) --> related_cpus_list
        for state_id, idle_state_domain in enumerate(idle_state_domains):
            for cpu_group in idle_state_domain:
                for cpu in cpu_group:
                    related = set(cpu_group) - set([cpu])
                    self.idle_related_cpus[(cpu, state_id)] = related

    def process(self, event_stream):
        """Yield the updated system power state for each event in the stream."""
        for event in event_stream:
            yield self.update_power_state(event)

    def update_power_state(self, event):
        """
        Update the tracked power state based on the specified event and
        return updated power state.

        Raises ``ValueError`` if ``event.kind`` is neither ``'transition'``
        nor ``'dropped_events'``.

        """
        if event.kind == 'transition':
            self._process_transition(event)
        elif event.kind == 'dropped_events':
            self._process_dropped_events(event)
        else:
            raise ValueError('Unexpected event type: {}'.format(event.kind))
        return self.power_state.copy()

    def _process_transition(self, event):
        """Advance the current time and apply a frequency or idle transition."""
        self.current_time = event.timestamp
        if event.idle_state is None:
            self.cpu_states[event.cpu_id].frequency = event.frequency
        elif event.idle_state == -1:
            self._process_idle_exit(event)
        else:
            self._process_idle_entry(event)

    def _process_dropped_events(self, event):
        """
        Forget the frequency and idle state of the cpu that dropped events,
        and the idle state of the cpus related to it through its previous
        idle state.

        """
        cpu_state = self.cpu_states[event.cpu_id]
        cpu_state.frequency = None
        old_idle_state = cpu_state.idle_state
        cpu_state.idle_state = None

        related_ids = self.idle_related_cpus[(event.cpu_id, old_idle_state)]
        for rid in related_ids:
            self.cpu_states[rid].idle_state = None

    def _process_idle_entry(self, event):
        if self.cpu_states[event.cpu_id].is_idling:
            raise ValueError('Got idle state entry event for an idling core: {}'.format(event))
        self._try_transition_to_idle_state(event.cpu_id, event.idle_state)

    def _process_idle_exit(self, event):
        if self.cpu_states[event.cpu_id].is_active:
            raise ValueError('Got idle state exit event for an active core: {}'.format(event))
        self.requested_states.pop(event.cpu_id, None)  # remove outstanding request if there is one
        old_state = self.cpu_states[event.cpu_id].idle_state
        self.cpu_states[event.cpu_id].idle_state = -1

        related_ids = self.idle_related_cpus[(event.cpu_id, old_state)]
        if old_state is None:
            return
        # Related cpus can no longer be in the state this cpu has just left,
        # so move those that are deeper than the next shallower state.
        new_state = old_state - 1
        for rid in related_ids:
            related_state = self.cpu_states[rid].idle_state
            # A related cpu in an unknown (None) state is left alone. The
            # explicit check is needed because Python 3 refuses to order
            # None against an int (Python 2 evaluated ``None > n`` as False).
            if related_state is not None and related_state > new_state:
                self._try_transition_to_idle_state(rid, new_state)

    def _try_transition_to_idle_state(self, cpu_id, idle_state):
        related_ids = self.idle_related_cpus[(cpu_id, idle_state)]

        # Tristate: True - can transition, False - can't transition,
        # None - unknown idle state on at least one related cpu
        transition_check = self._can_enter_state(related_ids, idle_state)

        if not transition_check:
            # If we can't enter an idle state right now, record that we've
            # requested it, so that we may enter it later (once all related
            # cpus also want a state at least as deep).
            self.requested_states[cpu_id] = idle_state

        if transition_check is None:
            # Unknown state on a related cpu means we're not sure whether we're
            # entering requested state or a shallower one
            self.cpu_states[cpu_id].idle_state = None
            return

        # Keep trying shallower states until all related cpus allow the
        # transition
        while not self._can_enter_state(related_ids, idle_state):
            idle_state -= 1
            related_ids = self.idle_related_cpus[(cpu_id, idle_state)]

        self.cpu_states[cpu_id].idle_state = idle_state
        for rid in related_ids:
            self.cpu_states[rid].idle_state = idle_state
            if self.requested_states[rid] == idle_state:
                del self.requested_states[rid]  # request satisfied, so remove

    def _can_enter_state(self, related_ids, state):
        """
        This is a tri-state check. Returns ``True`` if related cpu states allow transition
        into this state, ``False`` if related cpu states don't allow transition into this
        state, and ``None`` if at least one of the related cpus is in an unknown state
        (so the decision of whether a transition is possible cannot be made).

        """
        for rid in related_ids:
            rid_requested_state = self.requested_states[rid]
            rid_current_state = self.cpu_states[rid].idle_state
            if rid_current_state is None:
                return None
            if rid_current_state < state and rid_requested_state < state:
                return False
        return True
+
+
def stream_cpu_power_transitions(events):
    """
    Convert trace events into power events.

    ``cpu_idle`` and ``cpu_frequency`` events become
    ``CorePowerTransitionEvent`` instances, ``DROPPED EVENTS DETECTED``
    becomes ``CorePowerDroppedEvents``; events with any other name are
    skipped.

    """
    for trace_event in events:
        name = trace_event.name
        if name == 'cpu_frequency':
            yield CorePowerTransitionEvent(trace_event.timestamp, trace_event.cpu_id,
                                           frequency=trace_event.state)
        elif name == 'cpu_idle':
            # Reinterpret the recorded value as a signed 32-bit integer.
            idle_state = c_int32(trace_event.state).value
            yield CorePowerTransitionEvent(trace_event.timestamp, trace_event.cpu_id,
                                           idle_state=idle_state)
        elif name == 'DROPPED EVENTS DETECTED':
            yield CorePowerDroppedEvents(trace_event.cpu_id)
+
+
def gather_core_states(system_state_stream, freq_dependent_idle_states=None):  # NOQA
    """
    Reduce each system power state to ``(timestamp, core_states)``, where
    ``core_states`` holds one ``(idle_state, frequency)`` tuple per cpu.

    Active cpus (idle state ``-1``) are reported with their frequency. Idle
    states listed in ``freq_dependent_idle_states`` (``[0]`` when not given)
    are reported with the frequency if it is known and as ``(None, None)``
    otherwise. Every other state is reported without a frequency.

    """
    if freq_dependent_idle_states is None:
        freq_dependent_idle_states = [0]

    def summarise(cpu):
        idle = cpu.idle_state
        if idle == -1:
            return (-1, cpu.frequency)
        if idle not in freq_dependent_idle_states:
            return (idle, None)
        if cpu.frequency is None:
            return (None, None)
        return (idle, cpu.frequency)

    for snapshot in system_state_stream:
        yield (snapshot.timestamp, [summarise(cpu) for cpu in snapshot.cpus])
+
+
class ParallelStats(object):
    """
    Accumulates, per cluster, how long exactly N cores were active.

    :param core_clusters: cluster ID of each core, indexed by cpu id. An
                          additional ``'all'`` cluster covering every core is
                          always tracked.
    :param use_ratios: report proportions as ratios (three decimals) rather
                       than percentages (one decimal).

    """

    def __init__(self, core_clusters, use_ratios=False):
        self.clusters = defaultdict(set)
        self.use_ratios = use_ratios
        for i, clust in enumerate(core_clusters):
            self.clusters[clust].add(i)
        self.clusters['all'] = set(range(len(core_clusters)))

        self.first_timestamp = None
        self.last_timestamp = None
        self.previous_states = None
        self.parallel_times = defaultdict(lambda: defaultdict(int))
        self.running_times = defaultdict(int)

    def update(self, timestamp, core_states):
        """
        Account the time elapsed since the previous update to the states that
        were in effect during that interval (i.e. the previously supplied
        ones), then remember ``core_states`` for the next interval.

        """
        if self.last_timestamp is not None:
            delta = timestamp - self.last_timestamp
            active_cores = [i for i, c in enumerate(self.previous_states)
                            if c and c[0] == -1]
            for cluster, cluster_cores in self.clusters.items():
                clust_active_cores = len(cluster_cores.intersection(active_cores))
                self.parallel_times[cluster][clust_active_cores] += delta
                if clust_active_cores:
                    self.running_times[cluster] += delta
        else:  # initial update
            self.first_timestamp = timestamp

        self.last_timestamp = timestamp
        self.previous_states = core_states

    def report(self):  # NOQA
        """
        Return a ``ParallelReport`` with one row per cluster and active core
        count, or ``None`` if ``update`` has never been called.

        """
        if self.last_timestamp is None:
            return None

        report = ParallelReport()
        for row in self._report_rows():
            report.add(row)
        return report

    def _report_rows(self):
        """Build the ``[cluster, n, time, %time, %running_time]`` report rows."""
        total_time = self.last_timestamp - self.first_timestamp
        precision = 3 if self.use_ratios else 1
        fmt = '{{:.{}f}}'.format(precision)

        # Cluster IDs may be ints while 'all' is a string. Python 3 cannot
        # order such a mix, so sort non-strings first, which is also the order
        # Python 2 produced.
        clusters = sorted(self.parallel_times,
                          key=lambda c: (isinstance(c, str), c))
        rows = []
        for cluster in clusters:
            running_time = self.running_times[cluster]
            for n in range(len(self.clusters[cluster]) + 1):
                time = self.parallel_times[cluster][n]
                # Guard against a zero-length trace.
                time_pc = time / total_time if total_time else 0
                if not self.use_ratios:
                    time_pc *= 100
                # A cluster that was never active has zero running time;
                # report 0 for it rather than dividing by zero.
                if n and running_time:
                    running_time_pc = time / running_time
                    if not self.use_ratios:
                        running_time_pc *= 100
                else:
                    running_time_pc = 0
                rows.append([cluster, n,
                             fmt.format(time),
                             fmt.format(time_pc),
                             fmt.format(running_time_pc)])
        return rows
+
+
class ParallelReport(object):
    """Collects parallelism report rows and writes them out as CSV."""

    def __init__(self):
        self.values = []

    def add(self, value):
        """Append one report row."""
        self.values.append(value)

    def write(self, filepath):
        """Write the header followed by all collected rows to ``filepath``."""
        header = ['cluster', 'number_of_cores', 'total_time', '%time', '%running_time']
        with open(filepath, 'w') as wfh:
            writer = csv.writer(wfh)
            writer.writerow(header)
            for row in self.values:
                writer.writerow(row)
+
+
class PowerStateStats(object):
    """
    Accumulates, per cpu, the time spent in each power state.

    :param core_names: name of each core, indexed by cpu id.
    :param idle_state_names: optional idle state names indexed by idle state
                             ID; without them states are labelled
                             ``idle<ID>``.
    :param use_ratios: report proportions as ratios (three decimals) rather
                       than percentages (one decimal).

    """

    def __init__(self, core_names, idle_state_names=None, use_ratios=False):
        self.core_names = core_names
        self.idle_state_names = idle_state_names
        self.use_ratios = use_ratios
        self.first_timestamp = None
        self.last_timestamp = None
        self.previous_states = None
        self.cpu_states = defaultdict(lambda: defaultdict(int))

    def update(self, timestamp, core_states):  # NOQA
        """
        Account the time elapsed since the previous update to the states that
        were in effect during that interval (i.e. the previously supplied
        ones), then remember ``core_states`` for the next interval.

        ``core_states`` holds one ``(idle_state, frequency)`` tuple per cpu.

        """
        if self.last_timestamp is not None:
            delta = timestamp - self.last_timestamp
            for cpu, (idle, freq) in enumerate(self.previous_states):
                if idle == -1 and freq is not None:
                    # active at a known frequency
                    state = '{:07}KHz'.format(freq)
                elif freq:
                    # idle state reported together with a frequency
                    if self.idle_state_names:
                        state = '{}-{:07}KHz'.format(self.idle_state_names[idle], freq)
                    else:
                        state = 'idle{}-{:07}KHz'.format(idle, freq)
                elif idle not in (None, -1):
                    if self.idle_state_names:
                        state = self.idle_state_names[idle]
                    else:
                        state = 'idle{}'.format(idle)
                else:
                    # Was misspelled 'unkown'; the report documentation
                    # names this row "unknown".
                    state = 'unknown'
                self.cpu_states[cpu][state] += delta
        else:  # initial update
            self.first_timestamp = timestamp

        self.last_timestamp = timestamp
        self.previous_states = core_states

    def report(self):
        """
        Return a ``PowerStateStatsReport`` with the proportion of time each
        cpu spent in each state, or ``None`` if ``update`` has never been
        called.

        """
        if self.last_timestamp is None:
            return None
        precision = 3 if self.use_ratios else 1
        return PowerStateStatsReport(self._state_proportions(), self.core_names, precision)

    def _state_proportions(self):
        """
        Map each state name to a per-cpu list of time proportions; entries
        stay ``None`` for cpus that were never in that state.

        """
        total_time = self.last_timestamp - self.first_timestamp
        state_stats = defaultdict(lambda: [None] * len(self.core_names))

        for cpu, states in self.cpu_states.items():
            for state, time in states.items():
                # Guard against a zero-length trace.
                time_pc = time / total_time if total_time else 0
                if not self.use_ratios:
                    time_pc *= 100
                state_stats[state][cpu] = time_pc
        return state_stats
+
+
class PowerStateStatsReport(object):
    """
    Per-state, per-cpu residency figures that can be written out as CSV.

    ``state_stats`` maps a state name to a list with one value per cpu;
    ``None`` entries are written as zero.

    """

    def __init__(self, state_stats, core_names, precision=2):
        self.state_stats = state_stats
        self.core_names = core_names
        self.precision = precision

    def write(self, filepath):
        """Write a header row, then one row per state in sorted state order."""
        headers = ['state']
        for index, core in enumerate(self.core_names):
            headers.append('{} CPU{}'.format(core, index))
        value_format = '{{:.{}f}}'.format(self.precision)

        with open(filepath, 'w') as wfh:
            writer = csv.writer(wfh)
            writer.writerow(headers)
            for state in sorted(self.state_stats):
                row = [state]
                for value in self.state_stats[state]:
                    row.append(value_format.format(0 if value is None else value))
                writer.writerow(row)
+
+
def build_idle_domains(core_clusters,  # NOQA
                       num_states,
                       first_cluster_state=None,
                       first_system_state=None):
    """
    Returns a list of idle domain groups (one for each idle state). Each group is a
    list of domains, and a domain is a list of cpu ids for which that idle state is
    common. E.g.

        [[[0], [1], [2]], [[0, 1], [2]], [[0, 1, 2]]]

    This defines three idle states for a machine with three cores. The first idle state
    has three domains with one core in each domain; the second state has two domains,
    with cores 0 and 1 sharing one domain; the final state has only one domain shared
    by all cores.

    This mapping is created based on the following assumptions:

    - The device is an SMP or a big.LITTLE-like system with cores in one or
      more clusters (for SMP systems, all cores are considered to be in a "cluster").
    - Idle domains correspond to either individual cores, individual clusters, or
      the compute subsystem as a whole.
    - Cluster states are always deeper (higher index) than core states, and
      system states are always deeper than cluster states.

    parameters:

    :core_clusters: a list indicating cluster "ID" of the corresponding core, e.g.
                    ``[0, 0, 1]`` represents a three-core machine with cores 0
                    and 1 on cluster 0, and core 2 on cluster 1.
    :num_states: total number of idle states on a device.
    :first_cluster_state: the ID of the first idle state shared by all cores in a
                          cluster. ``None`` means there are no cluster states.
    :first_system_state: the ID of the first idle state shared by all cores.
                         ``None`` means there are no system states.

    """
    # ``sys.maxsize`` (available on both Python 2 and 3) stands in for "larger
    # than any state ID"; the Python 2-only ``sys.maxint`` was used before.
    if first_cluster_state is None:
        first_cluster_state = sys.maxsize
    if first_system_state is None:
        first_system_state = sys.maxsize
    # Materialise the range so that the system domain is a real list on
    # Python 3 as well.
    all_cpus = list(range(len(core_clusters)))
    cluster_cpus = defaultdict(list)
    for cpu, cluster in enumerate(core_clusters):
        cluster_cpus[cluster].append(cpu)
    cluster_domains = [cluster_cpus[c] for c in sorted(cluster_cpus)]
    core_domains = [[c] for c in all_cpus]

    idle_state_domains = []
    for state_id in range(num_states):
        if state_id >= first_system_state:
            idle_state_domains.append([all_cpus])
        elif state_id >= first_cluster_state:
            idle_state_domains.append(cluster_domains)
        else:
            idle_state_domains.append(core_domains)

    return idle_state_domains
+
+
def report_power_stats(trace_file, idle_state_names, core_names, core_clusters,
                       num_idle_states, first_cluster_state=sys.maxsize,
                       first_system_state=sys.maxsize, use_ratios=False):
    """
    Parse a trace file and produce parallelism and power state reports.

    :param trace_file: path of the trace to parse with ``TraceCmdTrace``.
    :param idle_state_names: idle state names, indexed by idle state ID.
    :param core_names: name of each core, indexed by cpu id.
    :param core_clusters: cluster ID of each core, indexed by cpu id.
    :param num_idle_states: total number of idle states on the device.
    :param first_cluster_state: ID of the first idle state shared by all cores
                                in a cluster.
    :param first_system_state: ID of the first idle state shared by all cores.
    :param use_ratios: report proportions as ratios instead of percentages.

    :returns: a ``(parallel_report, powerstate_report)`` tuple. Both entries
              are ``None`` when the trace yielded no power events.

    """
    # pylint: disable=too-many-locals
    # The defaults use ``sys.maxsize`` because ``sys.maxint`` does not exist
    # on Python 3.
    trace = TraceCmdTrace()
    ps_processor = PowerStateProcessor(core_clusters,
                                       num_idle_states=num_idle_states,
                                       first_cluster_state=first_cluster_state,
                                       first_system_state=first_system_state)
    parallel_stats = ParallelStats(core_clusters, use_ratios)
    power_state_stats = PowerStateStats(core_names, idle_state_names, use_ratios)

    # Each stage below is a generator, so the trace is processed as a stream.
    # NOTE(review): only 'cpu_idle' and 'cpu_frequency' are requested here,
    # while stream_cpu_power_transitions also handles 'DROPPED EVENTS
    # DETECTED' -- confirm that TraceCmdTrace.parse still emits those.
    event_stream = trace.parse(trace_file, names=['cpu_idle', 'cpu_frequency'])
    transition_stream = stream_cpu_power_transitions(event_stream)
    power_state_stream = ps_processor.process(transition_stream)
    core_state_stream = gather_core_states(power_state_stream)

    for timestamp, states in core_state_stream:
        parallel_stats.update(timestamp, states)
        power_state_stats.update(timestamp, states)

    parallel_report = parallel_stats.report()
    ps_report = power_state_stats.report()

    return (parallel_report, ps_report)
+
+
def main():
    """
    Command line entry point: parse the arguments, generate the reports and
    write ``parallel.csv`` and ``cpustate.csv`` into the output directory.

    Exits with an error message if the trace produced no reports.

    """
    args = parse_arguments()

    parallel_report, powerstate_report = report_power_stats(
        trace_file=args.infile,
        idle_state_names=args.idle_state_names,
        core_names=args.core_names,
        core_clusters=args.core_clusters,
        num_idle_states=args.num_idle_states,
        first_cluster_state=args.first_cluster_state,
        first_system_state=args.first_system_state,
        use_ratios=args.ratios,
    )
    # report_power_stats returns None reports for a trace without power
    # events; fail with a clear message instead of an AttributeError.
    if parallel_report is None or powerstate_report is None:
        sys.exit('No power state reports generated; are power events enabled in the trace?')
    parallel_report.write(os.path.join(args.output_directory, 'parallel.csv'))
    powerstate_report.write(os.path.join(args.output_directory, 'cpustate.csv'))
+
+
class SplitListAction(argparse.Action):
    """
    argparse action that stores a comma-separated option value as a list of
    whitespace-stripped strings. Does not accept ``nargs``.

    """

    def __init__(self, option_strings, dest, nargs=None, **kwargs):
        if nargs is None:
            super(SplitListAction, self).__init__(option_strings, dest, **kwargs)
        else:
            raise ValueError('nargs not allowed')

    def __call__(self, parser, namespace, values, option_string=None):
        items = [item.strip() for item in values.split(',')]
        setattr(namespace, self.dest, items)
+
+
def parse_arguments(argv=None):  # NOQA
    """
    Parse and validate the command line arguments.

    :param argv: list of argument strings to parse; when ``None``, argparse
                 reads them from ``sys.argv``.

    :returns: the argparse namespace, with ``core_clusters``,
              ``num_idle_states``/``idle_state_names`` and
              ``first_cluster_state`` filled in from the other options when
              they were not given explicitly.
    :raises ValueError: if core names are missing, if neither the number nor
                        the names of idle states are given, or if the two
                        disagree.

    """
    parser = argparse.ArgumentParser(description="""
                                     Produce CPU power activity statistics reports from
                                     power trace.
                                     """)
    parser.add_argument('infile', metavar='TRACEFILE', help='''
                        Path to the trace file to parse. This must be in the format generated
                        by "trace-cmd report" command.
                        ''')
    parser.add_argument('-d', '--output-directory', default='.',
                        help='''
                        Output directory where reports will be placed.
                        ''')
    parser.add_argument('-c', '--core-names', action=SplitListAction,
                        help='''
                        Comma-separated list of core names for the device on which the trace
                        was collected.
                        ''')
    parser.add_argument('-C', '--core-clusters', action=SplitListAction, default=[],
                        help='''
                        Comma-separated list of core cluster IDs for the device on which the
                        trace was collected. If not specified, this will be generated from
                        core names on the assumption that all cores with the same name are on the
                        same cluster.
                        ''')
    # SplitListAction is an argparse action, not a type converter; it was
    # passed as ``type=``, which made every use of this option fail to parse.
    parser.add_argument('-i', '--idle-state-names', action=SplitListAction,
                        help='''
                        Comma-separated list of idle state names. The number of names must match
                        --num-idle-states if that was explicitly specified.
                        ''')
    parser.add_argument('-n', '--num-idle-states', type=int,
                        help='''
                        number of idle states on the device
                        ''')
    parser.add_argument('-q', '--first-cluster-state', type=int,
                        help='''
                        ID of the first cluster state. Must be < --num-idle-states.
                        ''')
    parser.add_argument('-s', '--first-system-state', type=int,
                        help='''
                        ID of the first system state. Must be < --num-idle-states, and
                        > --first-cluster-state.
                        ''')
    parser.add_argument('-R', '--ratios', action='store_true',
                        help='''
                        By default proportional values will be reported as percentages, if this
                        flag is enabled, they will be reported as ratios instead.
                        ''')

    args = parser.parse_args(argv)

    if not args.core_names:
        raise ValueError('core names must be specified using -c or --core-names')
    if not args.core_clusters:
        logger.debug('core clusters not specified, inferring from core names')
        # Cluster IDs are assigned in order of first appearance of a name.
        core_cluster_map = {}
        core_clusters = []
        for cn in args.core_names:
            if cn not in core_cluster_map:
                core_cluster_map[cn] = len(core_cluster_map)
            core_clusters.append(core_cluster_map[cn])
        args.core_clusters = core_clusters

    if not args.num_idle_states and args.idle_state_names:
        args.num_idle_states = len(args.idle_state_names)
    elif args.num_idle_states and not args.idle_state_names:
        args.idle_state_names = ['idle{}'.format(i) for i in range(args.num_idle_states)]
    elif args.num_idle_states and args.idle_state_names:
        if len(args.idle_state_names) != args.num_idle_states:
            raise ValueError('Number of idle state names does not match --num-idle-states')
    else:
        raise ValueError('Either --num-idle-states or --idle-state-names must be specified')

    # Compare against None so that an explicitly given state ID of 0 is
    # respected rather than treated as "not specified".
    if args.first_cluster_state is None and len(set(args.core_clusters)) > 1:
        if args.first_system_state is not None:
            logger.debug('First cluster idle state not specified; '
                         'assuming state previous to first system state')
            args.first_cluster_state = args.first_system_state - 1
        else:
            logger.debug('First cluster idle state not specified; assuming last available state')
            args.first_cluster_state = args.num_idle_states - 1

    return args
+
+if __name__ == '__main__':
+ main()
diff --git a/wlauto/utils/trace_cmd.py b/wlauto/utils/trace_cmd.py
index fc6d63aa..900251b2 100644
--- a/wlauto/utils/trace_cmd.py
+++ b/wlauto/utils/trace_cmd.py
@@ -31,7 +31,7 @@ TRACE_MARKER_STOP = 'TRACE_MARKER_STOP'
class TraceCmdEvent(object):
"""
- A single trace-cmd event. This will appear in the trace cmd report in the format
+ A single trace-cmd event. This will appear in the trace cmd report in the format ::
<idle>-0 [000] 3284.126993: sched_rq_runnable_load: cpu=0 load=54
| | | | |___________|