diff options
Diffstat (limited to 'tools/gator/daemon/PerfGroup.cpp')
-rw-r--r-- | tools/gator/daemon/PerfGroup.cpp | 234 |
1 files changed, 174 insertions, 60 deletions
diff --git a/tools/gator/daemon/PerfGroup.cpp b/tools/gator/daemon/PerfGroup.cpp index 4fd960a9058c..cfc62e4cc77e 100644 --- a/tools/gator/daemon/PerfGroup.cpp +++ b/tools/gator/daemon/PerfGroup.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2013-2014. All rights reserved. + * Copyright (C) ARM Limited 2013-2015. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,17 +16,21 @@ #include <unistd.h> #include "Buffer.h" +#include "DynBuf.h" #include "Logging.h" #include "Monitor.h" #include "PerfBuffer.h" #include "SessionData.h" +static const int schedSwitchKey = getEventKey(); +static const int clockKey = getEventKey(); + #define DEFAULT_PEA_ARGS(pea, additionalSampleType) \ pea.size = sizeof(pea); \ /* Emit time, read_format below, group leader id, and raw tracepoint info */ \ pea.sample_type = (gSessionData->perf.getLegacySupport() \ - ? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_ID \ - : PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_IDENTIFIER ) | additionalSampleType; \ + ? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_ID \ + : PERF_SAMPLE_IDENTIFIER ) | PERF_SAMPLE_TIME | additionalSampleType; \ /* Emit emit value in group format */ \ pea.read_format = PERF_FORMAT_ID | PERF_FORMAT_GROUP; \ /* start out disabled */ \ @@ -49,11 +53,12 @@ static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t p return fd; } -PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb) { +PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb), mSchedSwitchId(-1) { memset(&mAttrs, 0, sizeof(mAttrs)); - memset(&mPerCpu, 0, sizeof(mPerCpu)); + memset(&mFlags, 0, sizeof(mFlags)); memset(&mKeys, -1, sizeof(mKeys)); memset(&mFds, -1, sizeof(mFds)); + memset(&mLeaders, -1, sizeof(mLeaders)); } PerfGroup::~PerfGroup() { @@ -64,7 +69,7 @@ PerfGroup::~PerfGroup() { } } -bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) { +int PerfGroup::doAdd(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) { int i; for (i = 0; i < ARRAY_LENGTH(mKeys); ++i) { if (mKeys[i] < 0) { @@ -73,8 +78,8 @@ bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key } if (i >= ARRAY_LENGTH(mKeys)) { - logg->logMessage("%s(%s:%i): Too many counters", __FUNCTION__, __FILE__, __LINE__); - return false; + logg->logMessage("Too many counters"); + return -1; } DEFAULT_PEA_ARGS(mAttrs[i], sampleType); @@ -82,121 +87,230 @@ bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key mAttrs[i].config = config; mAttrs[i].sample_period = sample; // always be on the CPU but only a group leader can be pinned - mAttrs[i].pinned = (i == 0 ? 1 : 0); + mAttrs[i].pinned = (flags & PERF_GROUP_LEADER ? 1 : 0); mAttrs[i].mmap = (flags & PERF_GROUP_MMAP ? 1 : 0); mAttrs[i].comm = (flags & PERF_GROUP_COMM ? 1 : 0); mAttrs[i].freq = (flags & PERF_GROUP_FREQ ? 1 : 0); mAttrs[i].task = (flags & PERF_GROUP_TASK ? 1 : 0); mAttrs[i].sample_id_all = (flags & PERF_GROUP_SAMPLE_ID_ALL ? 1 : 0); - mPerCpu[i] = (flags & PERF_GROUP_PER_CPU); + mFlags[i] = flags; mKeys[i] = key; - buffer->pea(currTime, &mAttrs[i], key); + buffer->marshalPea(currTime, &mAttrs[i], key); + + return i; +} + +/* Counters from different hardware PMUs need to be in different + * groups. Software counters can be in the same group as the CPU and + * should be marked as PERF_GROUP_CPU. The big and little clusters can + * be in the same group as only one or the other will be available on + * a given CPU. + */ +int PerfGroup::getEffectiveType(const int type, const int flags) { + const int effectiveType = flags & PERF_GROUP_CPU ? (int)PERF_TYPE_HARDWARE : type; + if (effectiveType >= ARRAY_LENGTH(mLeaders)) { + logg->logError("perf type is too large, please increase the size of PerfGroup::mLeaders"); + handleException(); + } + return effectiveType; +} + +bool PerfGroup::createCpuGroup(const uint64_t currTime, Buffer *const buffer) { + if (mSchedSwitchId < 0) { + DynBuf b; + mSchedSwitchId = PerfDriver::getTracepointId(SCHED_SWITCH, &b); + if (mSchedSwitchId < 0) { + logg->logMessage("Unable to read sched_switch id"); + return false; + } + } + + mLeaders[PERF_TYPE_HARDWARE] = doAdd(currTime, buffer, schedSwitchKey, PERF_TYPE_TRACEPOINT, mSchedSwitchId, 1, PERF_SAMPLE_READ | PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL | PERF_GROUP_PER_CPU | PERF_GROUP_LEADER | PERF_GROUP_CPU); + if (mLeaders[PERF_TYPE_HARDWARE] < 0) { + return false; + } + + if (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && doAdd(currTime, buffer, clockKey, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU) < 0) { + return false; + } return true; } +bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) { + const int effectiveType = getEffectiveType(type, flags); + + // Does a group exist for this already? + if (!(flags & PERF_GROUP_LEADER) && mLeaders[effectiveType] < 0) { + // Create it + if (effectiveType == PERF_TYPE_HARDWARE) { + if (!createCpuGroup(currTime, buffer)) { + return false; + } + } else { + // Non-CPU PMUs are sampled every 100ms for Sample Rate: None and EBS, otherwise they would never be sampled + const uint64_t timeout = gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS ? 1000000000UL / gSessionData->mSampleRate : 100000000UL; + // PERF_SAMPLE_TID | PERF_SAMPLE_IP aren't helpful on non-CPU or 'uncore' PMUs - which CPU is the right one to sample? But removing it causes problems, remove it later. + mLeaders[effectiveType] = doAdd(currTime, buffer, clockKey, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, timeout, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_LEADER); + if (mLeaders[effectiveType] < 0) { + return false; + } + } + } + + if (!(flags & PERF_GROUP_LEADER) && effectiveType != PERF_TYPE_HARDWARE && (flags & PERF_GROUP_PER_CPU)) { + logg->logError("'uncore' counters are not permitted to be per-cpu"); + handleException(); + } + + return doAdd(currTime, buffer, key, type, config, sample, sampleType, flags) >= 0; +} + int PerfGroup::prepareCPU(const int cpu, Monitor *const monitor) { - logg->logMessage("%s(%s:%i): Onlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu); + logg->logMessage("Onlining cpu %i", cpu); for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { if (mKeys[i] < 0) { continue; } - if ((cpu != 0) && !mPerCpu[i]) { + if ((cpu != 0) && !(mFlags[i] & PERF_GROUP_PER_CPU)) { continue; } - const int offset = i * gSessionData->mCores; - if (mFds[cpu + offset] >= 0) { - logg->logMessage("%s(%s:%i): cpu already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__); + const int offset = i * gSessionData->mCores + cpu; + if (mFds[offset] >= 0) { + logg->logMessage("cpu already online or not correctly cleaned up"); return PG_FAILURE; } - logg->logMessage("%s(%s:%i): perf_event_open cpu: %i type: %lli config: %lli sample: %lli sample_type: 0x%llx pinned: %i mmap: %i comm: %i freq: %i task: %i sample_id_all: %i", __FUNCTION__, __FILE__, __LINE__, cpu, (long long)mAttrs[i].type, (long long)mAttrs[i].config, (long long)mAttrs[i].sample_period, (long long)mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all); - mFds[cpu + offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, i == 0 ? -1 : mFds[cpu], i == 0 ? 0 : PERF_FLAG_FD_OUTPUT); - if (mFds[cpu + offset] < 0) { - logg->logMessage("%s(%s:%i): failed %s", __FUNCTION__, __FILE__, __LINE__, strerror(errno)); + logg->logMessage("perf_event_open cpu: %i type: %i config: %lli sample: %lli sample_type: 0x%llx pinned: %lli mmap: %lli comm: %lli freq: %lli task: %lli sample_id_all: %lli", cpu, mAttrs[i].type, mAttrs[i].config, mAttrs[i].sample_period, mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all); + mFds[offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, mAttrs[i].pinned ? -1 : mFds[mLeaders[getEffectiveType(mAttrs[i].type, mFlags[i])] * gSessionData->mCores + cpu], mAttrs[i].pinned ? 0 : PERF_FLAG_FD_OUTPUT); + if (mFds[offset] < 0) { + logg->logMessage("failed %s", strerror(errno)); if (errno == ENODEV) { + // The core is offline return PG_CPU_OFFLINE; } +#ifndef USE_STRICTER_CHECK continue; +#else + if (errno == ENOENT) { + // This event doesn't apply to this CPU but should apply to a different one, ex bL + continue; + } + logg->logMessage("perf_event_open failed"); + return PG_FAILURE; +#endif } - if (!mPb->useFd(cpu, mFds[cpu + offset])) { - logg->logMessage("%s(%s:%i): PerfBuffer::useFd failed", __FUNCTION__, __FILE__, __LINE__); + if (!mPb->useFd(cpu, mFds[offset])) { + logg->logMessage("PerfBuffer::useFd failed"); return PG_FAILURE; } - if (!monitor->add(mFds[cpu + offset])) { - logg->logMessage("%s(%s:%i): Monitor::add failed", __FUNCTION__, __FILE__, __LINE__); - return PG_FAILURE; + if (!monitor->add(mFds[offset])) { + logg->logMessage("Monitor::add failed"); + return PG_FAILURE; } } return PG_SUCCESS; } -int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool start, Buffer *const buffer) { - __u64 ids[ARRAY_LENGTH(mKeys)]; - int coreKeys[ARRAY_LENGTH(mKeys)]; - int idCount = 0; +static bool readAndSend(const uint64_t currTime, Buffer *const buffer, const int fd, const int keyCount, const int *const keys) { + char buf[1024]; + ssize_t bytes = read(fd, buf, sizeof(buf)); + if (bytes < 0) { + logg->logMessage("read failed"); + return false; + } + buffer->marshalKeysOld(currTime, keyCount, keys, bytes, buf); - for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { - const int fd = mFds[cpu + i * gSessionData->mCores]; - if (fd < 0) { - continue; - } + return true; +} - coreKeys[idCount] = mKeys[i]; - if (!gSessionData->perf.getLegacySupport() && ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0 && - // Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works - ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); - return 0; - } - ++idCount; - } +int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool enable, Buffer *const buffer) { + bool addedEvents = false; if (!gSessionData->perf.getLegacySupport()) { - buffer->keys(currTime, idCount, ids, coreKeys); + int idCount = 0; + int coreKeys[ARRAY_LENGTH(mKeys)]; + __u64 ids[ARRAY_LENGTH(mKeys)]; + + for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { + const int fd = mFds[cpu + i * gSessionData->mCores]; + if (fd < 0) { + continue; + } + + coreKeys[idCount] = mKeys[i]; + if (ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0 && + // Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works + ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) { + logg->logMessage("ioctl failed"); + return 0; + } + ++idCount; + addedEvents = true; + } + + buffer->marshalKeys(currTime, idCount, ids, coreKeys); } else { - char buf[1024]; - ssize_t bytes = read(mFds[cpu], buf, sizeof(buf)); - if (bytes < 0) { - logg->logMessage("read failed"); - return 0; + int idCounts[ARRAY_LENGTH(mLeaders)] = { 0 }; + int coreKeys[ARRAY_LENGTH(mLeaders)][ARRAY_LENGTH(mKeys)]; + for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { + const int fd = mFds[cpu + i * gSessionData->mCores]; + if (fd < 0) { + continue; + } + + const int effectiveType = getEffectiveType(mAttrs[i].type, mFlags[i]); + if (mAttrs[i].pinned && mLeaders[effectiveType] != i) { + if (!readAndSend(currTime, buffer, fd, 1, mKeys + i)) { + return 0; + } + } else { + coreKeys[effectiveType][idCounts[effectiveType]] = mKeys[i]; + ++idCounts[effectiveType]; + addedEvents = true; + } + } + + for (int i = 0; i < ARRAY_LENGTH(mLeaders); ++i) { + if (idCounts[i] > 0 && !readAndSend(currTime, buffer, mFds[mLeaders[i] * gSessionData->mCores + cpu], idCounts[i], coreKeys[i])) { + return 0; + } } - buffer->keysOld(currTime, idCount, coreKeys, bytes, buf); } - if (start) { + if (enable) { for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { int offset = i * gSessionData->mCores + cpu; if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_ENABLE, 0) < 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("ioctl failed"); return 0; } } } - if (idCount == 0) { - logg->logMessage("%s(%s:%i): no events came online", __FUNCTION__, __FILE__, __LINE__); + if (!addedEvents) { + logg->logMessage("no events came online"); } - return idCount; + return 1; } bool PerfGroup::offlineCPU(const int cpu) { - logg->logMessage("%s(%s:%i): Offlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu); + logg->logMessage("Offlining cpu %i", cpu); - for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { + for (int i = ARRAY_LENGTH(mKeys) - 1; i >= 0; --i) { int offset = i * gSessionData->mCores + cpu; if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_DISABLE, 0) < 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("ioctl failed"); return false; } } @@ -204,7 +318,7 @@ bool PerfGroup::offlineCPU(const int cpu) { // Mark the buffer so that it will be released next time it's read mPb->discard(cpu); - for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) { + for (int i = ARRAY_LENGTH(mKeys) - 1; i >= 0; --i) { if (mKeys[i] < 0) { continue; } @@ -222,7 +336,7 @@ bool PerfGroup::offlineCPU(const int cpu) { bool PerfGroup::start() { for (int pos = 0; pos < ARRAY_LENGTH(mFds); ++pos) { if (mFds[pos] >= 0 && ioctl(mFds[pos], PERF_EVENT_IOC_ENABLE, 0) < 0) { - logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__); + logg->logMessage("ioctl failed"); goto fail; } } |