1 files changed, 174 insertions, 60 deletions
diff --git a/tools/gator/daemon/PerfGroup.cpp b/tools/gator/daemon/PerfGroup.cpp
index 4fd960a9058c..cfc62e4cc77e 100644
--- a/tools/gator/daemon/PerfGroup.cpp
+++ b/tools/gator/daemon/PerfGroup.cpp
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) ARM Limited 2013-2014. All rights reserved.
+ * Copyright (C) ARM Limited 2013-2015. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -16,17 +16,21 @@
 #include <unistd.h>
 
 #include "Buffer.h"
+#include "DynBuf.h"
 #include "Logging.h"
 #include "Monitor.h"
 #include "PerfBuffer.h"
 #include "SessionData.h"
 
+static const int schedSwitchKey = getEventKey();
+static const int clockKey = getEventKey();
+
 #define DEFAULT_PEA_ARGS(pea, additionalSampleType) \
 	pea.size = sizeof(pea); \
 	/* Emit time, read_format below, group leader id, and raw tracepoint info */ \
 	pea.sample_type = (gSessionData->perf.getLegacySupport() \
-										 ? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_ID \
-										 : PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_IDENTIFIER ) | additionalSampleType; \
+			   ? PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_ID \
+			   : PERF_SAMPLE_IDENTIFIER ) | PERF_SAMPLE_TIME | additionalSampleType; \
 	/* Emit emit value in group format */ \
 	pea.read_format = PERF_FORMAT_ID | PERF_FORMAT_GROUP; \
 	/* start out disabled */ \
@@ -49,11 +53,12 @@ static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t p
 	return fd;
 }
 
-PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb) {
+PerfGroup::PerfGroup(PerfBuffer *const pb) : mPb(pb), mSchedSwitchId(-1) {
 	memset(&mAttrs, 0, sizeof(mAttrs));
-	memset(&mPerCpu, 0, sizeof(mPerCpu));
+	memset(&mFlags, 0, sizeof(mFlags));
 	memset(&mKeys, -1, sizeof(mKeys));
 	memset(&mFds, -1, sizeof(mFds));
+	memset(&mLeaders, -1, sizeof(mLeaders));
 }
 
 PerfGroup::~PerfGroup() {
@@ -64,7 +69,7 @@ PerfGroup::~PerfGroup() {
 	}
 }
 
-bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) {
+int PerfGroup::doAdd(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) {
 	int i;
 	for (i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
 		if (mKeys[i] < 0) {
@@ -73,8 +78,8 @@ bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key
 	}
 
 	if (i >= ARRAY_LENGTH(mKeys)) {
-		logg->logMessage("%s(%s:%i): Too many counters", __FUNCTION__, __FILE__, __LINE__);
-		return false;
+		logg->logMessage("Too many counters");
+		return -1;
 	}
 
 	DEFAULT_PEA_ARGS(mAttrs[i], sampleType);
@@ -82,121 +87,230 @@ bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key
 	mAttrs[i].config = config;
 	mAttrs[i].sample_period = sample;
 	// always be on the CPU but only a group leader can be pinned
-	mAttrs[i].pinned = (i == 0 ? 1 : 0);
+	mAttrs[i].pinned = (flags & PERF_GROUP_LEADER ? 1 : 0);
 	mAttrs[i].mmap = (flags & PERF_GROUP_MMAP ? 1 : 0);
 	mAttrs[i].comm = (flags & PERF_GROUP_COMM ? 1 : 0);
 	mAttrs[i].freq = (flags & PERF_GROUP_FREQ ? 1 : 0);
 	mAttrs[i].task = (flags & PERF_GROUP_TASK ? 1 : 0);
 	mAttrs[i].sample_id_all = (flags & PERF_GROUP_SAMPLE_ID_ALL ? 1 : 0);
-	mPerCpu[i] = (flags & PERF_GROUP_PER_CPU);
+	mFlags[i] = flags;
 
 	mKeys[i] = key;
 
-	buffer->pea(currTime, &mAttrs[i], key);
+	buffer->marshalPea(currTime, &mAttrs[i], key);
+
+	return i;
+}
+
+/* Counters from different hardware PMUs need to be in different
+ * groups. Software counters can be in the same group as the CPU and
+ * should be marked as PERF_GROUP_CPU. The big and little clusters can
+ * be in the same group as only one or the other will be available on
+ * a given CPU.
+ */
+int PerfGroup::getEffectiveType(const int type, const int flags) {
+	const int effectiveType = flags & PERF_GROUP_CPU ? (int)PERF_TYPE_HARDWARE : type;
+	if (effectiveType >= ARRAY_LENGTH(mLeaders)) {
+		logg->logError("perf type is too large, please increase the size of PerfGroup::mLeaders");
+		handleException();
+	}
+	return effectiveType;
+}
+
+bool PerfGroup::createCpuGroup(const uint64_t currTime, Buffer *const buffer) {
+	if (mSchedSwitchId < 0) {
+		DynBuf b;
+		mSchedSwitchId = PerfDriver::getTracepointId(SCHED_SWITCH, &b);
+		if (mSchedSwitchId < 0) {
+			logg->logMessage("Unable to read sched_switch id");
+			return false;
+		}
+	}
+
+	mLeaders[PERF_TYPE_HARDWARE] = doAdd(currTime, buffer, schedSwitchKey, PERF_TYPE_TRACEPOINT, mSchedSwitchId, 1, PERF_SAMPLE_READ | PERF_SAMPLE_RAW, PERF_GROUP_MMAP | PERF_GROUP_COMM | PERF_GROUP_TASK | PERF_GROUP_SAMPLE_ID_ALL | PERF_GROUP_PER_CPU | PERF_GROUP_LEADER | PERF_GROUP_CPU);
+	if (mLeaders[PERF_TYPE_HARDWARE] < 0) {
+		return false;
+	}
+
+	if (gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS && doAdd(currTime, buffer, clockKey, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, 1000000000UL / gSessionData->mSampleRate, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_PER_CPU | PERF_GROUP_CPU) < 0) {
+		return false;
+	}
 
 	return true;
 }
 
+bool PerfGroup::add(const uint64_t currTime, Buffer *const buffer, const int key, const __u32 type, const __u64 config, const __u64 sample, const __u64 sampleType, const int flags) {
+	const int effectiveType = getEffectiveType(type, flags);
+
+	// Does a group exist for this already?
+	if (!(flags & PERF_GROUP_LEADER) && mLeaders[effectiveType] < 0) {
+		// Create it
+		if (effectiveType == PERF_TYPE_HARDWARE) {
+			if (!createCpuGroup(currTime, buffer)) {
+				return false;
+			}
+		} else {
+			// Non-CPU PMUs are sampled every 100ms for Sample Rate: None and EBS, otherwise they would never be sampled
+			const uint64_t timeout = gSessionData->mSampleRate > 0 && !gSessionData->mIsEBS ? 1000000000UL / gSessionData->mSampleRate : 100000000UL;
+			// PERF_SAMPLE_TID | PERF_SAMPLE_IP aren't helpful on non-CPU or 'uncore' PMUs - which CPU is the right one to sample? But removing it causes problems, remove it later.
+			mLeaders[effectiveType] = doAdd(currTime, buffer, clockKey, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, timeout, PERF_SAMPLE_TID | PERF_SAMPLE_IP | PERF_SAMPLE_READ, PERF_GROUP_LEADER);
+			if (mLeaders[effectiveType] < 0) {
+				return false;
+			}
+		}
+	}
+
+	if (!(flags & PERF_GROUP_LEADER) && effectiveType != PERF_TYPE_HARDWARE && (flags & PERF_GROUP_PER_CPU)) {
+		logg->logError("'uncore' counters are not permitted to be per-cpu");
+		handleException();
+	}
+
+	return doAdd(currTime, buffer, key, type, config, sample, sampleType, flags) >= 0;
+}
+
 int PerfGroup::prepareCPU(const int cpu, Monitor *const monitor) {
-	logg->logMessage("%s(%s:%i): Onlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu);
+	logg->logMessage("Onlining cpu %i", cpu);
 
 	for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
 		if (mKeys[i] < 0) {
 			continue;
 		}
 
-		if ((cpu != 0) && !mPerCpu[i]) {
+		if ((cpu != 0) && !(mFlags[i] & PERF_GROUP_PER_CPU)) {
 			continue;
 		}
 
-		const int offset = i * gSessionData->mCores;
-		if (mFds[cpu + offset] >= 0) {
-			logg->logMessage("%s(%s:%i): cpu already online or not correctly cleaned up", __FUNCTION__, __FILE__, __LINE__);
+		const int offset = i * gSessionData->mCores + cpu;
+		if (mFds[offset] >= 0) {
+			logg->logMessage("cpu already online or not correctly cleaned up");
 			return PG_FAILURE;
 		}
 
-		logg->logMessage("%s(%s:%i): perf_event_open cpu: %i type: %lli config: %lli sample: %lli sample_type: 0x%llx pinned: %i mmap: %i comm: %i freq: %i task: %i sample_id_all: %i", __FUNCTION__, __FILE__, __LINE__, cpu, (long long)mAttrs[i].type, (long long)mAttrs[i].config, (long long)mAttrs[i].sample_period, (long long)mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all);
-		mFds[cpu + offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, i == 0 ? -1 : mFds[cpu], i == 0 ? 0 : PERF_FLAG_FD_OUTPUT);
-		if (mFds[cpu + offset] < 0) {
-			logg->logMessage("%s(%s:%i): failed %s", __FUNCTION__, __FILE__, __LINE__, strerror(errno));
+		logg->logMessage("perf_event_open cpu: %i type: %i config: %lli sample: %lli sample_type: 0x%llx pinned: %lli mmap: %lli comm: %lli freq: %lli task: %lli sample_id_all: %lli", cpu, mAttrs[i].type, mAttrs[i].config, mAttrs[i].sample_period, mAttrs[i].sample_type, mAttrs[i].pinned, mAttrs[i].mmap, mAttrs[i].comm, mAttrs[i].freq, mAttrs[i].task, mAttrs[i].sample_id_all);
+		mFds[offset] = sys_perf_event_open(&mAttrs[i], -1, cpu, mAttrs[i].pinned ? -1 : mFds[mLeaders[getEffectiveType(mAttrs[i].type, mFlags[i])] * gSessionData->mCores + cpu], mAttrs[i].pinned ? 0 : PERF_FLAG_FD_OUTPUT);
+		if (mFds[offset] < 0) {
+			logg->logMessage("failed %s", strerror(errno));
 			if (errno == ENODEV) {
+				// The core is offline
 				return PG_CPU_OFFLINE;
 			}
+#ifndef USE_STRICTER_CHECK
 			continue;
+#else
+			if (errno == ENOENT) {
+				// This event doesn't apply to this CPU but should apply to a different one, ex bL
+				continue;
+			}
+			logg->logMessage("perf_event_open failed");
+			return PG_FAILURE;
+#endif
 		}
 
-		if (!mPb->useFd(cpu, mFds[cpu + offset])) {
-			logg->logMessage("%s(%s:%i): PerfBuffer::useFd failed", __FUNCTION__, __FILE__, __LINE__);
+		if (!mPb->useFd(cpu, mFds[offset])) {
+			logg->logMessage("PerfBuffer::useFd failed");
 			return PG_FAILURE;
 		}
 
 
-		if (!monitor->add(mFds[cpu + offset])) {
-		  logg->logMessage("%s(%s:%i): Monitor::add failed", __FUNCTION__, __FILE__, __LINE__);
-		  return PG_FAILURE;
+		if (!monitor->add(mFds[offset])) {
+			logg->logMessage("Monitor::add failed");
+			return PG_FAILURE;
 		}
 	}
 
 	return PG_SUCCESS;
 }
 
-int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool start, Buffer *const buffer) {
-	__u64 ids[ARRAY_LENGTH(mKeys)];
-	int coreKeys[ARRAY_LENGTH(mKeys)];
-	int idCount = 0;
+static bool readAndSend(const uint64_t currTime, Buffer *const buffer, const int fd, const int keyCount, const int *const keys) {
+	char buf[1024];
+	ssize_t bytes = read(fd, buf, sizeof(buf));
+	if (bytes < 0) {
+		logg->logMessage("read failed");
+		return false;
+	}
+	buffer->marshalKeysOld(currTime, keyCount, keys, bytes, buf);
 
-	for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
-		const int fd = mFds[cpu + i * gSessionData->mCores];
-		if (fd < 0) {
-			continue;
-		}
+	return true;
+}
 
-		coreKeys[idCount] = mKeys[i];
-		if (!gSessionData->perf.getLegacySupport() && ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0 &&
-				// Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works
-				ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) {
-			logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
-			return 0;
-		}
-		++idCount;
-	}
+int PerfGroup::onlineCPU(const uint64_t currTime, const int cpu, const bool enable, Buffer *const buffer) {
+	bool addedEvents = false;
 
 	if (!gSessionData->perf.getLegacySupport()) {
-		buffer->keys(currTime, idCount, ids, coreKeys);
+		int idCount = 0;
+		int coreKeys[ARRAY_LENGTH(mKeys)];
+		__u64 ids[ARRAY_LENGTH(mKeys)];
+
+		for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+			const int fd = mFds[cpu + i * gSessionData->mCores];
+			if (fd < 0) {
+				continue;
+			}
+
+			coreKeys[idCount] = mKeys[i];
+			if (ioctl(fd, PERF_EVENT_IOC_ID, &ids[idCount]) != 0 &&
+					// Workaround for running 32-bit gatord on 64-bit systems, kernel patch in the works
+					ioctl(fd, (PERF_EVENT_IOC_ID & ~IOCSIZE_MASK) | (8 << _IOC_SIZESHIFT), &ids[idCount]) != 0) {
+				logg->logMessage("ioctl failed");
+				return 0;
+			}
+			++idCount;
+			addedEvents = true;
+		}
+
+		buffer->marshalKeys(currTime, idCount, ids, coreKeys);
 	} else {
-		char buf[1024];
-		ssize_t bytes = read(mFds[cpu], buf, sizeof(buf));
-		if (bytes < 0) {
-			logg->logMessage("read failed");
-			return 0;
+		int idCounts[ARRAY_LENGTH(mLeaders)] = { 0 };
+		int coreKeys[ARRAY_LENGTH(mLeaders)][ARRAY_LENGTH(mKeys)];
+		for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+			const int fd = mFds[cpu + i * gSessionData->mCores];
+			if (fd < 0) {
+				continue;
+			}
+
+			const int effectiveType = getEffectiveType(mAttrs[i].type, mFlags[i]);
+			if (mAttrs[i].pinned && mLeaders[effectiveType] != i) {
+				if (!readAndSend(currTime, buffer, fd, 1, mKeys + i)) {
+					return 0;
+				}
+			} else {
+				coreKeys[effectiveType][idCounts[effectiveType]] = mKeys[i];
+				++idCounts[effectiveType];
+				addedEvents = true;
+			}
+		}
+
+		for (int i = 0; i < ARRAY_LENGTH(mLeaders); ++i) {
+			if (idCounts[i] > 0 && !readAndSend(currTime, buffer, mFds[mLeaders[i] * gSessionData->mCores + cpu], idCounts[i], coreKeys[i])) {
+					return 0;
+			}
 		}
-		buffer->keysOld(currTime, idCount, coreKeys, bytes, buf);
 	}
 
-	if (start) {
+	if (enable) {
 		for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
 			int offset = i * gSessionData->mCores + cpu;
 			if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_ENABLE, 0) < 0) {
-				logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
+				logg->logMessage("ioctl failed");
 				return 0;
 			}
 		}
 	}
 
-	if (idCount == 0) {
-		logg->logMessage("%s(%s:%i): no events came online", __FUNCTION__, __FILE__, __LINE__);
+	if (!addedEvents) {
+		logg->logMessage("no events came online");
 	}
 
-	return idCount;
+	return 1;
 }
 
 bool PerfGroup::offlineCPU(const int cpu) {
-	logg->logMessage("%s(%s:%i): Offlining cpu %i", __FUNCTION__, __FILE__, __LINE__, cpu);
+	logg->logMessage("Offlining cpu %i", cpu);
 
-	for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+	for (int i = ARRAY_LENGTH(mKeys) - 1; i >= 0; --i) {
 		int offset = i * gSessionData->mCores + cpu;
 		if (mFds[offset] >= 0 && ioctl(mFds[offset], PERF_EVENT_IOC_DISABLE, 0) < 0) {
-			logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
+			logg->logMessage("ioctl failed");
 			return false;
 		}
 	}
@@ -204,7 +318,7 @@ bool PerfGroup::offlineCPU(const int cpu) {
 	// Mark the buffer so that it will be released next time it's read
 	mPb->discard(cpu);
 
-	for (int i = 0; i < ARRAY_LENGTH(mKeys); ++i) {
+	for (int i = ARRAY_LENGTH(mKeys) - 1; i >= 0; --i) {
 		if (mKeys[i] < 0) {
 			continue;
 		}
@@ -222,7 +336,7 @@ bool PerfGroup::offlineCPU(const int cpu) {
 bool PerfGroup::start() {
 	for (int pos = 0; pos < ARRAY_LENGTH(mFds); ++pos) {
 		if (mFds[pos] >= 0 && ioctl(mFds[pos], PERF_EVENT_IOC_ENABLE, 0) < 0) {
-			logg->logMessage("%s(%s:%i): ioctl failed", __FUNCTION__, __FILE__, __LINE__);
+			logg->logMessage("ioctl failed");
 			goto fail;
 		}
 	}