summaryrefslogtreecommitdiff
path: root/latency.c
diff options
context:
space:
mode:
authormike-holmes <mike.holmes@linaro.org>2013-09-25 14:25:22 -0400
committermike-holmes <mike.holmes@linaro.org>2013-09-25 14:25:22 -0400
commit32800c2296c71ca520c670f76170351bbabe6b37 (patch)
tree7747067bb395a2b6b04199e06ac77c55fdb29de0 /latency.c
Initial commit
Diffstat (limited to 'latency.c')
-rw-r--r--latency.c409
1 files changed, 409 insertions, 0 deletions
diff --git a/latency.c b/latency.c
new file mode 100644
index 0000000..e874147
--- /dev/null
+++ b/latency.c
@@ -0,0 +1,409 @@
+/*
+Copyright (c) 2013 <mike.holmes@linaro.org>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdint.h>
+#include <cstdlib>
+#include <cstdio>
+#include <unistd.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <iostream>
+#include <fstream>
+using namespace std;
+
+/* Path-buffer size and stringification helpers. Not to be changed, usually. */
+#define MAX_PATH 256 /* longest mount point read from /proc/mounts; buffers hold MAX_PATH + 1 bytes */
+#define _STR(x) #x /* stringify the argument exactly as written */
+#define STR(x) _STR(x) /* expand the argument first, then stringify: STR(MAX_PATH) gives "256" */
+/* Descriptors opened by init_tracing(); they stay at -1 when tracing is not enabled. */
+static int trace_fd = -1; /* ftrace control file; main() writes "0" to it to stop the trace */
+static int marker_fd = -1; /* ftrace trace_marker file; main() writes a marker line to it */
+
+#ifdef __arm__
+typedef uint64_t counter_t;
+
+/*
+ * Program the ARM performance-monitor registers through coprocessor 15.
+ *
+ * do_reset:       non-zero to reset all counters and the cycle counter to zero.
+ * enable_divider: non-zero to enable the "by 64" divider for CCNT.
+ */
+static inline void init_perfcounters (int32_t do_reset, int32_t enable_divider)
+{
+    // bit 0: in general enable all counters (including cycle counter)
+    // bit 4: always set by this code (presumably the export-enable bit -- confirm against the ARM manual)
+    int32_t control = 1 | 16;
+
+    // perform reset:
+    if (do_reset) {
+        // bit 1 resets all counters to zero, bit 2 resets the cycle counter to zero.
+        control |= 2 | 4;
+    }
+
+    if (enable_divider) {
+        control |= 8; // enable "by 64" divider for CCNT.
+    }
+
+    // program the performance-counter control-register:
+    asm volatile ("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(control));
+
+    // enable all counters:
+    asm volatile ("MCR p15, 0, %0, c9, c12, 1\t\n" :: "r"(0x8000000f));
+
+    // clear overflows:
+    asm volatile ("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
+}
+
+/*
+ * Return a running cycle total built from successive reads of the CCNT
+ * register. The total and the previous raw reading are kept in function-static
+ * storage, so every call adds the cycles elapsed since the preceding call.
+ */
+counter_t get_cyclecount (void)
+{
+    static counter_t total = 0;
+    static unsigned int previous = 0;
+    unsigned int now;
+
+    // Read CCNT Register
+    asm volatile ("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(now));
+
+    // Unsigned subtraction wraps, so one wrap of the raw value between
+    // two calls still yields the right delta.
+    const unsigned int delta = now - previous;
+    previous = now;
+    total += delta;
+    return total;
+}
+
+
+#elif defined __i386__ || defined __x86_64__
+typedef uint64_t counter_t;
+
+//source: http://en.wikipedia.org/wiki/Time_Stamp_Counter
+/*
+ * Read the time stamp counter after serialising the instruction stream.
+ *
+ * CPUID (leaf 0) and RDTSC are issued from ONE asm statement: the compiler
+ * gives no guarantee that separate asm statements stay consecutive, even when
+ * both are volatile, so keeping them apart could let other code slip between
+ * the serialising instruction and the counter read.
+ *
+ * Returns the 64-bit counter assembled from EDX:EAX.
+ */
+static inline counter_t get_cyclecount (void)
+{
+    uint32_t lo, hi;
+
+    /* We cannot use "=A", since this would use %rax on x86_64 and return only the lower 32bits of the TSC */
+    __asm__ __volatile__ ("xorl %%eax,%%eax\n\t"
+                          "cpuid\n\t"            // serialize
+                          "rdtsc"
+                          : "=a" (lo), "=d" (hi)
+                          :
+                          : "ebx", "ecx");       // CPUID also overwrites EBX and ECX
+    return (uint64_t)hi << 32 | lo;
+}
+
+//doesn't work on pentium D
+/*
+ * Read the time stamp counter with the RDTSCP instruction and return it as
+ * one 64-bit value (EDX holds the upper half, EAX the lower half).
+ */
+__inline__ counter_t rdtscp(void)
+{
+    uint32_t low_word, high_word;
+
+    // ecx is listed as clobbered because the instruction overwrites it
+    __asm__ __volatile__("rdtscp" : "=a"(low_word), "=d"(high_word) :: "ecx" );
+
+    uint64_t stamp = high_word;
+    stamp <<= 32;
+    stamp |= low_word;
+    return stamp;
+}
+#else
+#error "No high resolution timer support for this architecture.\n"
+#endif
+
+#define MY_PRIORITY (49) /* We use 49 because PREEMPT_RT uses 50
+ as the priority of kernel tasklets
+ and interrupt handlers by default. */
+
+#define MAX_SAFE_STACK (8*1024) /* The maximum stack size which is
+ guaranteed safe to access without
+ faulting; stack_prefault() touches this many bytes. */
+
+
+/*
+ * Touch MAX_SAFE_STACK bytes of stack so that the pages are faulted in before
+ * the timing loop starts.
+ *
+ * The buffer is written through a volatile array: a plain memset() of a local
+ * that is never read afterwards is a dead store, and an optimising compiler
+ * may drop it, which would silently skip the prefault.
+ */
+void stack_prefault(void)
+{
+    volatile unsigned char dummy[MAX_SAFE_STACK];
+
+    for (size_t i = 0; i < MAX_SAFE_STACK; ++i) {
+        dummy[i] = 0;
+    }
+}
+
+/*
+ * Locate the debugfs mount point by scanning /proc/mounts.
+ *
+ * Returns a pointer to a static buffer holding the mount point, or NULL when
+ * /proc/mounts cannot be opened or lists no file system of type "debugfs".
+ * A successful lookup is cached, so later calls return the same buffer
+ * without reading the file again.
+ */
+static const char *find_debugfs(void)
+{
+    static char debugfs[MAX_PATH + 1];
+    static int debugfs_found;
+    char type[100];
+    int matched = 0;
+    FILE *fp;
+
+    if (debugfs_found)
+        return debugfs;
+
+    fp = fopen("/proc/mounts", "r");
+    if (fp == NULL)
+        return NULL;
+
+    /* Each line: device, mount point, type, options, dump, pass. */
+    while (fscanf(fp, "%*s %"
+                  STR(MAX_PATH)
+                  "s %99s %*s %*d %*d\n",
+                  debugfs, type) == 2) {
+        if (strcmp(type, "debugfs") == 0) {
+            matched = 1;
+            break;
+        }
+    }
+    fclose(fp);
+
+    /*
+     * Decide from the flag rather than by comparing `type` again: when the
+     * very first fscanf() matches nothing, `type` was never written.
+     */
+    if (!matched)
+        return NULL;
+
+    debugfs_found = 1;
+
+    return debugfs;
+}
+
+/* We use ftrace as it is present in the kernel. */
+/*
+ * Build "<debugfs>/tracing/<name>" in `path` and open it write-only.
+ * Prints a message and exits with status -2 if the path does not fit or the
+ * file cannot be opened; otherwise returns the descriptor.
+ */
+static int open_tracing_file(const char *debugfs, const char *name,
+                             char *path, size_t pathsize)
+{
+    const int length = snprintf(path, pathsize, "%s/tracing/%s", debugfs, name);
+
+    if (length < 0 || (size_t)length >= pathsize) {
+        printf("Could not open %s/tracing/%s\n", debugfs, name);
+        exit(-2);
+    }
+
+    const int fd = open(path, O_WRONLY);
+    if (fd < 0) {
+        printf("Could not open %s\n", path);
+        exit(-2);
+    }
+    return fd;
+}
+
+/*
+ * Write the whole string `value` to `fd`. Prints a message naming `path` and
+ * exits with status -2 when the write fails or is short.
+ */
+static void write_tracing_value(int fd, const char *value, const char *path)
+{
+    const size_t length = strlen(value);
+
+    if (write(fd, value, length) != (ssize_t)length) {
+        printf("Could not write %s\n", path);
+        exit(-2);
+    }
+}
+
+/*
+ * Switch ftrace to the function_graph tracer, turn tracing on, and open the
+ * trace marker file.
+ *
+ * On return trace_fd refers to tracing_on and marker_fd to trace_marker.
+ * Any failure prints a message and exits with status -2.
+ */
+static void init_tracing(void)
+{
+    /* Room for the longest mount point plus the longest suffix used below. */
+    char path[MAX_PATH + sizeof("/tracing/current_tracer")];
+    const char *debugfs = find_debugfs();
+
+    if (!debugfs) {
+        printf("Could not find debugfs.\n");
+        printf("Is trace support compiled into the Linux kernel? Have you mounted it?\n");
+        exit(-2);
+    }
+
+    /* The tracer name must be written in full; the descriptor is not needed afterwards. */
+    const int tracer_fd = open_tracing_file(debugfs, "current_tracer", path, sizeof(path));
+    write_tracing_value(tracer_fd, "function_graph", path);
+    close(tracer_fd);
+
+    /* Kept open so that main() can write "0" to stop the trace. */
+    trace_fd = open_tracing_file(debugfs, "tracing_on", path, sizeof(path));
+    write_tracing_value(trace_fd, "1", path);
+
+    marker_fd = open_tracing_file(debugfs, "trace_marker", path, sizeof(path));
+}
+
+
+/*
+ * Statistics gathered by the measurement loop.
+ *
+ * clocksperloop is a histogram with `cplsize` buckets, all starting at zero.
+ * minrange starts at the largest counter_t value and maxrange at zero, so the
+ * first sample replaces both.
+ *
+ * The object owns the histogram buffer; copying is disabled because a copy
+ * would make two objects delete the same buffer.
+ */
+struct perfdata {
+    perfdata(const size_t cplsize):
+        deadline_missed(0),
+        outofrange(0),
+        minrange((counter_t)-1),
+        maxrange(0),
+        clocksperloop(new uint64_t[cplsize]()) {   // "()" zero-initialises every bucket
+    }
+    ~perfdata() {
+        delete[] clocksperloop;
+        clocksperloop = 0;
+    }
+
+    counter_t deadline_missed;  // iterations that took longer than the allowed latency
+    counter_t outofrange;       // iterations too long to fit into the histogram
+    counter_t minrange;         // shortest iteration seen, in cycles
+    counter_t maxrange;         // longest iteration seen, in cycles
+    uint64_t *clocksperloop;    // histogram of iteration times
+
+private:
+    // Declared but not defined: copying is not supported.
+    perfdata(const perfdata &);
+    perfdata &operator=(const perfdata &);
+};
+
+/*
+ * Convert a cycle count to microseconds.
+ *
+ * value:        number of cycles.
+ * clockspersec: cycles per second.
+ */
+double tous(counter_t value, counter_t clockspersec)
+{
+    const double scaled = double(value) * 1000000;
+
+    return scaled / clockspersec;
+}
+
+/* Print the command-line help text to stderr. */
+static void print_usage(const char *progname)
+{
+    fprintf(stderr, "Usage: %s [Options]\n", progname);
+    fprintf(stderr,
+            "Options:\n"
+            " -h\t\tshow this help message and exit\n"
+            " -t\t\tswitch on tracing\n"
+            " -r <number>\tnumber of seconds to run the benchmark (default: 10 seconds)\n"
+            " -c <number>\tnumber of different loop time buckets to store (default: 100000 buckets)\n"
+            " -b <number>\trange of loop times to combine in a bucket (default: 10 cycles)\n"
+            " -s <name>\tname of the file to write the statistics to (no default)\n"
+            " -f <number>\tcore clock frequency to assume, value to be in MHz (default: 0 -> measure core clock)\n"
+            " -l <number>\tdesired maximum latency in nanoseconds (default: 10000ns)\n");
+}
+
+/*
+ * Convert an option argument with atoll() and store it in `out`.
+ * Returns false, leaving `out` untouched, when the value is below `lowest`.
+ */
+static bool parse_option(const char *text, long long lowest, unsigned long long &out)
+{
+    const long long parsed = atoll(text);
+
+    if (parsed < lowest)
+        return false;
+    out = parsed;
+    return true;
+}
+
+/*
+ * Measure how long one pass of a tight loop takes, in CPU cycles, while
+ * running as a SCHED_FIFO task with all memory locked.
+ *
+ * Every pass is sorted into a histogram bucket. The first pass is booked
+ * into a throw-away record so that warm-up cost does not distort the
+ * statistics. With -t the loop stops at the first pass exceeding the allowed
+ * latency and the kernel trace is halted so it can be inspected.
+ *
+ * Returns 0 on success, -1 for bad options; set-up failures exit with -2.
+ */
+int main(int argc, char* argv[])
+{
+    struct sched_param param;
+    bool trace_on = false;
+
+    size_t runtime=10;
+    size_t chartsize=100000;
+    size_t chartmultiplier = 10;
+    string statsfile;
+    counter_t clockspersec = 0ull;
+    counter_t max_latency_ns = 10000ull;
+    int c=0;
+
+    /* 'h' is part of the option string so that -h is not reported as invalid. */
+    while ((c = getopt (argc, argv, "htr:c:b:s:f:l:")) != -1) {
+        unsigned long long number = 0;
+        bool valid = true;
+
+        switch(c) {
+        case 't':
+            trace_on = true;
+            break;
+        case 'r':
+            valid = parse_option(optarg, 0, number);
+            runtime = number;
+            break;
+        case 'c':
+            /* at least one bucket */
+            valid = parse_option(optarg, 1, number);
+            chartsize = number;
+            break;
+        case 'b':
+            /* used as a divisor below, so it must not be zero */
+            valid = parse_option(optarg, 1, number);
+            chartmultiplier = number;
+            break;
+        case 's':
+            statsfile=optarg;
+            break;
+        case 'f':
+            valid = parse_option(optarg, 0, number);
+            clockspersec = number * 1000000;
+            break;
+        case 'l':
+            valid = parse_option(optarg, 0, number);
+            max_latency_ns = number;
+            break;
+        case 'h':
+        default:
+            valid = false;
+            break;
+        }
+
+        if (!valid) {
+            print_usage(argv[0]);
+            return -1;
+        }
+    }
+
+    cout << "runtime:" << runtime << ".\n";
+    cout << "chartsize:" << chartsize << ".\n";
+    cout << "chartmultiplier:" << chartmultiplier << ".\n";
+
+    /* `dummy` takes the first pass only, `hot` takes every later one. */
+    perfdata dummy(chartsize), hot(chartsize);
+
+    counter_t loops = 0;
+
+#ifdef __arm__
+    // init counters:
+    init_perfcounters (1, 0);
+#endif
+
+    if (trace_on) {
+        init_tracing();
+    }
+
+    if(mlockall(MCL_CURRENT|MCL_FUTURE) == -1) {
+        perror("mlockall failed");
+        exit(-2);
+    }
+
+    /* Pre-fault our stack */
+    stack_prefault();
+
+    param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+
+    /* Best effort: the measurement still runs when the policy cannot be changed. */
+    if(sched_setscheduler(getpid(), SCHED_FIFO, &param) != 0) {
+        perror("setschedparam error");
+    }
+
+    counter_t start=get_cyclecount();
+    if(0 == clockspersec) {
+        /* Scrap the first measurement as it might have caused a page miss */
+        usleep(10000);
+        clockspersec=(get_cyclecount()-start)*100;
+        start=get_cyclecount();
+        usleep(10000);
+        clockspersec=(get_cyclecount()-start)*100;
+    }
+
+    cout << "clockspersec=" << clockspersec << endl;
+
+    const counter_t max_latency = clockspersec * max_latency_ns / 1000000000;
+    /* Loop invariants: histogram coverage and total run length, both in cycles. */
+    const counter_t chart_span = (counter_t)chartsize * chartmultiplier;
+    const counter_t run_cycles = runtime * clockspersec;
+
+    start=get_cyclecount();
+    counter_t last=start;
+    counter_t cur=start;
+    perfdata *curdata=&dummy;
+    do {
+        cur=get_cyclecount();
+        const counter_t diff=cur-last;
+        ++loops;
+        if(diff < chart_span) {
+            ++(curdata->clocksperloop[diff/chartmultiplier]);
+        } else {
+            ++(curdata->outofrange);
+        }
+        if(diff > max_latency) {
+            ++(curdata->deadline_missed);
+            if (trace_on) {
+                static const char marker[] = "Max latency exceeded\n";
+
+                /* Write the whole marker line; a failure here must not stop us halting the trace. */
+                if (write(marker_fd, marker, sizeof(marker) - 1) == -1) {
+                    printf("Could not write marker\n");
+                }
+
+                /* Terminate so that we can view the trace.
+                   cat /sys/kernel/debug/tracing/trace | less */
+                if (write(trace_fd, "0", 1) == -1) {
+                    printf("Could not stop tracing\n");
+                    exit(-2);
+                }
+
+                printf("Maxlatency exceeded. %llu cycles\n", (unsigned long long)diff);
+                break;
+            }
+        }
+        if(diff>curdata->maxrange) {
+            curdata->maxrange=diff;
+        }
+        if(diff<curdata->minrange) {
+            curdata->minrange=diff;
+        }
+
+        last=cur;
+        curdata=&hot;
+
+    } while((last-start) < run_cycles);
+
+    munlockall();
+
+    /* Average over the cycles actually spent, which stays correct when tracing ends the run early. */
+    const counter_t average = (cur - start) / loops;
+
+    cout << "outofrange=" << hot.outofrange << endl;
+    cout << "deadline missed=" << hot.deadline_missed << endl;
+    cout << "minrange=" << hot.minrange << " = " << tous(hot.minrange,clockspersec) << "µs" << endl;
+    cout << "maxrange=" << hot.maxrange << " = " << tous(hot.maxrange,clockspersec) << "µs" << endl;
+    cout << "average=" << average << " = " << tous(average,clockspersec) << "µs" << endl;
+
+    if(statsfile.size()) {
+        ofstream outfile;
+
+        outfile.open (statsfile.c_str(),ios_base::trunc);
+        if (!outfile.is_open()) {
+            cerr << "Could not open " << statsfile << endl;
+            return -2;
+        }
+
+        /* One line per bucket: bucket start in µs, then the share of passes that took longer. */
+        counter_t sum = 0;
+        for(size_t i=0; i<chartsize; ++i) {
+            sum += hot.clocksperloop[i];
+            outfile << double(i*chartmultiplier ) * 1000000 / clockspersec << "\t" << 1.0 - (double(sum) / loops) << '\n';
+        }
+
+        outfile.close();
+    }
+
+    return 0;
+}