diff options
author | mike-holmes <mike.holmes@linaro.org> | 2013-09-25 14:25:22 -0400 |
---|---|---|
committer | mike-holmes <mike.holmes@linaro.org> | 2013-09-25 14:25:22 -0400 |
commit | 32800c2296c71ca520c670f76170351bbabe6b37 (patch) | |
tree | 7747067bb395a2b6b04199e06ac77c55fdb29de0 /latency.c |
Initial commit
Diffstat (limited to 'latency.c')
-rw-r--r-- | latency.c | 409 |
1 files changed, 409 insertions, 0 deletions
/*
Copyright (c) 2013 <mike.holmes@linaro.org>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
   derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Busy-loop latency benchmark: spins reading the CPU cycle counter, builds a
 * histogram of the gap between consecutive reads and optionally stops ftrace
 * when a gap exceeds the requested maximum latency.
 *
 * NOTE: despite the ".c" file name this is C++ source.
 */

#include <stdint.h>
#include <errno.h>
#include <cstdlib>
#include <cstdio>
#include <unistd.h>
#include <string.h>
#include <sched.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <iostream>
#include <fstream>
#include <string>
using namespace std;

/* Not to be changed, usually. */
#define MAX_PATH 256
#define STR_(x) #x
#define STR(x) STR_(x)

/* Descriptor of <debugfs>/tracing/tracing_on; "0" is written to it to stop tracing. */
static int trace_fd = -1;
/* Descriptor of <debugfs>/tracing/trace_marker. */
static int marker_fd = -1;

#ifdef __arm__
typedef uint64_t counter_t;

/*
 * Program the CP15 performance-monitor registers so the cycle counter runs.
 *
 * do_reset       non-zero: reset all counters and the cycle counter to zero.
 * enable_divider non-zero: enable the "by 64" divider for CCNT.
 */
static inline void init_perfcounters(int32_t do_reset, int32_t enable_divider)
{
    // in general enable all counters (including cycle counter)
    int32_t value = 1;

    // perform reset:
    if (do_reset) {
        value |= 2;     // reset all counters to zero.
        value |= 4;     // reset cycle counter to zero.
    }

    if (enable_divider)
        value |= 8;     // enable "by 64" divider for CCNT.

    // NOTE(review): bit 4 is set unconditionally by the original author with
    // no explanation; confirm its meaning against the ARM reference manual.
    value |= 16;

    // program the performance-counter control-register:
    asm volatile ("MCR p15, 0, %0, c9, c12, 0\t\n" :: "r"(value));

    // enable all counters:
    asm volatile ("MCR p15, 0, %0, c9, c12, 1\t\n" :: "r"(0x8000000f));

    // clear overflows:
    asm volatile ("MCR p15, 0, %0, c9, c12, 3\t\n" :: "r"(0x8000000f));
}

/*
 * Return a 64-bit cycle count built from the 32-bit CCNT register.
 *
 * The difference to the previous reading is accumulated in a static 64-bit
 * counter; the unsigned subtraction absorbs one wrap of the 32-bit register
 * between two calls. Uses static state, so it is not reentrant.
 */
static counter_t get_cyclecount(void)
{
    unsigned int value;
    static counter_t counter = 0;
    static unsigned int lastvalue = 0;

    // Read CCNT Register
    asm volatile ("MRC p15, 0, %0, c9, c13, 0\t\n": "=r"(value));
    counter += (value - lastvalue);
    lastvalue = value;
    return counter;
}

#elif defined __i386__ || defined __x86_64__
typedef uint64_t counter_t;

//source: http://en.wikipedia.org/wiki/Time_Stamp_Counter
/*
 * Return the time stamp counter, preceded by CPUID to serialize execution.
 *
 * CPUID is described through register constraints rather than a clobber list
 * naming %rax..%rdx, because those names are rejected when building for i386.
 */
static inline counter_t get_cyclecount(void)
{
    uint32_t eax = 0, ebx, ecx, edx;
    uint32_t lo, hi;

    __asm__ __volatile__ ("cpuid"       // serialize
                          : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));
    (void)eax;
    (void)ebx;
    (void)ecx;
    (void)edx;

    /* We cannot use "=A", since this would use %rax on x86_64 and return
       only the lower 32bits of the TSC */
    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
    return (uint64_t)hi << 32 | lo;
}

//doesn't work on pentium D
/* Return the time stamp counter using RDTSCP (currently unused by main). */
static inline counter_t rdtscp(void)
{
    uint32_t lo, hi;

    __asm__ __volatile__ ("rdtscp" : "=a" (lo), "=d" (hi) :: "ecx");
    return (uint64_t)hi << 32 | lo;
}
#else
#error "No high resolution timer support for this architecture.\n"
#endif

/* NOTE(review): MY_PRIORITY is not referenced below; main() requests
   sched_get_priority_max(SCHED_FIFO) instead. */
#define MY_PRIORITY (49)        /* we use 49 as the PREEMPT_RT use 50
                                   as the priority of kernel tasklets
                                   and interrupt handler by default */

#define MAX_SAFE_STACK (8*1024) /* The maximum stack size which is
                                   guaranteed safe to access without
                                   faulting */

/* Touch MAX_SAFE_STACK bytes of stack so the pages are faulted in up front. */
void stack_prefault(void)
{
    unsigned char dummy[MAX_SAFE_STACK];

    memset(dummy, 0, MAX_SAFE_STACK);
    return;
}

/*
 * Locate the debugfs mount point by scanning /proc/mounts.
 *
 * Returns a pointer to a static buffer holding the mount point, or NULL when
 * /proc/mounts cannot be opened or lists no debugfs entry. A successful
 * lookup is cached for later calls.
 */
static const char *find_debugfs(void)
{
    static char debugfs[MAX_PATH + 1];
    static int debugfs_found;
    char type[100] = "";
    FILE *fp;

    if (debugfs_found)
        return debugfs;

    fp = fopen("/proc/mounts", "r");
    if (fp == NULL)
        return NULL;

    while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",
                  debugfs, type) == 2) {
        if (strcmp(type, "debugfs") == 0) {
            debugfs_found = 1;
            break;
        }
    }
    fclose(fp);

    return debugfs_found ? debugfs : NULL;
}

/* Build "<debugfs>/tracing/<name>" in path; exits with -2 if it does not fit. */
static void tracing_path(char *path, size_t size, const char *debugfs,
                         const char *name)
{
    int n = snprintf(path, size, "%s/tracing/%s", debugfs, name);

    if (n < 0 || (size_t)n >= size) {
        printf("Path too long: %s/tracing/%s\n", debugfs, name);
        exit(-2);
    }
}

/* Open path write-only; exits with -2 on failure. */
static int open_or_die(const char *path)
{
    int fd = open(path, O_WRONLY);

    if (fd < 0) {
        printf("Could not open %s\n", path);
        exit(-2);
    }
    return fd;
}

/* Write the whole string text to fd; exits with -2 if it was not fully written. */
static void write_or_die(int fd, const char *text, const char *path)
{
    size_t len = strlen(text);

    if (write(fd, text, len) != (ssize_t)len) {
        printf("Could not write %s\n", path);
        exit(-2);
    }
}

/* We use ftrace as it is present in the kernel. */
/*
 * Select the function_graph tracer, switch tracing on and open the trace
 * marker. On return trace_fd refers to tracing_on and marker_fd to
 * trace_marker. Any failure terminates the process with exit(-2).
 */
static void init_tracing(void)
{
    const char *debugfs = find_debugfs();
    char path[MAX_PATH + 64];
    int tracer_fd;

    if (!debugfs) {
        printf("Could not find debugfs.\n");
        printf("Is trace support compiled into the Linux kernel? Have you mounted it?\n");
        exit(-2);
    }

    tracing_path(path, sizeof path, debugfs, "current_tracer");
    tracer_fd = open_or_die(path);
    write_or_die(tracer_fd, "function_graph", path);
    close(tracer_fd);   /* only needed to select the tracer */

    tracing_path(path, sizeof path, debugfs, "tracing_on");
    trace_fd = open_or_die(path);
    write_or_die(trace_fd, "1", path);

    tracing_path(path, sizeof path, debugfs, "trace_marker");
    marker_fd = open_or_die(path);
}

/*
 * Statistics gathered by the measurement loop.
 *
 * clocksperloop is a histogram with cplsize buckets, zeroed on construction
 * and owned by this object; copying is disabled because of that ownership.
 */
struct perfdata {
    perfdata(const size_t cplsize):
        deadline_missed(0),
        outofrange(0),
        minrange((counter_t)-1),
        maxrange(0),
        clocksperloop(new uint64_t[cplsize]) {
        memset(clocksperloop, 0, cplsize * sizeof(uint64_t));
    }
    ~perfdata() {
        delete[] clocksperloop;
        clocksperloop = 0;
    }

    counter_t deadline_missed;  /* iterations longer than the allowed latency */
    counter_t outofrange;       /* iterations too long for the histogram */
    counter_t minrange;         /* shortest iteration seen, in cycles */
    counter_t maxrange;         /* longest iteration seen, in cycles */
    uint64_t *clocksperloop;    /* histogram of iteration lengths */

private:
    perfdata(const perfdata &);
    perfdata &operator=(const perfdata &);
};

/* Convert a cycle count to microseconds given the clock rate in cycles/second. */
double tous(counter_t value, counter_t clockspersec)
{
    return double(value) * 1000000 / clockspersec;
}

/* Print the command line help to stderr. */
static void usage(const char *prog)
{
    fprintf(stderr, "Usage: %s [Options]\n", prog);
    fprintf(stderr,
            "Options:\n"
            " -h\t\tshow this help message and exit\n"
            " -t\t\tswitch on tracing\n"
            " -r <number>\tnumber of seconds to run the benchmark (default: 10 seconds)\n"
            " -c <number>\tnumber of different loop time buckets to store (default: 100000 buckets)\n"
            " -b <number>\trange of loop times to combine in a bucket (default: 10 cycles)\n"
            " -s <name>\tname of the file to write the statistics to (no default)\n"
            " -f <number>\tcore clock frequency to assume, value to be in MHz (default: 0 -> measure core clock)\n"
            " -l <number>\tdesired maximum latency in nanoseconds (default: 10000ns)\n");
}

/*
 * Parse text as a non-negative decimal integer.
 *
 * Returns true and stores the value in *out on success; returns false for
 * empty input, trailing garbage, a minus sign or an out-of-range value.
 */
static bool parse_u64(const char *text, uint64_t *out)
{
    char *end = NULL;
    unsigned long long value;

    /* strtoull silently negates input with a leading '-', so reject it. */
    if (strchr(text, '-') != NULL)
        return false;

    errno = 0;
    value = strtoull(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE)
        return false;

    *out = value;
    return true;
}

/*
 * Entry point: parse options, lock memory, switch to SCHED_FIFO, run the
 * measurement loop for the requested time and print (and optionally save)
 * the statistics. Returns 0 on success, -1 on a usage or stats-file error;
 * fatal set-up errors call exit(-2).
 */
int main(int argc, char* argv[])
{
    struct sched_param param;
    bool trace_on = false;

    size_t runtime = 10;
    size_t chartsize = 100000;
    size_t chartmultiplier = 10;
    string statsfile;
    counter_t clockspersec = 0ull;
    counter_t max_latency_ns = 10000ull;
    int c = 0;

    while ((c = getopt(argc, argv, "htr:c:b:s:f:l:")) != -1) {
        uint64_t number = 0;

        /* Every option in this set takes a numeric argument. */
        if (strchr("rcbfl", c) != NULL && !parse_u64(optarg, &number)) {
            fprintf(stderr, "Invalid number for -%c: %s\n", c, optarg);
            usage(argv[0]);
            return -1;
        }

        switch (c) {
        case 't':
            trace_on = true;
            break;
        case 'r':
            runtime = number;
            break;
        case 'c':
            chartsize = number;
            break;
        case 'b':
            chartmultiplier = number;
            break;
        case 's':
            statsfile = optarg;
            break;
        case 'f':
            clockspersec = number * 1000000;
            break;
        case 'l':
            max_latency_ns = number;
            break;
        case 'h':
        default:
            usage(argv[0]);
            return -1;
        }
    }

    cout << "runtime:" << runtime << ".\n";
    cout << "chartsize:" << chartsize << ".\n";
    cout << "chartmultiplier:" << chartmultiplier << ".\n";

    /* The first loop iteration is recorded in "dummy", all later ones in "hot". */
    perfdata dummy(chartsize), hot(chartsize);

    counter_t loops = 0;

#ifdef __arm__
    // init counters:
    init_perfcounters(1, 0);
#endif

    if (trace_on) {
        init_tracing();
    }

    if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1) {
        perror("mlockall failed");
        exit(-2);
    }

    /* Pre-fault our stack */
    stack_prefault();

    memset(&param, 0, sizeof param);
    param.sched_priority = sched_get_priority_max(SCHED_FIFO);

    /* Failing to get real-time priority is reported but not fatal. */
    if (sched_setscheduler(getpid(), SCHED_FIFO, &param) != 0) {
        perror("setschedparam error");
    }

    counter_t start = get_cyclecount();
    if (0 == clockspersec) {
        /* Scrap the first measurement as it might have caused a page miss */
        usleep(10000);
        clockspersec = (get_cyclecount() - start) * 100;
        start = get_cyclecount();
        usleep(10000);
        clockspersec = (get_cyclecount() - start) * 100;
    }

    cout << "clockspersec=" << clockspersec << endl;

    const counter_t max_latency = clockspersec * max_latency_ns / 1000000000;
    const counter_t chartrange = (counter_t)chartsize * chartmultiplier;

    start = get_cyclecount();
    counter_t last = start;
    perfdata *curdata = &dummy;
    do {
        counter_t cur = get_cyclecount();
        counter_t diff = cur - last;

        ++loops;
        if (diff < chartrange) {
            ++(curdata->clocksperloop[diff / chartmultiplier]);
        } else {
            ++(curdata->outofrange);
        }
        if (diff > max_latency) {
            ++(curdata->deadline_missed);
            if (trace_on) {
                static const char marker[] = "Max latency exceeded\n";
                ssize_t marked = write(marker_fd, marker, sizeof marker - 1);

                /* Terminate so that we can view the trace.
                   cat /sys/kernel/debug/tracing/trace | less */
                ssize_t stopped = write(trace_fd, "0", 1);

                /* Both writes are issued before any error handling so that
                   tracing is stopped as early as possible. */
                if (marked == -1) {
                    printf("Could not write marker\n");
                    exit(-2);
                }
                if (stopped == -1) {
                    printf("Could not stop tracing\n");
                    exit(-2);
                }

                printf("Maxlatency exceeded. %llu cycles\n",
                       (unsigned long long)diff);
                break;
            }
        }
        if (diff > curdata->maxrange) {
            curdata->maxrange = diff;
        }
        if (diff < curdata->minrange) {
            curdata->minrange = diff;
        }

        last = cur;
        curdata = &hot;

    } while ((last - start) < (runtime * clockspersec));

    munlockall();

    cout << "outofrange=" << hot.outofrange << endl;
    cout << "deadline missed=" << hot.deadline_missed << endl;
    cout << "minrange=" << hot.minrange << " = " << tous(hot.minrange, clockspersec) << "µs" << endl;
    cout << "maxrange=" << hot.maxrange << " = " << tous(hot.maxrange, clockspersec) << "µs" << endl;
    cout << "average=" << (runtime * clockspersec / loops) << " = " << tous(runtime * clockspersec / loops, clockspersec) << "µs" << endl;

    if (statsfile.size()) {
        ofstream outfile;

        outfile.open(statsfile.c_str(), ios_base::trunc);
        if (!outfile.is_open()) {
            cerr << "Could not open " << statsfile << endl;
            return -1;
        }

        /* One row per bucket: bucket start in µs, then the fraction of
           loops that took longer than this bucket. */
        counter_t sum = 0;
        for (size_t i = 0; i < chartsize; ++i) {
            sum += hot.clocksperloop[i];
            outfile << double(i * chartmultiplier) * 1000000 / clockspersec << "\t" << 1.0 - (double(sum) / loops) << endl;
        }

        outfile.close();
    }

    return 0;
}