//
// rpistat: Run command and measure PMU events
//
//
// Author: Paul J. Drongowski
// Date: 27 June 2013
//
// Copyright (c) 2013 Paul J. Drongowski
//
#include <sys/types.h>
#include <sys/wait.h>
#include <locale.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include "../test_common/test_common.h"
#include "../test_common/rpi_pmu.h"
// Name of the result file that receives the final event counts
#define RESULT_FILE_NAME "rpistat.txt"
// Report heading; starts with the tool name and has the workload
// command name (argv[1]) appended to it in main()
char heading[256] = "rpistat: " ;
// Event set currently being measured by the two event counters:
// 0 = instructions / IBUF stalls, 1 = data cache accesses / misses,
// 2 = micro TLB / main TLB misses, 3 = branches / mispredicts
int event_set = 0 ;
//
// Number of measurement periods
//
// "periods" is incremented once per measurement period. Each of
// the other counts is incremented only in the periods in which
// its event set was the one being measured. The ratio
// periods / <event periods> is the factor used by dscale() and
// scale() to scale a raw event count.
//
uint64_t periods = 0 ; // All periods
uint64_t iperiods = 0 ; // Instruction periods
uint64_t dperiods = 0 ; // DC periods
uint64_t tperiods = 0 ; // TLB periods
uint64_t bperiods = 0 ; // Branch periods
//
// Cumulative event counts
//
// Each count is the sum of the counter values read at the end of
// the measurement periods in which the event was selected. The
// cycle counter is read in every period; the other events are
// read in pairs, one pair per event set.
//
uint64_t cycles = 0 ; // Cycle counter, all periods
uint64_t instructions = 0 ; // Event set 0, counter 0
uint64_t ibuf_stalls = 0 ; // Event set 0, counter 1
uint64_t branches = 0 ; // Event set 3, counter 0
uint64_t mispredicts = 0 ; // Event set 3, counter 1
uint64_t data_cache_accesses = 0 ; // Event set 1, counter 0
uint64_t data_cache_misses = 0 ; // Event set 1, counter 1
uint64_t micro_tlb_misses = 0 ; // Event set 2, counter 0
uint64_t main_tlb_misses = 0 ; // Event set 2, counter 1
//
// Scale the event count by the reciprocal of the
// active event fraction, i.e., multiply it by
// (periods / event_period). Return the scaled event
// count as a double.
//
//   count         Raw cumulative event count
//   event_period  Number of periods in which the event
//                 was actually being counted
//
// An event that was never active (event_period == 0)
// has no data to extrapolate from. Return 0.0 in that
// case instead of dividing by zero and handing inf or
// NaN back to the caller.
//
double dscale(uint64_t count, uint64_t event_period)
{
  if (event_period == 0) {
    return( 0.0 ) ;
  }

  double factor = (double)periods / (double)event_period ;
  return( factor * (double)count ) ;
}
//
// Scale the event count by the reciprocal of the
// active event fraction, i.e., multiply it by
// (periods / event_period). Return the scaled event
// count as an unsigned 64-bit int (the fractional
// part is truncated).
//
//   count         Raw cumulative event count
//   event_period  Number of periods in which the event
//                 was actually being counted
//
// An event that was never active (event_period == 0)
// yields 0. Without this check the scale factor would
// be inf or NaN, and converting such a value to an
// integer type is undefined behavior.
//
uint64_t scale(uint64_t count, uint64_t event_period)
{
  if (event_period == 0) {
    return( 0 ) ;
  }

  double factor = (double)periods / (double)event_period ;
  return( (uint64_t)(factor * (double)count) ) ;
}
//
// Switch between event sets. Accumulate current
// event counts, then reconfigure the performance
// counters for the next event set.
//
static void handle_event_sets()
{
int event_0, event_1 ;
// Increment the number of measurement periods
periods++ ;
// Accumulate the Cycle Counter Register
cycles += armv6pmu_read_counter(ARMV6_CYCLE_COUNTER) ;
switch( event_set ) {
case 0:
instructions += armv6pmu_read_counter(ARMV6_COUNTER0) ;
ibuf_stalls += armv6pmu_read_counter(ARMV6_COUNTER1) ;
iperiods++ ;
//Advance to next event set
event_set = 1 ;
event_0 = ARMV6_EVENT_DCACHE_CACCESS ;
event_1 = ARMV6_EVENT_DCACHE_MISS ;
break ;
case 1:
data_cache_accesses += armv6pmu_read_counter(ARMV6_COUNTER0) ;
data_cache_misses += armv6pmu_read_counter(ARMV6_COUNTER1) ;
dperiods++ ;
//Advance to next event set
event_set = 2 ;
event_0 = ARMV6_EVENT_DTLB_MISS ;
event_1 = ARMV6_EVENT_MAIN_TLB_MISS ;
break ;
case 2:
micro_tlb_misses += armv6pmu_read_counter(ARMV6_COUNTER0) ;
main_tlb_misses += armv6pmu_read_counter(ARMV6_COUNTER1) ;
tperiods++ ;
//Advance to next event set
event_set = 3 ;
event_0 = ARMV6_EVENT_BR_EXEC ;
event_1 = ARMV6_EVENT_BR_MISPREDICT ;
break ;
default:
branches += armv6pmu_read_counter(ARMV6_COUNTER0) ;
mispredicts += armv6pmu_read_counter(ARMV6_COUNTER1) ;
bperiods++ ;
//Advance to next event set
event_set = 0 ;
event_0 = ARMV6_EVENT_INSTR_EXEC ;
event_1 = ARMV6_EVENT_IBUF_STALL ;
break ;
}
// Clear and start the performance counters
armv6_pmcr_write(ARMV6_PMCR_ENABLE |
ARMV6_PMCR_CCOUNT_RESET |
// ARMV6_PMCR_CCOUNT_DIV |
ARMV6_PMCR_CTR01_RESET |
(event_0 << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
(event_1 << ARMV6_PMCR_EVT_COUNT1_SHIFT)
) ;
}
//
// Initialize the period counts and the ARM1176
// performance counters with the first event set.
// Start the counters.
//
static void handle_first_event_set()
{
int event_0, event_1 ;
periods = 0 ;
iperiods = 0 ;
dperiods = 0 ;
tperiods = 0 ;
bperiods = 0 ;
event_set = 0 ;
event_0 = ARMV6_EVENT_INSTR_EXEC ;
event_1 = ARMV6_EVENT_IBUF_STALL ;
// Clear the sticky PMU overflow bits
armv6_pmcr_write(ARMV6_PMCR_CCOUNT_OVERFLOW |
ARMV6_PMCR_COUNT0_OVERFLOW |
ARMV6_PMCR_COUNT1_OVERFLOW
) ;
// Clear and start the performance counters
armv6_pmcr_write(ARMV6_PMCR_ENABLE |
ARMV6_PMCR_CCOUNT_RESET |
// ARMV6_PMCR_CCOUNT_DIV |
ARMV6_PMCR_CTR01_RESET |
(event_0 << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
(event_1 << ARMV6_PMCR_EVT_COUNT1_SHIFT)
) ;
}
//
// Handle the last event set after the workload is
// finished. Accumulate the event counts, and
// check for counter overflow. Do not restart the
// counters.
//
static void handle_last_event_set()
{
uint32_t pmcr = 0 ;
// Increment the number of measurement periods
periods++ ;
// Accumulate the Cycle Counter Register
cycles += armv6pmu_read_counter(ARMV6_CYCLE_COUNTER) ;
switch( event_set ) {
case 0:
instructions += armv6pmu_read_counter(ARMV6_COUNTER0) ;
ibuf_stalls += armv6pmu_read_counter(ARMV6_COUNTER1) ;
iperiods++ ;
break ;
case 1:
data_cache_accesses += armv6pmu_read_counter(ARMV6_COUNTER0) ;
data_cache_misses += armv6pmu_read_counter(ARMV6_COUNTER1) ;
dperiods++ ;
break ;
case 2:
micro_tlb_misses += armv6pmu_read_counter(ARMV6_COUNTER0) ;
main_tlb_misses += armv6pmu_read_counter(ARMV6_COUNTER1) ;
tperiods++ ;
break ;
default:
branches += armv6pmu_read_counter(ARMV6_COUNTER0) ;
mispredicts += armv6pmu_read_counter(ARMV6_COUNTER1) ;
bperiods++ ;
break ;
}
pmcr = armv6_pmcr_read() ;
if (armv6_pmcr_has_overflowed(pmcr)) {
fprintf(result_file, "**********************************\n") ;
fprintf(result_file, " WARNING! Overflow detected!\n") ;
fprintf(result_file, "**********************************\n") ;
}
}
//
// Write one event count line to the result file: the
// label, the raw count, and the scaled count in
// brackets.
//
static void report_scaled_count(const char *label, uint64_t count,
                                uint64_t event_period)
{
  // The counts are uint64_t. They are converted to unsigned long long
  // so that the argument type always matches the %llu conversion.
  fprintf(result_file, "%s%'llu", label, (unsigned long long)count) ;
  fprintf(result_file, " [%'llu]\n",
          (unsigned long long)scale(count, event_period)) ;
}

//
// Write the cumulative event counts and the derived
// ratios and rates to the result file.
//
static void report_events(void)
{
  double scaled_instructions = 0 ;

  fprintf(result_file, "\nPerformance events\n") ;
  fprintf(result_file, " [ ... ] = scaled event count\n") ;
  fprintf(result_file, " PTI = per thousand instructions\n") ;
  fprintf(result_file, " Total periods: %llu\n",
          (unsigned long long)periods) ;

  // Cycles are counted during all measurement periods
  scaled_instructions = dscale(instructions, iperiods) ;

  fprintf(result_file, "\n") ;
  fprintf(result_file, " Cycles: %'llu\n", (unsigned long long)cycles) ;
  report_scaled_count(" Instructions: ", instructions, iperiods) ;
  report_scaled_count(" IBUF stall cycles: ", ibuf_stalls, iperiods) ;
  fprintf(result_file, " Instr periods: %llu\n",
          (unsigned long long)iperiods) ;
  if (instructions != 0) {
    fprintf(result_file, " CPI: %-7.3f\n",
            (double)cycles / scaled_instructions) ;
  }
  // A percentage of zero cycles is meaningless; skip the line
  if (cycles != 0) {
    fprintf(result_file, " IBUF stall percent: %-7.3f%%\n",
            (dscale(ibuf_stalls, iperiods) / (double)cycles) * 100.0) ;
  }

  fprintf(result_file, "\n") ;
  report_scaled_count(" DC cached accesses: ", data_cache_accesses, dperiods) ;
  report_scaled_count(" DC misses: ", data_cache_misses, dperiods) ;
  fprintf(result_file, " DC periods: %llu\n",
          (unsigned long long)dperiods) ;
  if (data_cache_accesses != 0) {
    fprintf(result_file, " DC miss ratio: %-7.3f%%\n",
            ((double)data_cache_misses / (double)data_cache_accesses) * 100.0) ;
  }

  fprintf(result_file, "\n") ;
  report_scaled_count(" MicroTLB misses: ", micro_tlb_misses, tperiods) ;
  report_scaled_count(" Main TLB misses: ", main_tlb_misses, tperiods) ;
  fprintf(result_file, " TLB periods: %llu\n",
          (unsigned long long)tperiods) ;
  if (instructions != 0) {
    fprintf(result_file, " Micro miss rate: %-7.3f PTI\n",
            1000.0 * dscale(micro_tlb_misses, tperiods) / scaled_instructions) ;
    fprintf(result_file, " Main miss rate: %-7.3f PTI\n",
            1000.0 * dscale(main_tlb_misses, tperiods) / scaled_instructions) ;
  }

  fprintf(result_file, "\n") ;
  report_scaled_count(" Branches: ", branches, bperiods) ;
  report_scaled_count(" Mispredicted BR: ", mispredicts, bperiods) ;
  fprintf(result_file, " BR periods: %llu\n",
          (unsigned long long)bperiods) ;
  if (instructions != 0) {
    fprintf(result_file, " Branch rate: %-7.3f PTI\n",
            1000.0 * dscale(branches, bperiods) / scaled_instructions) ;
  }
  if (branches != 0) {
    fprintf(result_file, " Mispredict ratio: %-7.3f%%\n",
            ((double)mispredicts / (double)branches) * 100.0) ;
  }
}

//
// Fork/exec the workload (argv[1] with its arguments)
// in a child process and measure events in the parent
// until the child terminates. The event set is
// switched after every 100 millisecond sleep. Any
// failure terminates the program.
//
static void run_and_measure(char *argv[])
{
  pid_t child = fork() ;

  if (child == -1) {
    fprintf(stderr, "*fatal* fork() failed\n") ;
    exit( EXIT_FAILURE ) ;
  }

  if (child == 0) {
    // Child: execvp() returns only when it has failed
    execvp(argv[1], &argv[1]) ;
    fprintf(stderr, "*fatal* execvp() failed\n") ;
    perror("execvp") ;
    // Use _exit(), not exit(): the child holds a copy of the parent's
    // unflushed stdio buffers and exit() would write them out a
    // second time
    _exit( EXIT_FAILURE ) ;
  }

  // Parent
  handle_first_event_set() ;
  for(;;) {
    pid_t pid = waitpid(child, NULL, WNOHANG) ;

    if (pid == 0) {
      // Child is still running
      usleep(100000) ;
      // Stop the performance counters
      armv6_pmcr_write(0) ;
      handle_event_sets() ;
    } else if (pid == child) {
      // Child terminated
      // Stop the performance counters
      armv6_pmcr_write(0) ;
      handle_last_event_set() ;
      break ;
    } else {
      // waitpid() failed or returned an unexpected process ID.
      // Neither condition can clear up by retrying, so give up
      // rather than loop forever.
      if (pid == -1) {
        fprintf(stderr, "*error* waitpid failed\n") ;
        perror("waitpid") ;
      } else {
        fprintf(stderr, "*fatal* Unexpected return from waitpid()\n") ;
      }
      // Stop the performance counters
      armv6_pmcr_write(0) ;
      exit( EXIT_FAILURE ) ;
    }
  }
}

//
// Run the command given on the command line as a child
// process, measure PMU events while it runs, and write
// the event counts to the result file.
//
// Usage: rpistat command [arguments]
//
// Returns EXIT_SUCCESS when the workload was measured
// and reported. Exits with EXIT_FAILURE when no command
// is given, when the result file cannot be created, or
// when fork() or waitpid() fails.
//
int main(int argc, char* argv[])
{
  if (argc < 2) {
    fprintf(stderr, "*fatal* Need the command to be executed\n") ;
    fprintf(stderr, "Usage: rpistat command [arguments]\n") ;
    // Without a command argv[1] is NULL; do not go any further
    exit( EXIT_FAILURE ) ;
  }

  // Store final event counts in the result file
  if (create_result_file(RESULT_FILE_NAME) == 0) {
    exit( EXIT_FAILURE ) ;
  }

  setlocale(LC_NUMERIC, "") ;

  // Append the command name to the heading. The copy is bounded by
  // the space left in the heading buffer; a long name is truncated.
  size_t used = strlen(heading) ;
  snprintf(heading + used, sizeof(heading) - used, "%s", argv[1]) ;
  print_heading(heading) ;
  fprintf(result_file,"\n") ;
  print_system_info() ;

  run_and_measure(argv) ;

  // Write event counts to the results file
  report_events() ;

  close_result_file() ;
  return( EXIT_SUCCESS ) ;
}