// rpistat.c

//
// rpistat: Run command and measure PMU events
//

//
// Author: Paul J. Drongowski
// Date:   27 June 2013
//
// Copyright (c) 2013 Paul J. Drongowski
//

#include <sys/types.h>
#include <sys/wait.h>
#include <locale.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include "../test_common/test_common.h"
#include "../test_common/rpi_pmu.h"

// Name of the result file handed to create_result_file() in main()
#define RESULT_FILE_NAME "rpistat.txt"
// Report heading; main() appends the command name (argv[1]) to it
// before passing it to print_heading()
char heading[256] = "rpistat: " ;

// Index of the event set currently programmed into the two
// configurable counters. handle_event_sets() cycles it through
// 0 (instructions), 1 (data cache), 2 (TLB) and 3 (branches).
int event_set = 0 ;

//
// Number of measurement periods. Each per-set count is incremented
// only when a period that measured that event set ends; "periods"
// is incremented at the end of every period. The ratio
// periods / <set periods> is the scaling factor used by scale()
// and dscale().
//
uint64_t periods = 0 ;     // All periods
uint64_t iperiods = 0 ;    // Instruction periods
uint64_t dperiods = 0 ;    // DC periods
uint64_t tperiods = 0 ;    // TLB periods
uint64_t bperiods = 0 ;    // Branch periods

//
// Cumulative event counts. "cycles" is accumulated at the end of
// every period; each of the other counts is accumulated only at the
// end of periods in which its event set was active.
//
uint64_t cycles = 0 ;
uint64_t instructions = 0 ;         // Event set 0, counter 0
uint64_t ibuf_stalls = 0 ;          // Event set 0, counter 1
uint64_t branches = 0 ;             // Event set 3, counter 0
uint64_t mispredicts = 0 ;          // Event set 3, counter 1
uint64_t data_cache_accesses = 0 ;  // Event set 1, counter 0
uint64_t data_cache_misses = 0 ;    // Event set 1, counter 1
uint64_t micro_tlb_misses = 0 ;     // Event set 2, counter 0
uint64_t main_tlb_misses = 0 ;      // Event set 2, counter 1

//
// Scale an event count by the reciprocal of the fraction of
// measurement periods in which the event was active, i.e. by
// (periods / event_period). Return the scaled event count as a
// double.
//
//   count         Accumulated (unscaled) event count
//   event_period  Number of periods in which the event was counted
//
// Returns 0.0 when event_period is zero. That happens when the
// workload finished before the event set was ever selected; without
// the guard the division would yield an infinite or NaN result.
//
double dscale(uint64_t count, uint64_t event_period)
{
  if (event_period == 0) {
    return( 0.0 ) ;
  }
  return( ((double)periods / (double)event_period) * (double)count ) ;
}

//
// Scale an event count by the reciprocal of the fraction of
// measurement periods in which the event was active, i.e. by
// (periods / event_period). Return the scaled event count as an
// unsigned 64-bit int (fractional part truncated).
//
//   count         Accumulated (unscaled) event count
//   event_period  Number of periods in which the event was counted
//
// Returns 0 when event_period is zero (the event set was never
// measured) and UINT64_MAX when the scaled value does not fit in
// 64 bits. Converting an infinite, NaN or out-of-range double to
// an integer type is undefined behavior, so both cases are handled
// before the conversion.
//
uint64_t scale(uint64_t count, uint64_t event_period)
{
  double scaled = 0.0 ;

  if (event_period == 0) {
    return( 0 ) ;
  }

  scaled = ((double)periods / (double)event_period) * (double)count ;

  // 18446744073709551616.0 is 2^64, the first value that is too large
  if (scaled >= 18446744073709551616.0) {
    return( UINT64_MAX ) ;
  }
  return( (uint64_t)scaled ) ;
}

//
// Switch between event sets. Accumulate current
// event counts, then reconfigure the performance
// counters for the next event set.
//
static void handle_event_sets()
{
  int event_0, event_1 ;

  // Increment the number of measurement periods
  periods++ ;

  // Accumulate the Cycle Counter Register
  cycles += armv6pmu_read_counter(ARMV6_CYCLE_COUNTER) ;

  switch( event_set ) {
  case 0:
    instructions += armv6pmu_read_counter(ARMV6_COUNTER0) ;
    ibuf_stalls += armv6pmu_read_counter(ARMV6_COUNTER1) ;
    iperiods++ ;
    //Advance to next event set
    event_set = 1 ;
    event_0 = ARMV6_EVENT_DCACHE_CACCESS ;
    event_1 = ARMV6_EVENT_DCACHE_MISS ;
    break ;
  case 1:
    data_cache_accesses += armv6pmu_read_counter(ARMV6_COUNTER0) ;
    data_cache_misses += armv6pmu_read_counter(ARMV6_COUNTER1) ;
    dperiods++ ;
    //Advance to next event set
    event_set = 2 ;
    event_0 = ARMV6_EVENT_DTLB_MISS ;
    event_1 = ARMV6_EVENT_MAIN_TLB_MISS ;
    break ;
  case 2:
    micro_tlb_misses += armv6pmu_read_counter(ARMV6_COUNTER0) ;
    main_tlb_misses += armv6pmu_read_counter(ARMV6_COUNTER1) ;
    tperiods++ ;
    //Advance to next event set
    event_set = 3 ;
    event_0 = ARMV6_EVENT_BR_EXEC ;
    event_1 = ARMV6_EVENT_BR_MISPREDICT ;
    break ;
  default:
    branches += armv6pmu_read_counter(ARMV6_COUNTER0) ;
    mispredicts += armv6pmu_read_counter(ARMV6_COUNTER1) ;
    bperiods++ ;
    //Advance to next event set
    event_set = 0 ;
    event_0 = ARMV6_EVENT_INSTR_EXEC ;
    event_1 = ARMV6_EVENT_IBUF_STALL ;
    break ;
  }

  // Clear and start the performance counters
  armv6_pmcr_write(ARMV6_PMCR_ENABLE |
		   ARMV6_PMCR_CCOUNT_RESET |
		   // ARMV6_PMCR_CCOUNT_DIV |
		   ARMV6_PMCR_CTR01_RESET |
		   (event_0 << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
		   (event_1 << ARMV6_PMCR_EVT_COUNT1_SHIFT)
		   ) ;
}

//
// Initialize the period counts and the ARM1176
// performance counters with the first event set.
// Start the counters.
//
static void handle_first_event_set()
{
  int event_0, event_1 ;

  periods = 0 ;
  iperiods = 0 ;
  dperiods = 0 ;
  tperiods = 0 ;
  bperiods = 0 ;

  event_set = 0 ;
  event_0 = ARMV6_EVENT_INSTR_EXEC ;
  event_1 = ARMV6_EVENT_IBUF_STALL ;

  // Clear the sticky PMU overflow bits
  armv6_pmcr_write(ARMV6_PMCR_CCOUNT_OVERFLOW |
                   ARMV6_PMCR_COUNT0_OVERFLOW |
                   ARMV6_PMCR_COUNT1_OVERFLOW
		   ) ;
 
  // Clear and start the performance counters
  armv6_pmcr_write(ARMV6_PMCR_ENABLE |
		   ARMV6_PMCR_CCOUNT_RESET |
		   // ARMV6_PMCR_CCOUNT_DIV |
		   ARMV6_PMCR_CTR01_RESET |
		   (event_0 << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
		   (event_1 << ARMV6_PMCR_EVT_COUNT1_SHIFT)
		   ) ;
}

//
// Close out the final measurement period once the workload has
// finished. The counters are read and added to the totals of the
// event set that was active, exactly as at the end of any other
// period, but the counters are not reprogrammed or restarted.
// Afterwards the control register is read and a warning is written
// to the result file if it reports an overflow.
//
static void handle_last_event_set()
{
  uint64_t count0, count1 ;
  uint32_t pmcr ;

  // The final period counts as a measurement period too
  periods++ ;

  // The Cycle Counter Register is accumulated in every period
  cycles += armv6pmu_read_counter(ARMV6_CYCLE_COUNTER) ;

  // Fetch both event counters, then credit them to the active set
  count0 = armv6pmu_read_counter(ARMV6_COUNTER0) ;
  count1 = armv6pmu_read_counter(ARMV6_COUNTER1) ;

  if (event_set == 0) {
    instructions += count0 ;
    ibuf_stalls += count1 ;
    iperiods++ ;
  } else if (event_set == 1) {
    data_cache_accesses += count0 ;
    data_cache_misses += count1 ;
    dperiods++ ;
  } else if (event_set == 2) {
    micro_tlb_misses += count0 ;
    main_tlb_misses += count1 ;
    tperiods++ ;
  } else {
    branches += count0 ;
    mispredicts += count1 ;
    bperiods++ ;
  }

  // Nothing more to do unless an overflow was recorded
  pmcr = armv6_pmcr_read() ;
  if (!armv6_pmcr_has_overflowed(pmcr)) {
    return ;
  }

  fprintf(result_file, "**********************************\n") ;
  fprintf(result_file, " WARNING! Overflow detected!\n") ;
  fprintf(result_file, "**********************************\n") ;
}

//
// Write one event count line to the result file: the label padded
// to 20 columns, the raw count, and the scaled count in brackets.
//
static void report_event_count(const char* label, uint64_t count,
                               uint64_t event_period)
{
  fprintf(result_file, "  %-20s%'llu  [%'llu]\n", label,
          (unsigned long long)count,
          (unsigned long long)scale(count, event_period)) ;
}

//
// Run the command named on the command line in a child process and
// measure PMU events while it executes. Every 100 milliseconds the
// parent stops the counters, accumulates them and switches to the
// next event set. When the child terminates, the raw and scaled
// event counts and the derived ratios are written to the result
// file.
//
//   argv[1]     Command to execute
//   argv[2...]  Arguments passed to the command
//
// Returns EXIT_SUCCESS after the report is written. Exits with
// EXIT_FAILURE if no command is given, the result file cannot be
// created, fork() fails, or waitpid() fails.
//
int main(int argc, char* argv[])
{
  pid_t child = -1 ;
  pid_t pid = -1 ;
  double scaled_instructions = 0 ;
  size_t heading_length = 0 ;

  if (argc < 2) {
    fprintf(stderr, "*fatal* Need the command to be executed\n") ;
    fprintf(stderr, "Usage: rpistat command [arguments]\n") ;
    // argv[1] is NULL here, so do not continue
    exit( EXIT_FAILURE ) ;
  }

  // Store final event counts in the result file
  if (create_result_file(RESULT_FILE_NAME) == 0) {
    exit( EXIT_FAILURE ) ;
  }
  setlocale(LC_NUMERIC, "") ;

  // Append the command name to the heading. snprintf() truncates a
  // long name instead of writing past the end of the buffer.
  heading_length = strlen(heading) ;
  snprintf(heading + heading_length, sizeof(heading) - heading_length,
           "%s", argv[1]) ;
  print_heading(heading) ;
  fprintf(result_file,"\n") ;
  print_system_info() ;

  // Flush buffered output so the child does not inherit a copy
  fflush(NULL) ;

  // Fork/exec the workload in a child process
  child = fork() ;
  if (child == -1) {
    fprintf(stderr, "*fatal* fork() failed\n") ;
    exit( EXIT_FAILURE ) ;
  }

  if (child == 0) {
    // Child: execvp() returns only when it fails
    execvp(argv[1], &argv[1]) ;
    fprintf(stderr, "*fatal* execvp() failed\n") ;
    // _exit() skips the stdio flush and atexit handlers that belong
    // to the parent
    _exit( EXIT_FAILURE ) ;
  }

  // Parent: sample the counters until the child terminates
  handle_first_event_set() ;
  for(;;) {
    pid = waitpid(child, NULL, WNOHANG) ;
    if (pid == 0) {
      // Child is still running
      usleep(100000) ;
      // Stop the performance counters
      armv6_pmcr_write(0) ;
      handle_event_sets() ;
    } else if (pid == child) {
      // Child terminated
      // Stop the performance counters
      armv6_pmcr_write(0) ;
      handle_last_event_set() ;
      break ;
    } else {
      // Error or unexpected process id. Stop the counters and give
      // up; retrying would repeat the same failure indefinitely.
      armv6_pmcr_write(0) ;
      if (pid == -1) {
        fprintf(stderr, "*fatal* waitpid failed\n") ;
        perror("waitpid") ;
      } else {
        fprintf(stderr, "*fatal* Unexpected return from waitpid()\n") ;
      }
      exit( EXIT_FAILURE ) ;
    }
  }

  // Write event counts to the results file
  fprintf(result_file, "\nPerformance events\n") ;
  fprintf(result_file, "  [ ... ] = scaled event count\n") ;
  fprintf(result_file, "  PTI     = per thousand instructions\n") ;
  fprintf(result_file, "  Total periods:      %llu\n",
          (unsigned long long)periods) ;

  // Cycles are counted during all measurement periods
  scaled_instructions = dscale(instructions,iperiods) ;
  fprintf(result_file, "\n") ;
  fprintf(result_file, "  Cycles:             %'llu\n",
          (unsigned long long)cycles) ;
  report_event_count("Instructions:", instructions, iperiods) ;
  report_event_count("IBUF stall cycles:", ibuf_stalls, iperiods) ;
  fprintf(result_file, "  Instr periods:      %llu\n",
          (unsigned long long)iperiods) ;
  if (instructions != 0) {
    fprintf(result_file, "  CPI:                %-7.3f\n",
            (double)cycles / scaled_instructions) ;
  }
  if (cycles != 0) {
    fprintf(result_file, "  IBUF stall percent: %-7.3f%%\n",
            (dscale(ibuf_stalls,iperiods) / (double)cycles) * 100.0) ;
  }

  fprintf(result_file, "\n") ;
  report_event_count("DC cached accesses:", data_cache_accesses, dperiods) ;
  report_event_count("DC misses:", data_cache_misses, dperiods) ;
  fprintf(result_file, "  DC periods:         %llu\n",
          (unsigned long long)dperiods) ;
  if (data_cache_accesses != 0) {
    // Both counts come from the same periods, so no scaling is needed
    fprintf(result_file, "  DC miss ratio:      %-7.3f%%\n",
            ((double)data_cache_misses / (double)data_cache_accesses) * 100.0) ;
  }

  fprintf(result_file, "\n") ;
  report_event_count("MicroTLB misses:", micro_tlb_misses, tperiods) ;
  report_event_count("Main TLB misses:", main_tlb_misses, tperiods) ;
  fprintf(result_file, "  TLB periods:        %llu\n",
          (unsigned long long)tperiods) ;
  // Rates are meaningless if the TLB events were never measured
  if (instructions != 0 && tperiods != 0) {
    fprintf(result_file, "  Micro miss rate:    %-7.3f PTI\n",
            1000.0 * dscale(micro_tlb_misses,tperiods) / scaled_instructions) ;
    fprintf(result_file, "  Main miss rate:     %-7.3f PTI\n",
            1000.0 * dscale(main_tlb_misses,tperiods) / scaled_instructions) ;
  }

  fprintf(result_file, "\n") ;
  report_event_count("Branches:", branches, bperiods) ;
  report_event_count("Mispredicted BR:", mispredicts, bperiods) ;
  fprintf(result_file, "  BR periods:         %llu\n",
          (unsigned long long)bperiods) ;
  // The rate is meaningless if the branch events were never measured
  if (instructions != 0 && bperiods != 0) {
    fprintf(result_file, "  Branch rate:        %-7.3f PTI\n",
            1000.0 * dscale(branches,bperiods) / scaled_instructions) ;
  }
  if (branches != 0) {
    // Both counts come from the same periods, so no scaling is needed
    fprintf(result_file, "  Mispredict ratio:   %-7.3f%%\n",
            ((double)mispredicts / (double)branches) * 100.0) ;
  }

  close_result_file() ;

  return( EXIT_SUCCESS ) ;
}