//
// Naive matrix multiplication
//
//
// Author: P.J. Drongowski
// Date: 10 June 2013
//
// Copyright (c) 2013 Paul J. Drongowski
//
#include <stdlib.h>
#include <stdio.h>
#include "../test_common/test_common.h"
#include "../test_common/rpi_pmu.h"
// Name handed to create_result_file() in main()
#define RESULT_FILE_NAME "naive.txt"
// Largest value main() accepts for the -m option.
// NOTE(review): this is larger than MSIZE, the dimension the arrays
// below are actually declared with -- confirm before wiring -m up.
#define MAX_MSIZE 1000
// Compile-time matrix dimension: sizes the three arrays and bounds
// every loop in initialize_matrices() and multiply_matrices()
#define MSIZE 500
// Run-time matrix size. main() sets it from -m, but the loops visible
// in this file iterate to MSIZE and do not read it.
int matrix_size = MSIZE ;
// Input operands of the multiplication
float matrix_a[MSIZE][MSIZE] ;
float matrix_b[MSIZE][MSIZE] ;
// Result: matrix_r = matrix_a x matrix_b
float matrix_r[MSIZE][MSIZE] ;
//
// Run options (some not implemented)
//
// Each flag starts at 0 and is set to 1 by main() when the matching
// command line option is seen. main() picks at most one measurement,
// testing c_flag first, then t_flag, then i_flag.
//
int c_flag = 0 ; // -c: run measure_cache()
int i_flag = 0 ; // -i: run measure_cpi()
int m_flag = 0 ; // -m: matrix size option was given (main() warns it is unimplemented)
int t_flag = 0 ; // -t: run measure_tlb()
// Option letters in getopt() format. Not referenced by the code visible
// in this file; main() parses argv by hand instead.
#define OPTIONS "cdhim:tv" // Currently unimplemented (getopt)
// Usage text, one entry per command line option, terminated by a NULL
// entry. main() passes this array to display_usage_info() for -h.
char *usage_strings[] = {
" -c Measure cache behavior",
" -d Display debugging information",
" -h Display usage information",
" -i Measure IPC",
" -m N Set the matrix dimensions to N elements",
" -t Measure TLB behavior",
" -v Display more information (verbose output)",
NULL
} ;
//
// Prepare the three global matrices for a run.
//
// Every element of matrix_a and matrix_b receives a rand() value
// divided by RAND_MAX; every element of matrix_r is cleared to zero.
// Elements are visited row by row, and for each position the value
// for matrix_a is drawn from rand() before the one for matrix_b.
//
void initialize_matrices()
{
  int row, col ;

  for (row = 0 ; row < MSIZE ; row++) {
    float *a_row = matrix_a[row] ;
    float *b_row = matrix_b[row] ;
    float *r_row = matrix_r[row] ;

    for (col = 0 ; col < MSIZE ; col++) {
      // Keep this draw order (A first, then B) so the generated
      // values land in the same elements on every run
      a_row[col] = (float) rand() / RAND_MAX ;
      b_row[col] = (float) rand() / RAND_MAX ;
      r_row[col] = 0.0 ;
    }
  }
}
//
// Compute matrix_r = matrix_a x matrix_b with the textbook i/j/k
// loop nest over the global MSIZE x MSIZE arrays.
//
// This is the kernel that the measure_*() functions bracket with
// start_counting()/stop_counting(). main() reports instruction counts
// that are hard-coded estimates for this particular loop nest, so the
// loop structure is deliberately left as written.
//
void multiply_matrices()
{
int i, j, k ;
// i selects the row of matrix_a / matrix_r
for (i = 0 ; i < MSIZE ; i++) {
// j selects the column of matrix_b / matrix_r
for (j = 0 ; j < MSIZE ; j++) {
// Dot product of row i of A and column j of B, accumulated in a
// float local and stored to matrix_r once per (i, j)
float sum = 0.0 ;
for (k = 0 ; k < MSIZE ; k++) {
// matrix_a is walked along a row (k is its second subscript);
// matrix_b is walked down a column (k is its first subscript),
// so consecutive matrix_b accesses are MSIZE floats apart
sum = sum + (matrix_a[i][k] * matrix_b[k][j]) ;
}
matrix_r[i][j] = sum ;
}
}
}
//
// Run one multiplication with the ARMV6_EVENT_INSTR_EXEC and
// ARMV6_EVENT_CPU_CYCLES events selected (used by main() for -i).
//
// The matrices are initialized before start_counting() is called, so
// only multiply_matrices() executes between start and stop.
//
void measure_cpi()
{
  // Set-up happens outside the counted region
  initialize_matrices() ;

  // Counted region: the multiplication only
  start_counting(ARMV6_EVENT_INSTR_EXEC, ARMV6_EVENT_CPU_CYCLES) ;
  multiply_matrices() ;
  stop_counting() ;
}
//
// Run one multiplication with the ARMV6_EVENT_DCACHE_CACCESS and
// ARMV6_EVENT_DCACHE_MISS events selected (used by main() for -c).
//
// The matrices are initialized before start_counting() is called, so
// only multiply_matrices() executes between start and stop.
//
void measure_cache()
{
  // Set-up happens outside the counted region
  initialize_matrices() ;

  // Counted region: the multiplication only.
  // Alternative event pair kept for reference:
  //   start_counting(ARMV6_EVENT_DCACHE_ACCESS, ARMV6_EVENT_DCACHE_CACCESS) ;
  start_counting(ARMV6_EVENT_DCACHE_CACCESS, ARMV6_EVENT_DCACHE_MISS) ;
  multiply_matrices() ;
  stop_counting() ;
}
//
// Run one multiplication with the ARMV6_EVENT_DTLB_MISS and
// ARMV6_EVENT_MAIN_TLB_MISS events selected (used by main() for -t).
//
// The matrices are initialized before start_counting() is called, so
// only multiply_matrices() executes between start and stop.
//
void measure_tlb()
{
  // Set-up happens outside the counted region
  initialize_matrices() ;

  // Counted region: the multiplication only
  start_counting(ARMV6_EVENT_DTLB_MISS, ARMV6_EVENT_MAIN_TLB_MISS) ;
  multiply_matrices() ;
  stop_counting() ;
}
//
// Run one multiplication without calling start_counting() or
// stop_counting(). main() uses this when none of -c, -t or -i is given.
//
void run_no_events()
{
  // Same set-up and kernel as the measure_*() functions, minus the
  // event counting calls
  initialize_matrices() ;
  multiply_matrices() ;
}
//
// Program entry point.
//
// 1. Parses single-character options of the form "-x" from argv.
//    Arguments that do not start with '-' are ignored.
// 2. Creates the result file (RESULT_FILE_NAME).
// 3. Runs the matrix multiplication once. At most one measurement is
//    made, chosen in the order -c, then -t, then -i; with none of
//    them the multiplication runs without event counting.
// 4. Writes run information, estimated cycle counts, the event counts
//    (if a measurement was made) and a time summary to the result file.
//
// Returns EXIT_SUCCESS. Exits with EXIT_FAILURE if create_result_file()
// reports failure (returns 0).
//
int main(int argc, char* argv[])
{
  int arg_count ;
  // Instruction estimates use a 64-bit type: 9 * MSIZE^3 no longer fits
  // in a 32-bit int/long once MSIZE grows past roughly 620.
  const unsigned long long msize = MSIZE ;
  unsigned long long outer, middle, inner, total ;
  unsigned long long cycles ;
  unsigned int cpu_speed ;
  double elapsed = 0 ;

#ifdef _DEBUG
  d_flag = 1 ;
#endif

  // Parse any command line arguments. Use simple one character
  // option names preceded by a '-' character.
  for (arg_count = 1 ; arg_count < argc ; arg_count++)
  {
    if (argv[arg_count][0] != '-') {
      // Not an option; silently skipped
      continue ;
    }

    switch( argv[arg_count][1] )
    {
    case 'c' :
      // Measure data cache behavior
      c_flag = 1 ;
      break ;

    case 'd' :
      // Enable debug output
      d_flag = 1 ;
      break ;

    case 'h' :
      // Display usage information (the run still proceeds afterwards)
      display_usage_info(argv[0], usage_strings) ;
      h_flag = 1 ;
      break ;

    case 'i' :
      // Measure IPC
      i_flag = 1 ;
      break ;

    case 'm' :
      // Set matrix size. The value is parsed and range-checked, but
      // the matrix loops in this file still run at MSIZE.
      fprintf(stderr, "*warning* -m is unimplemented\n") ;
      m_flag = 1 ;
      arg_count++ ;
      if (arg_count < argc)
      {
        // strtol() has defined behavior for out-of-range input (it
        // saturates), unlike atoi(). Range-check before narrowing.
        long requested = strtol(argv[arg_count], NULL, 10) ;
        if ((requested <= 0) || (requested > MAX_MSIZE)) {
          fprintf(stderr, "*warning* Matrix size is limited to %d\n",
                  MAX_MSIZE) ;
          matrix_size = MSIZE ;
        } else {
          matrix_size = (int) requested ;
        }
      }
      else
      {
        // -m was the last argument: report it instead of ignoring it
        fprintf(stderr, "*warning* -m requires a matrix size\n") ;
      }
      break ;

    case 't' :
      // Measure TLB behavior
      t_flag = 1 ;
      break ;

    case 'v' :
      // Verbose output
      v_flag = 1 ;
      break ;

    default:
      printf("Illegal option: %c\n", argv[arg_count][1]) ;
      break ;
    }
  }

  if (create_result_file(RESULT_FILE_NAME) == 0) {
    exit( EXIT_FAILURE ) ;
  }

  // Only one measurement is made per run; -c wins over -t, -t over -i
  if (c_flag) {
    // Measure cache behavior
    measure_cache() ;
  }
  else if (t_flag) {
    // Measure TLB behavior
    measure_tlb() ;
  }
  else if (i_flag) {
    // Measure IPC
    measure_cpi() ;
  } else {
    // Don't measure any events (this is the default)
    run_no_events() ;
  }

  print_heading("Naive matrix multiplication") ;
  print_system_info() ;

  // Number of ARM instructions in the loop nest. The per-iteration
  // factors (8, 9, 9) are hand-derived estimates for the loops in
  // multiply_matrices().
  outer = 8 * msize ;
  middle = 9 * msize * msize ;
  inner = 9 * msize * msize * msize ;
  total = outer + middle + inner ;

  fprintf(result_file, "Run information\n") ;
  fprintf(result_file, " Outer loop instructions: %llu\n", outer) ;
  fprintf(result_file, " Middle loop instructions: %llu\n", middle) ;
  fprintf(result_file, " Inner loop instructions: %llu\n", inner) ;
  fprintf(result_file, " Total instructions: %llu\n", total) ;

  // Cycle estimate = elapsed time * 10^6 * CPU speed in MHz.
  // NOTE(review): assumes get_elapsed_time() returns seconds -- confirm
  // against test_common.
  elapsed = get_elapsed_time() ;
  cpu_speed = get_cpu_speed() ; // MHz
  cycles = (long long int)(elapsed * 1000000.0 * (double)cpu_speed) ;
  fprintf(result_file, " Estimated cycles: %llu\n", cycles) ;
  fprintf(result_file, " Cycles (scaled by 64): %llu\n", cycles / 64) ;

  // Event counts exist only if one of the measure_*() functions ran
  if (c_flag || i_flag || t_flag) {
    fprintf(result_file,"Performance Monitor events\n") ;
    print_counts(result_file) ;
  }

  fprintf(result_file, "\nRun execution summary\n") ;
  print_process_times() ;
  print_elapsed_time() ;
  close_result_file() ;

  return( EXIT_SUCCESS ) ;
}