// // Naive matrix multiplication // // // Author: P.J. Drongowski // Date: 10 June 2013 // // Copyright (c) 2013 Paul J. Drongowski // #include <stdlib.h> #include <stdio.h> #include "../test_common/test_common.h" #include "../test_common/rpi_pmu.h" #define RESULT_FILE_NAME "naive.txt" #define MAX_MSIZE 1000 #define MSIZE 500 int matrix_size = MSIZE ; float matrix_a[MSIZE][MSIZE] ; float matrix_b[MSIZE][MSIZE] ; float matrix_r[MSIZE][MSIZE] ; // // Run options (some not implemented) // int c_flag = 0 ; // Measure cache behavior int i_flag = 0 ; // Measure IPC int m_flag = 0 ; // Set matrix size to N int t_flag = 0 ; // Measure TLB behavior #define OPTIONS "cdhim:tv" // Currently unimplemented (getopt) char *usage_strings[] = { " -c Measure cache behavior", " -d Display debugging information", " -h Display usage information", " -i Measure IPC", " -m N Set the matrix dimensions to N elements", " -t Measure TLB behavior", " -v Display more information (verbose output)", NULL } ; void initialize_matrices() { int i, j ; for (i = 0 ; i < MSIZE ; i++) { for (j = 0 ; j < MSIZE ; j++) { matrix_a[i][j] = (float) rand() / RAND_MAX ; matrix_b[i][j] = (float) rand() / RAND_MAX ; matrix_r[i][j] = 0.0 ; } } } void multiply_matrices() { int i, j, k ; for (i = 0 ; i < MSIZE ; i++) { for (j = 0 ; j < MSIZE ; j++) { float sum = 0.0 ; for (k = 0 ; k < MSIZE ; k++) { sum = sum + (matrix_a[i][k] * matrix_b[k][j]) ; } matrix_r[i][j] = sum ; } } } void measure_cpi() { initialize_matrices() ; start_counting(ARMV6_EVENT_INSTR_EXEC, ARMV6_EVENT_CPU_CYCLES) ; multiply_matrices() ; stop_counting() ; } void measure_cache() { initialize_matrices() ; start_counting(ARMV6_EVENT_DCACHE_CACCESS, ARMV6_EVENT_DCACHE_MISS) ; //start_counting(ARMV6_EVENT_DCACHE_ACCESS, ARMV6_EVENT_DCACHE_CACCESS) ; multiply_matrices() ; stop_counting() ; } void measure_tlb() { initialize_matrices() ; start_counting(ARMV6_EVENT_DTLB_MISS, ARMV6_EVENT_MAIN_TLB_MISS) ; multiply_matrices() ; stop_counting() ; } void run_no_events() { initialize_matrices() ; multiply_matrices() ; } int main(int argc, char* argv[]) { int arg_count ; pid_t my_process_id ; unsigned long outer, middle, inner, total ; unsigned long long cycles ; unsigned int cpu_speed ; double elapsed = 0 ; #ifdef _DEBUG d_flag = 1 ; #endif // Parse any command line arguments. Use simple one character // option names preceded by a '-' character. for (arg_count = 1 ; arg_count < argc ; arg_count++) { if( argv[arg_count][0] == '-' ) { switch( argv[arg_count][1] ) { case 'c' : { // Measure data cache behavior c_flag = 1 ; break ; } case 'd' : { // Enable debug output d_flag = 1 ; break ; } case 'h' : { // Display usage information display_usage_info(argv[0], usage_strings) ; h_flag = 1 ; break ; } case 'i' : { // Measure IPC i_flag = 1 ; break ; } case 'm' : { // Set matrix size fprintf(stderr, "*warning* -m is unimplemented\n") ; m_flag = 1 ; arg_count++ ; if (arg_count < argc) { matrix_size = atoi(argv[arg_count]) ; if ((matrix_size <= 0) || (matrix_size > MAX_MSIZE)) { fprintf(stderr, "*warning* Matrix size is limited to %d\n", MAX_MSIZE) ; matrix_size = MSIZE ; } } break ; } case 't' : { // Measure TLB behavior t_flag = 1 ; break ; } case 'v' : { v_flag = 1 ; break ; } default: { printf("Illegal option: %c\n", argv[arg_count][1]) ; break ; } } } } if (create_result_file(RESULT_FILE_NAME) == 0) { exit( EXIT_FAILURE ) ; } if (c_flag) { // Measure cache behavior measure_cache() ; } else if (t_flag) { // Measure TLB behavior measure_tlb() ; } else if (i_flag) { // Measure IPC (by default) measure_cpi() ; } else { // Don't measurement any events run_no_events() ; } print_heading("Naive matrix multiplication") ; print_system_info() ; // Number of ARM instructions in the loop nest outer = 8 * MSIZE ; middle = 9 * MSIZE * MSIZE ; inner = 9 * MSIZE * MSIZE * MSIZE ; total = outer + middle + inner ; fprintf(result_file, "Run information\n") ; fprintf(result_file, " Outer loop instructions: %lu\n", outer) ; fprintf(result_file, " Middle loop instructions: %lu\n", middle) ; fprintf(result_file, " Inner loop instructions: %lu\n", inner) ; fprintf(result_file, " Total instructions: %lu\n", total) ; elapsed = get_elapsed_time() ; cpu_speed = get_cpu_speed() ; // MHz cycles = (long long int)(elapsed * 1000000.0 * (double)cpu_speed) ; fprintf(result_file, " Estimated cycles: %llu\n", cycles) ; fprintf(result_file, " Cycles (scaled by 64): %llu\n", cycles / 64) ; if (c_flag || i_flag || t_flag) { fprintf(result_file,"Performance Monitor events\n") ; print_counts(result_file) ; } fprintf(result_file, "\nRun execution summary\n") ; print_process_times() ; print_elapsed_time() ; close_result_file() ; return( EXIT_SUCCESS ) ; }