/*
  Load Balance Diagnosis for MPP LS-DYNA
  Author: Suri Bala, Livermore Software
  Copyright: Livermore Software
  Credits: Brian Wainscott, Jason Wang
  Contact: suri@lstc.com
  Usage: exe_name d3hsp_file_name {threshold_factor}
  Description: Performs a load balance check for all processors
  Compilation: cc source_file_name.c -o exe_file_name
*/
#include<stdio.h>
#include<stdarg.h>
#include<malloc.h>
#include<stdlib.h>
#include<string.h>

#define LINE_WIDTH 128

/* Offsets (0-based) into a d3hsp line for the fields this tool reads. */
#define THREAD_COUNT_COL 25  /* thread count on a " Parallel..." line       */
#define HOST_MARKER_COL  25  /* a '#' here marks a per-processor timing line */
#define HOST_TIMING_COL  65  /* timing value on a per-processor timing line  */

/*
 * Load balance check for an MPP LS-DYNA d3hsp file.
 *
 *   argv[1]  path of the d3hsp file (required)
 *   argv[2]  threshold factor (optional, default 0.2). A processor whose
 *            timing/average ratio lies outside [1-factor, 1+factor] is
 *            reported as Overloaded/Underloaded.
 *
 * Exit status:
 *    0  summary printed
 *   -1  no d3hsp file given
 *   -2  d3hsp file could not be opened
 *   -3  threshold factor is not a non-negative number
 *   -4  out of memory, or no usable timing data found in the file
 */
int main(int argc, char *argv[]) {

  FILE *d3hsp = NULL;
  char buffer[LINE_WIDTH];
  int i, num_threads = 0, host_count = 0;
  float *hosts = NULL;
  float ratio, sum = 0, average_timing, min_timing = 1e20f, max_timing = -1e20f;
  float threshold_factor = 0.2f, max_threshold, min_threshold;

  // if no d3hsp file is specified, exit out
  if( argc < 2 ) {
     printf("Usage: %s d3hsp_file \n", argv[0]);
     return -1;
  }

  // if a threshold is specified, parse it out of the argument string
  // (validated before the file is opened so no handle is left behind)
  if( argc >= 3 ) {
    if( sscanf(argv[2], "%f", &threshold_factor) != 1 || threshold_factor < 0 ) {
      printf("Invalid threshold factor %s\n", argv[2]);
      return -3;
    }
  }

  // open the d3hsp file
  d3hsp = fopen(argv[1], "r");
  if( !d3hsp ) {
    printf("Could not open file %s\n", argv[1]);
    return -2;
  }

  // read line by line; testing fgets() itself (rather than feof() up front)
  // ensures the last line is not processed a second time
  while( fgets(buffer, LINE_WIDTH, d3hsp) != NULL ) {
      size_t len = strlen(buffer);

      // a line longer than the buffer is truncated: drop its remainder so
      // the tail is not mistaken for a separate line on the next pass
      if( len > 0 && buffer[len-1] != '\n' ) {
          int c;
          while( (c = fgetc(d3hsp)) != EOF && c != '\n' )
              ;
      }

      // get the num of threads; only the first line that yields a valid
      // count is used, so the timing array is allocated exactly once
      if( hosts == NULL && len > THREAD_COUNT_COL &&
          strncmp(buffer+1, "Parallel", 8) == 0 ) {
          if( sscanf(buffer+THREAD_COUNT_COL, "%d", &num_threads) == 1 &&
              num_threads > 0 ) {
              hosts = malloc((size_t)num_threads * sizeof *hosts);
              if( !hosts ) {
                  printf("Out of memory for %d processor timings\n", num_threads);
                  fclose(d3hsp);
                  return -4;
              }
          } else {
              num_threads = 0;
          }
      }

      // store the host based timing; the length test keeps us from parsing
      // stale bytes left in the buffer by an earlier, longer line, and the
      // count test keeps writes inside the allocated array
      if( len > HOST_TIMING_COL && buffer[HOST_MARKER_COL] == '#' &&
          hosts != NULL && host_count < num_threads ) {
          float timing;
          if( sscanf(buffer+HOST_TIMING_COL, " %e ", &timing) == 1 ) {
              hosts[host_count] = timing;
              if( timing > max_timing ) max_timing = timing;
              if( timing < min_timing ) min_timing = timing;
              sum += timing;
              ++host_count;
          }
      }
  }
  fclose(d3hsp);

  // without any positive timing there is nothing to average or compare
  if( host_count == 0 || sum <= 0 ) {
    printf("No usable processor timing data found in %s\n", argv[1]);
    free(hosts);
    return -4;
  }

  // average over the timings actually read; for a complete file this is
  // the same as dividing by num_threads
  average_timing = sum/host_count;
  max_threshold = (float)1.0+threshold_factor;
  min_threshold = (float)1.0-threshold_factor;

  fprintf(stdout, "\n  Load Balance Summary \n\n");
  fprintf(stdout, "     Processor Number            Ratio           Status            Remarks\n");
  fprintf(stdout, " --------------------------------------------------------------------------------------------\n");
  for(i=0; i<host_count; i++) {
      ratio =  hosts[i]/average_timing;
      fprintf(stdout, "     %10d                  %2.2f", i, ratio);
      if( ratio > max_threshold) fprintf(stdout,"%20s%10s%20s","Overloaded"," "," Better decomposition is needed");
      if( ratio < min_threshold ) fprintf(stdout,"%20s%10s%20s","Underloaded"," "," Better decomposition is needed");
      fprintf(stdout,"\n");
  }
  fprintf(stdout, " --------------------------------------------------------------------------------------------\n");
  fprintf(stdout, "     Total number of threads: %10d\n", num_threads);
  fprintf(stdout, "     Timing \n");
  fprintf(stdout, "     Average CPU (seconds)  : %10.4f\n", average_timing);
  fprintf(stdout, "     Maximum CPU (seconds)  : %10.4f\n", max_timing);
  fprintf(stdout, "     Minimum CPU (seconds)  : %10.4f\n", min_timing);
  fprintf(stdout, "     Thresholds \n");
  fprintf(stdout, "     Maximum threshold used : %10.4f\n", max_threshold);
  fprintf(stdout, "     Minimum threshold used : %10.4f\n", min_threshold);
  fprintf(stdout, "\n\n");

  free(hosts);
  return 0;

}
