A simple ‘C’ program is included to extract the processor-based timing information from the D3HSP file and output a load balance summary. Future versions of MPP-LS-DYNA may include more information, such as the summary shown below.
Usage: load_balance_checker d3hsp_file_name {threshold_factor}
Source Preview
/*
Load Balance Diagnosis for MPP LS-DYNA
Author: Suri Bala, Livermore Software
Copyright: Livermore Software
Credits: Brian Wainscott, Jason Wang
Contact: suri@lstc.com
Usage: exe_name d3hsp_file_name {threshold_factor}
Description: Performs a load balance check for all processors
Compilation: cc source_file_name.c -o exe_file_name
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define LINE_WIDTH 128
/*
 * Reads a D3HSP file, collects one CPU timing value per processor and prints
 * a load balance summary to stdout.
 *
 * argv[1]  path of the D3HSP file (required)
 * argv[2]  optional threshold factor (default 0.2); a processor is reported
 *          as overloaded/underloaded when its timing divided by the average
 *          timing lies outside [1 - factor, 1 + factor]
 *
 * Exit status: 0 on success, -1 when no file name is given, -2 when the file
 * cannot be opened, -3 when the threshold factor is not a valid non-negative
 * number, -4 when memory allocation fails, -5 when no timing lines are found.
 */
int main(int argc, char *argv[]) {
    /* Column offsets taken over unchanged from the original program.
       NOTE(review): they assume a fixed D3HSP line layout - confirm against
       the LS-DYNA version in use. */
    enum { THREAD_COUNT_COL = 25, HOST_MARK_COL = 25, HOST_TIME_COL = 65 };

    FILE *d3hsp = NULL;
    char buffer[LINE_WIDTH];
    int i, num_threads = 0, host_count = 0;
    float *hosts = NULL;
    float ratio, sum = 0, average_timing = 1, min_timing = 1e20f, max_timing = -1e20f;
    float threshold_factor = 0.2f, max_threshold = 0, min_threshold = 0;

    // if no d3hsp file is specified, exit out
    if (argc < 2) {
        fprintf(stderr, "Usage: %s d3hsp_file \n", argc > 0 ? argv[0] : "load_balance_checker");
        exit(-1);
    }

    // open the d3hsp file
    d3hsp = fopen(argv[1], "r");
    if (!d3hsp) {
        fprintf(stderr, "Could not open file %s\n", argv[1]);
        exit(-2);
    }

    // if a threshold is specified, parse it (the whole argument must be a number)
    if (argc >= 3) {
        char *end = NULL;
        float parsed = strtof(argv[2], &end);
        /* !(parsed >= 0) also rejects NaN */
        if (end == argv[2] || *end != '\0' || !(parsed >= 0)) {
            fprintf(stderr, "Invalid threshold factor %s\n", argv[2]);
            fclose(d3hsp);
            exit(-3);
        }
        threshold_factor = parsed;
    }

    // read the d3hsp file line by line; looping on fgets() itself avoids
    // processing the last line twice, which a feof()-controlled loop does
    while (fgets(buffer, sizeof buffer, d3hsp) != NULL) {
        size_t len = strlen(buffer);

        // get the number of threads; only the first such line is honored so
        // the timing array is allocated exactly once
        if (hosts == NULL && len > THREAD_COUNT_COL &&
            strncmp(buffer + 1, "Parallel", 8) == 0) {
            int count = 0;
            if (sscanf(buffer + THREAD_COUNT_COL, "%d", &count) == 1 && count > 0) {
                hosts = malloc((size_t)count * sizeof *hosts);
                if (hosts == NULL) {
                    fprintf(stderr, "Could not allocate memory for %d timings\n", count);
                    fclose(d3hsp);
                    exit(-4);
                }
                num_threads = count;
            }
        }

        // store the host based timing; the length check keeps us from reading
        // bytes left over in the buffer from a previous, longer line
        if (len > HOST_TIME_COL && buffer[HOST_MARK_COL] == '#') {
            float timing = 0;
            // ignore timing lines seen before the thread count, beyond the
            // allocated capacity, or whose value does not parse
            if (hosts != NULL && host_count < num_threads &&
                sscanf(buffer + HOST_TIME_COL, "%e", &timing) == 1) {
                hosts[host_count] = timing;
                if (timing > max_timing) max_timing = timing;
                if (timing < min_timing) min_timing = timing;
                sum += timing;
                ++host_count;
            }
        }
    }
    fclose(d3hsp);

    if (host_count == 0) {
        fprintf(stderr, "No processor timing information found in %s\n", argv[1]);
        free(hosts);
        exit(-5);
    }

    // average over the timings actually read, so a mismatch between the
    // announced thread count and the timing lines found cannot skew it
    average_timing = sum / (float)host_count;
    max_threshold = 1.0f + threshold_factor;
    min_threshold = 1.0f - threshold_factor;

    fprintf(stdout, "\n Load Balance Summary \n\n");
    fprintf(stdout, " Processor Number Ratio Status Remarks\n");
    fprintf(stdout, " --------------------------------------------------------------------------------------------\n");
    for (i = 0; i < host_count; i++) {
        // when every timing is zero all processors equal the average
        ratio = (average_timing != 0) ? hosts[i] / average_timing : 1.0f;
        fprintf(stdout, " %10d %2.2f", i, ratio);
        if (ratio > max_threshold) {
            fprintf(stdout, "%20s%10s%20s", "Overloaded", " ", " Better decomposition is needed");
        }
        if (ratio < min_threshold) {
            fprintf(stdout, "%20s%10s%20s", "Underloaded", " ", " Better decomposition is needed");
        }
        fprintf(stdout, "\n");
    }
    fprintf(stdout, " --------------------------------------------------------------------------------------------\n");
    fprintf(stdout, " Total number of threads: %10d\n", num_threads);
    fprintf(stdout, " Timing \n");
    fprintf(stdout, " Average CPU (seconds) : %10.4f\n", average_timing);
    fprintf(stdout, " Maximum CPU (seconds) : %10.4f\n", max_timing);
    fprintf(stdout, " Minimum CPU (seconds) : %10.4f\n", min_timing);
    fprintf(stdout, " Thresholds \n");
    fprintf(stdout, " Maximum threshold used : %10.4f\n", max_threshold);
    fprintf(stdout, " Minimum threshold used : %10.4f\n", min_threshold);
    fprintf(stdout, "\n\n");

    free(hosts);
    return 0;
}
Sample Output
Load Balance Summary
Processor Number Ratio Status Remarks
--------------------------------------------------------------------------------------------
0 0.81
1 0.86
2 0.87
3 1.00
4 1.36 Overloaded Better decomposition is needed
5 1.02
6 1.11
7 1.34 Overloaded Better decomposition is needed
8 1.14
9 0.89
10 0.82
11 0.78 Underloaded Better decomposition is needed
--------------------------------------------------------------------------------------------
Total number of threads: 12
Timing
Average CPU (seconds) : 31791.2500
Maximum CPU (seconds) : 43180.0000
Minimum CPU (seconds) : 24829.0000
Thresholds
Maximum threshold used : 1.2000
Minimum threshold used : 0.8000