#!/usr/local/bin/perl

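# extract_nums: summarize benchmark result files for graphing.
#
# Reads every file in the current directory, or in the directory given with
# -d, and extracts the value labelled "Time for traversal" (or the label
# given with -s). The numbers are printed grouped by traversal, page size,
# eviction setting and cache size so they can be used for the graphs.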

# Abort with a usage message unless a required condition holds.
# Parameters: $val - condition to check; any true value passes
#             $str - error text handed to usage() when $val is false
# Returns nothing when the condition holds; otherwise usage() reports the
# error and ends the program.
sub usage_assert { # Parameter Boolean, string
    # Lexicals rather than local(): local() would temporarily override the
    # package globals $val and $str for everything called from here.
    my ($val, $str) = @_;
    return if $val;
    usage($str);
}

# Report a command-line error, show the expected invocation and terminate.
# Parameters: $err - description of what was wrong
# Never returns: the script exits with status 1 so that a calling shell
# script or makefile can tell the run failed.
sub usage { # Parameters: Error string
    my ($err) = @_;
    # Diagnostics go to STDERR so they cannot end up mixed into the extracted
    # numbers that the script writes to STDOUT.
    print STDERR "ERROR: $err\n";
    print STDERR "extract_nums [-d <dir>] [-s datastring]\n\n\n";
    exit(1);
}

# Comparator for sort(): orders records (hash references in $a and $b)
# numerically by PAGE, then by EVICT, and finally by CACHE.
sub rec_sort {
    # The first of the leading keys on which the two records differ decides.
    for my $field (qw(PAGE EVICT)) {
        next if $a->{$field} == $b->{$field};
        return $a->{$field} <=> $b->{$field};
    }
    # Same page size and eviction setting: fall back to the cache size.
    return $a->{CACHE} <=> $b->{CACHE};
}

# Parse the command line, which is a sequence of "<option> <value>" pairs:
#   -d <dir>         directory holding the result files (default ".")
#   -s <datastring>  label of the value to extract (default set below)
# Any other option, or an option without a value, is reported through
# usage(), which terminates the script.
$nargs = scalar(@ARGV);
$dirname = ".";
$datastring = "Time for traversal";
for (my $i = 0; $i < $nargs; $i += 2) {
    # Scalar element access; @ARGV[$i] would be a one-element slice.
    my $option = $ARGV[$i];
    my $val = $ARGV[$i + 1];
    if ($option ne "-d" && $option ne "-s") {
        my $shown = defined($val) ? $val : "";
        usage("Bad option: $option $shown");
    }
    # A trailing "-d" or "-s" would otherwise silently store undef.
    if (!defined($val)) {
        usage("Missing value for option: $option");
    }
    if ($option eq "-d") {
        $dirname = $val;
    } else {
        $datastring = $val;
    }
}

usage_assert(-d $dirname, "Bad directory specified");

# We need numbers for the same traversal and temperature to be on the same
# graph, so build a hash table (%trav_hash) that maps "traversal-temp" to a
# list of records, one record per result file. The report stage sorts each
# list by page size, eviction setting and cache size.

# Read the directory directly instead of running `ls $dirname` through the
# shell, which breaks on directory names containing spaces or shell
# metacharacters. Hidden entries are skipped and names are sorted, as ls did.
opendir(my $dh, $dirname) or die "Cannot read directory $dirname: $!\n";
my @files = sort grep { !/^\./ } readdir($dh);
closedir($dh);

foreach my $file (@files) {
    # The file name carries five "_"-separated fields: page size, eviction,
    # cache size, traversal name and temperature (hot or cold). The first
    # character of each of the first three fields is dropped.
    my ($page, $eviction, $cache, $tname, $temp) = split("_", $file);
    $cache = substr($cache, 1);
    $page = substr($page, 1);
    $eviction = substr($eviction, 1);

    my $fh;
    if (!open($fh, '<', "$dirname/$file")) {
        print STDERR "Cannot open $dirname/$file: $!\n";
        next;
    }

    # Sum every reported value over the whole file; $num counts how many
    # times the requested data string was seen so it can be averaged.
    my $num = 0;
    my ($data, $compaction_time, $stackscan_time, $pagescan_time) = (0, 0, 0, 0);
    my ($total_fetch_time, $total_scans, $total_fetches) = (0, 0, 0);
    while (my $line = <$fh>) {
        # NOTE(review): $datastring is used as a regular expression, so any
        # metacharacters in a -s argument are interpreted as such.
        if ($line =~ /$datastring\s+=\s+(.*)/) {
            $data += $1;
            $num++;
        } elsif ($line =~ /Page compaction time:\s+(.*)/) {
            $compaction_time += $1;
        } elsif ($line =~ /Stack scan time:\s+(.*)/) {
            $stackscan_time += $1;
        } elsif ($line =~ /Page scan time:\s+(.*)/) {
            $pagescan_time += $1;
        } elsif ($line =~ /Total fetch time.*including.*:\s+(.*)/) {
            $total_fetch_time += $1;
        } elsif ($line =~ /Number of scans.*compactions.*:\s+(.*)/) {
            $total_scans += $1;
        } elsif ($line =~ /Number of fetches.*:\s+(.*)/) {
            $total_fetches += $1;
        }
    }
    close($fh);

    if (!$num) {
        # A record whose DATA is 0 would make the report divide by zero, so
        # files without the requested data are reported and left out.
        print "No information about point: $file\n";
        next;
    }
    $data = $data / $num;

    # -1 marks "none seen": it keeps the divisions below defined and makes
    # TOTAL_SCANS negative, which the report uses to exclude the record from
    # its averages. The sentinel is applied only after summing; starting the
    # sums at -1 made every real total one too small and turned a total of
    # exactly 1 into a division by zero.
    $total_scans = -1 if !$total_scans;
    $total_fetches = -1 if !$total_fetches;

    # Map the traversal+temperature to an array of records. Times are stored
    # per scan (or per fetch); TOTAL_SCANS is the average per data point.
    my $rec = { CACHE => $cache, PAGE => $page, EVICT => $eviction, DATA => $data,
                COMPACTION_TIME  => $compaction_time/$total_scans,
                STACKSCAN_TIME   => $stackscan_time/$total_scans,
                PAGESCAN_TIME    => $pagescan_time/$total_scans,
                TOTAL_FETCH_TIME => $total_fetch_time/$total_fetches,
                TOTAL_SCANS      => $total_scans/$num
              };
    push(@{ $trav_hash{"$tname-$temp"} }, $rec);
}

# Print the average overhead percentage that closes one page/eviction group.
# Parameters: $perc_sum - sum of the overhead percentages in the group
#             $entries  - number of records that contributed to $perc_sum
sub print_page_summary {
    my ($perc_sum, $entries) = @_;
    # A group without usable records reports 0 instead of dividing by zero.
    my $avg = $entries ? $perc_sum / $entries : 0;
    printf("\nAvg overhead percent = %5.2f\n", $avg);
    print "PAGE END\n\n";
}

# Set to 1 to append, for records with eviction enabled, the background and
# foreground time ratios relative to the record $num_csizes positions earlier
# in the sorted list. Off by default; this diagnostic was previously disabled.
my $show_ratios = 0;

# One section per traversal; keys are sorted so the output order is the same
# on every run.
foreach my $trav (sort keys %trav_hash) {
    print "\nTRAVERSAL: $trav\n";
    my @travpoints = sort rec_sort @{ $trav_hash{$trav} };
    my ($page, $eviction) = (-1, -1);    # -1: no group started yet
    my ($perc_sum, $num_entries) = (0, 0);
    my $num_csizes = 0;    # records seen in the current eviction-0 group
    my $count = 0;         # index of the current record in @travpoints
    foreach my $rec (@travpoints) {
        if ($page != $$rec{PAGE} || $eviction != $$rec{EVICT}) {
            # New page info: close the previous group, if there was one.
            if ($page != -1) {
                print_page_summary($perc_sum, $num_entries);
            }
            ($perc_sum, $num_entries) = (0, 0);
            $page = $$rec{PAGE};
            $eviction = $$rec{EVICT};
            print "\n\nPage Size = ${page}K, Eviction = $eviction\n";
            if (!$eviction) {
                $num_csizes = 0;
            }
        }

        my $scans = $$rec{TOTAL_SCANS};
        my $total_overheads = ($$rec{COMPACTION_TIME} + $$rec{STACKSCAN_TIME} +
            $$rec{PAGESCAN_TIME}) * $scans;
        my $ratio_text = "";
        if ($eviction) {
            # With eviction enabled the page scan time is not counted.
            $total_overheads -= $$rec{PAGESCAN_TIME} * $scans;
            if ($show_ratios) {
                my $oldrec = $travpoints[$count - $num_csizes];
                my $old_bg = $$oldrec{DATA};
                my $old_fg = $old_bg + $$oldrec{TOTAL_OVERHEADS};
                # Ratios are only defined when no denominator is zero.
                if ($old_bg && $old_fg && $$rec{DATA}) {
                    my $bgratio = $$rec{DATA} / $old_bg;
                    my $fgratio = ($$rec{DATA} + $total_overheads) / $old_fg;
                    my $diff_ratio = 100 * (1 - $fgratio / $bgratio);
                    $ratio_text = sprintf("BG = %5.5f, FG = %5.5f (%4.2f)",
                                          $bgratio, $fgratio, $diff_ratio);
                }
            }
        } else {
            $num_csizes++;
        }
        $$rec{TOTAL_OVERHEADS} = $total_overheads;

        # Guard both divisions: DATA is 0 for a file without usable data and
        # a zero scan count has no per-scan overhead.
        my $trav_perc = $$rec{DATA} ? 100 * $total_overheads / $$rec{DATA} : 0;
        my $per_scan_ms = $scans ? 1000 * $total_overheads / $scans : 0;

        # The cache size goes through %s so that a "%" in a file name cannot
        # be taken as a printf directive.
        # NOTE(review): the first printf ends without a separator, so the FG
        # value runs straight into the cache size printed by the second one.
        # Left as is because the output format may be parsed elsewhere.
        printf("%s :: BG: %6.4f, FG: %6.4f",
               $$rec{CACHE}, $$rec{DATA}, $$rec{DATA} + $total_overheads);
        printf("%s :: Overheads = %5.2f ms, Perc = %5.2f, ",
               $$rec{CACHE}, $per_scan_ms, $trav_perc);
        print $ratio_text;
        print "\n";

        # Only records that actually reported scans count towards the average.
        if ($scans > 0) {
            $perc_sum += $trav_perc;
            $num_entries++;
        }
        #printf("%5s :: Compact: %.4g, Stack: %.4g, Page: %.4g, Fetch: %.4g\n",
        #$$rec{CACHE}, $$rec{COMPACTION_TIME},  $$rec{STACKSCAN_TIME},
        #       $$rec{PAGESCAN_TIME}, $$rec{TOTAL_FETCH_TIME});
        $count++;
    }
    # Close the last group of this traversal.
    print_page_summary($perc_sum, $num_entries);
}
