#!/usr/local/bin/perl

# When run in a directory that contains the result files, or when given
# the directory name with -d, this script extracts the traversal time (or
# the value that follows another string given with -s) from those files
# and prints it in a form that is convenient for drawing graphs.
# File names must have the form page_eviction_cachesize_tname_temp; the
# first character of the page, eviction and cachesize fields is stripped.

# usage_assert -- stop with a usage message unless a condition holds.
# Parameters: $val  condition to check; any true value passes
#             $str  error text handed to usage() when $val is false
# Returns nothing when $val is true; otherwise control passes to usage().
sub usage_assert { # Parameter Boolean, string
    # Lexical copies (my) instead of dynamically scoped ones (local), so
    # the arguments never write through the package globals $val / $str.
    my ($val, $str) = @_;
    return if $val;
    usage($str);
}

# usage -- report a command-line error, show the synopsis, and quit.
# Parameters: $err  text describing what was wrong
# Does not return: the process exits with status 1 so that callers and
# shell scripts can tell the failure apart from a successful run.
sub usage { # Parameters: Error string
    my ($err) = @_;
    # Every option is optional, so each one is shown in brackets.
    my $synopsis = join(' ',
        "extract_nums [-d <dir>] [-s datastring] [-addrot]",
        "[-swizzle_overheads]",
        "[-subtract-replacement] [-jgraph] [files ...]");
    # Diagnostics go to STDERR so they do not end up in redirected output.
    print STDERR "ERROR: $err\n";
    print STDERR "$synopsis\n\n";
    exit 1;
}

# Comparison routine for sort(). The two records arrive as hash
# references in the sort variables $a and $b; they are ordered
# numerically by PAGE, ties are broken on EVICT, and remaining ties on
# CACHE.
sub rec_sort {
    for my $field (qw(PAGE EVICT)) {
        next if $a->{$field} == $b->{$field};
        return $a->{$field} <=> $b->{$field};
    }
    return $a->{CACHE} <=> $b->{CACHE};
}

# Per-traversal times, keyed by traversal name. The per-file loop looks
# the traversal up here and subtracts the value when it derives the
# swizzle time.
%hot_times = (
    "t1"       => 9.93,
    "t3"       => 14.12,
    "t6"       => 0.012688,
    "t200"     => 6.31,
    "t89.11.0" => 17.324,
);

# Option defaults: all flags off, current directory, and the standard
# data string to look for in the result files.
($addrot, $subrep, $jgraph) = (0, 0, 0);
($dirname, $datastring) = (".", "Time for traversal");

# Echo the command line so the output records how it was produced.
print "Options: @ARGV\n";

# parse_options -- consume the leading option words of an argument list.
# Parameters: the argument words (normally @ARGV)
# Returns the index of the first word that is not an option, i.e. where
# the file names start; this equals the number of words when there are
# none. A "--" word ends option processing and is itself skipped.
# Side effects: sets the globals $dirname, $datastring,
# $swizzle_overheads, $addrot, $subrep and $jgraph according to the
# options seen. An unknown option, or -d / -s without a following value,
# is reported through usage().
sub parse_options {
    my @args = @_;
    my $idx = 0;
    while ($idx <= $#args) {
        my $option = $args[$idx];
        last if $option !~ m#^-.*$#;
        if ($option eq "-d" || $option eq "-s") {
            # These two take a value; refuse to run off the end of the
            # list instead of silently storing an undefined value.
            if ($idx >= $#args) {
                usage("Option $option requires a value");
            }
            my $value = $args[++$idx];
            if ($option eq "-d") {
                $dirname = $value;
            } else {
                $datastring = $value;
            }
        } elsif ($option eq "-swizzle_overheads") {
            $swizzle_overheads = 1;
        } elsif ($option eq "-addrot") {
            $addrot = 1;
        } elsif ($option eq "-subtract-replacement") {
            $subrep = 1;
        } elsif ($option eq "-jgraph") {
            $jgraph = 1;
        } elsif ($option eq "--") {
            return $idx + 1;
        } else {
            usage("Bad option: $option");
        }
        $idx++;
    }
    return $idx;
}

# $i is left at the position of the first file name in @ARGV.
$i = parse_options(@ARGV);

# Build @files, the list of result files to process.
if ($i > $#ARGV) {
    # No file names were given: use every non-hidden entry of $dirname,
    # in sorted order. The directory is read directly rather than through
    # a shell "ls", so names with spaces or shell metacharacters are safe.
    usage_assert(-d $dirname, "Bad directory specified");
    opendir(my $dh, $dirname)
        or die "Cannot read directory $dirname: $!\n";
    @files = sort grep { !/^\./ } readdir($dh);
    closedir($dh);
} else {
    # Take every remaining argument, including the last one.
    push(@files, @ARGV[$i .. $#ARGV]);
}

# We need the numbers for the same traversal and temperature to be on the
# same graph, so create a hash table that maps traversal+temp to a list of
# records holding the cache size, page size, eviction value and the data.
# Each list is later sorted on page size, then eviction, then cache size.

# read_point_file -- gather the statistics recorded in one result file.
# Parameters: $path          file to read
#             $pattern       regex text that introduces a data value; a
#                            data line looks like "<pattern> = value" or
#                            "<pattern>: value"
#             $with_swizzle  true to add the stack scan and page
#                            compaction times to the replacement cost
# Returns a reference to a hash with these keys:
#   NUM            number of data lines found
#   DATA           mean of the data values (0 when NUM is 0)
#   DIFF           latest data value minus the sum of the earlier ones,
#                  i.e. second minus first for two points
#   REPLACEMENT    summed replacement times divided by NUM
#   FETCH_INSTALL  summed "including pc bookkeeping" times divided by NUM
#   ROTSIZE        value of the last "Total number of persistent
#                  objects" line, 0 when the file has none
# A file that cannot be opened produces a warning and is reported as
# containing no data (NUM is 0).
sub read_point_file {
    my ($path, $pattern, $with_swizzle) = @_;
    # Everything starts from zero for each file, so nothing (in
    # particular ROTSIZE) is carried over from a previously read file.
    my %stats = (NUM => 0, DATA => 0, DIFF => 0, REPLACEMENT => 0,
                 FETCH_INSTALL => 0, ROTSIZE => 0);
    my $fh;
    if (!open($fh, '<', $path)) {
        warn "Cannot open $path: $!\n";
        return \%stats;
    }
    while (my $line = <$fh>) {
        if ($line =~ /$pattern\s*(=|:)\s+(.*)/) {
            my $value = $2;
            # Assumes 2 points only XXX
            # Keyed on the count rather than on the running sum, so a
            # first value of 0 still counts as a previous point.
            if ($stats{NUM}) {
                $stats{DIFF} = $value - $stats{DATA};
            }
            $stats{DATA} += $value;
            $stats{NUM}++;
        }
        if ($line =~ /Total number of persistent objects:\s+(.*)/) {
            $stats{ROTSIZE} = 0 + $1;
        }
        if ($with_swizzle && $line =~ /Stack scan time:\s+(.*)/) {
            $stats{REPLACEMENT} += $1;
        }
        if ($line =~ /including pc bookkeeping\):\s+(.*)/) {
            $stats{FETCH_INSTALL} += $1;
        }
        if ($line =~ /Page scan time:\s+(.*)/) {
            $stats{REPLACEMENT} += $1;
        }
        if ($with_swizzle && $line =~ /Page compaction time:\s+(.*)/) {
            $stats{REPLACEMENT} += $1;
        }
    }
    close($fh);
    if ($stats{NUM}) {
        # Turn the sums into per-point averages.
        foreach my $field (qw(DATA REPLACEMENT FETCH_INSTALL)) {
            $stats{$field} /= $stats{NUM};
        }
    }
    return \%stats;
}

# Turn every result file into one record and file it in %trav_hash under
# its traversal+temperature key.
foreach my $file (@files) {
    # Get the page size, cache size, traversal name, hot or cold
    my ($page, $eviction, $cache, $tname, $temp) = split("_", $file);
    # Drop the one-character prefix of the three leading fields.
    $cache = substr($cache, 1);
    $page = substr($page, 1);
    $eviction = substr($eviction, 1);

    # Get the final time for traversal. Take the average
    my $stats = read_point_file("$dirname/$file", $datastring,
                                $swizzle_overheads);
    if (!$stats->{NUM}) {
        print "No information about point: $file\n";
    }
    my $data = $stats->{DATA};

    if ($addrot) {
        if ($stats->{ROTSIZE} == 0) {
            die "Zero objects in ROT? ($file)";
        }
        $cache += $stats->{ROTSIZE}/64;
    }

    # Map the traversal+temperature to array of records
    my $key = "$tname$temp";
    my $trav_hot_time = $hot_times{$tname};
    my $swizzle_time = $data - $stats->{FETCH_INSTALL} - $trav_hot_time;
    if ($swizzle_overheads) {
        $data -= $swizzle_time;
    }
    if ($subrep) {
        $data -= $stats->{REPLACEMENT};
    }

    # -1 marks a point without data and keeps the division below defined.
    if (!$data) {$data = -1;}

    push(@{ $trav_hash{$key} },
         { CACHE => $cache, PAGE => $page, EVICT => $eviction,
           DATA => $data, SWIZZLE => $swizzle_time,
           DIFF => $stats->{DIFF} * 100 / $data });
}

# Decoration for the output lines. With -jgraph the annotations are
# wrapped in "(*" ... "*)" and the columns are separated by blanks only;
# otherwise nothing wraps the annotations and a comma follows each of
# the two numeric columns.
($lc, $rc, $comma) = $jgraph ? ("(*", "*)", "") : ("", "", ",");

# format_point -- render one record as a line of output.
# Parameters: $rec           hash ref with CACHE, DATA, DIFF and (when
#                            $with_swizzle is true) SWIZZLE
#             $open, $close  text placed around the annotation
#             $sep           text placed after each numeric column
#             $with_swizzle  true to include the swizzle time
# Returns the formatted text: CACHE divided by 1024, then DATA, then the
# annotation with DIFF as a percentage, ending in a newline plus the
# three-blank indent of the next line.
sub format_point {
    my ($rec, $open, $close, $sep, $with_swizzle) = @_;
    my $cache_col = $rec->{CACHE} / 1024.0;
    # "%%" is the literal percent sign; the decorations are passed as %s
    # arguments so they can never be mistaken for conversions.
    if ($with_swizzle) {
        return sprintf("%8.2f%s %8.2f%s %s sw = %5.2f (%4.2f%%) %s\n   ",
                       $cache_col, $sep, $rec->{DATA}, $sep, $open,
                       $rec->{SWIZZLE}, $rec->{DIFF}, $close);
    }
    return sprintf("%8.2f%s %8.2f%s %s (%4.2f%%) %s\n   ",
                   $cache_col, $sep, $rec->{DATA}, $sep, $open,
                   $rec->{DIFF}, $close);
}

# Print one section per traversal+temperature key. Keys are sorted so the
# sections come out in the same order on every run.
foreach $trav (sort keys %trav_hash) {
    print "\n$trav\n";
    @{ $trav_hash{$trav} } = sort rec_sort @{ $trav_hash{$trav} };
    my ($page, $eviction) = (-1, -1);
    foreach $rec (@{ $trav_hash{$trav} }) {
        if ($page != $$rec{PAGE} || $eviction != $$rec{EVICT}) {
            # New page info
            $page = $$rec{PAGE};
            $eviction = $$rec{EVICT};
            print "\n\n$lc Page Size = ${page}K, Eviction = $eviction$rc\n   ";
        }
        print format_point($rec, $lc, $rc, $comma, $swizzle_overheads);
    }
    print "\n";
}
