#!/usr/local/bin/perl

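# Extracts, from a set of result files, the time for traversals (or another
# string given with the -s option) and prints it in a form that is useful
# for the graphs.  The files are taken from the current directory, from the
# directory named with -d, or from the command line.
# File names must have the form <factorname><factor>_<X><cachesize>: letters
# or dashes followed by digits, an underscore, one prefix character that is
# dropped, and the cache size.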

# Abort with a usage message unless a condition holds.
# Parameters: boolean condition, error string.
# Returns nothing when the condition is true; otherwise hands the error
# string to usage().
sub usage_assert {
    my ($ok, $message) = @_;
    return if $ok;
    usage($message);
}

# Report a command-line error, print the synopsis, and terminate.
# Parameters: error string describing what was wrong.
# Never returns.  The message goes to STDERR and the exit status is 1, so
# that a calling shell script or makefile can tell the run failed and the
# error text does not end up mixed into the extracted numbers.
sub usage {
    my ($err) = @_;
    my $synopsis =
        "extract_sensitivity_nums [-d <dir>] [-s datastring] [files ...]";
    print STDERR "ERROR: $err\n";
    print STDERR "$synopsis\n\n\n";
    exit(1);
}

# Sort comparator for keys of the form "cache#factorname#factor".
# Orders by factor name (string comparison), then by cache size (numeric),
# then by factor value (numeric).  The two keys are taken from sort's $a and
# $b; the result is -1, 0 or 1 as sort expects.
# The fields are held in lexicals so a sort does not overwrite package
# globals as a side effect.
sub rec_sort {
    my ($cache_a, $name_a, $factor_a) = split("#", $a);
    my ($cache_b, $name_b, $factor_b) = split("#", $b);

    # Each comparison yields 0 on a tie, which lets "||" fall through to
    # the next, less significant field.
    return $name_a cmp $name_b
        || $cache_a <=> $cache_b
        || $factor_a <=> $factor_b;
}

# Echo the raw arguments so the output records how it was produced.
print "Options: @ARGV\n";

# Defaults: scan the current directory for the traversal time.
$dirname = ".";
$datastring = "Time for traversal";

# Consume the leading options.  Parsing stops after "--" or at the first
# argument that does not start with "-"; $i is left at the index of the
# first file argument (or past the end of @ARGV when there is none).
# File names that begin with "-" must therefore be preceded by "--".
for ($i = 0; $i <= $#ARGV; $i++) {
    $option = $ARGV[$i];
    if ($option eq "--") {
        $i++;
        last;
    }
    if ($option !~ /^-/) {
        last;
    }
    if ($option eq "-d") {
        # A trailing -d used to store undef as the directory name.
        usage_assert($i < $#ARGV, "Option -d requires a directory name");
        $dirname = $ARGV[++$i];
    } elsif ($option eq "-s") {
        # A trailing -s used to store undef as the search string.
        usage_assert($i < $#ARGV, "Option -s requires a data string");
        $datastring = $ARGV[++$i];
    } else {
        usage("Bad option: $option");
    }
}

if ($i > $#ARGV) {
    # No files were named: use every non-hidden entry of the directory, in
    # sorted order, as a plain "ls" would list them.  The directory is read
    # directly so that its name never passes through a shell.
    usage_assert(-d $dirname, "Bad directory specified");
    opendir(my $dh, $dirname)
        or usage("Cannot read directory $dirname: $!");
    @files = sort grep { !/^\./ } readdir($dh);
    closedir($dh);
} else {
    # Take every remaining argument, including the last one.
    push(@files, @ARGV[$i .. $#ARGV]);
}


# We need the numbers for the same factor name and cache size to be on the
# same graph.  So create a hash table that maps "cache#factorname#factor"
# to a record that contains <cache, factorname, factor, data, diff,
# replacement cost>.  When printing we sort on factor name, then on cache
# size, and inside that on the factor value.

# Scan every result file and record one entry per point in %trav_hash.
#
# The part of the file name before the first "_" must contain a run of
# letters/dashes followed by digits (factor name and factor value); the
# part after it, minus its first character, is the cache size.  From the
# file contents the loop averages the values on lines matching $datastring
# (which is used as a regular expression) and sums the stack scan, page
# scan and page compaction times.  The record is stored under the key
# "cache#factorname#factor".
foreach my $file (@files) {
    my ($factorval, $cachepart) = split("_", $file);

    # Skip names that carry no factor.  Such a file used to inherit the
    # factor name and value of whichever file was processed before it and
    # showed up as a bogus point.
    my ($factorname, $factor) =
        defined($factorval) ? ($factorval =~ /([\-a-zA-Z]+)(\d+)/) : ();
    if (!defined($factor)) {
        print STDERR "Skipping $file: no factor name and value in file name\n";
        next;
    }

    # Drop the one-character prefix in front of the cache size.  A name
    # without a cache part yields an empty cache size.
    my $cache = "";
    if (defined($cachepart) && length($cachepart)) {
        $cache = substr($cachepart, 1);
    }

    # Get the final time for traversal. Take the average
    my $num = 0;
    my $diff = 0;
    my $data = 0;
    my $replacement_cost = 0;

    # Three-argument open: the name is only ever treated as a file to read,
    # whatever characters it contains.
    my $path = "$dirname/$file";
    if (open(my $fh, '<', $path)) {
        while (my $line = <$fh>) {
            if ($line =~ /$datastring\s*(=|:)\s+(.*)/) {
                if ($data) {
                    # Assumes 2 points only XXX
                    $diff = $2 - $data;
                }
                $data += $2;
                $num++;
            }
            # Each of these timings adds to the replacement cost.
            foreach my $label ("Stack scan time", "Page scan time",
                               "Page compaction time") {
                if ($line =~ /$label:\s+(.*)/) {
                    $replacement_cost += $1;
                }
            }
        }
        close($fh);
    } else {
        # Best effort: report the problem and fall through, so the point is
        # still listed below as having no information.
        print STDERR "Cannot open $path: $!\n";
    }

    if (!$num) {
        print "No information about point: $file\n";
    } else {
        $data = $data / $num;
        $replacement_cost = $replacement_cost / $num;
    }

    # A missing or zero measurement is recorded as -1, which also keeps the
    # percentage below from dividing by zero.
    if (!$data) {
        $data = -1;
    }

    # Map the point to its record; DIFF is the spread between the two
    # samples as a percentage of their average.
    $trav_hash{"${cache}#${factorname}#${factor}"} = {
        CACHE            => $cache,
        FACTORNAME       => $factorname,
        FACTOR           => $factor,
        DATA             => $data,
        DIFF             => $diff * 100 / $data,
        REPLACEMENT_COST => $replacement_cost,
    };
}

# Print out the results
# Print the results grouped by factor name and cache size.  Keys are
# visited in rec_sort order, so all points of one group are adjacent; a
# header line is printed whenever the factor name or the cache size changes.
my $current_name = "NULL";
my $current_cache = -1;
foreach my $key (sort rec_sort keys %trav_hash) {
    my $resrec = $trav_hash{$key};
    if ($resrec->{FACTORNAME} ne $current_name
        || $current_cache != $resrec->{CACHE}) {
        $current_name = $resrec->{FACTORNAME};
        $current_cache = $resrec->{CACHE};
        print "\n";
        # %5d truncates, so adding 0.5 first rounds a non-negative cache
        # size to the nearest integer.
        printf("(* %s, Cache = %5d, Replacement costs in parens *)\n\n",
               $current_name, $current_cache + 0.5);
    }
    # "%%" is the printf escape for a literal percent sign.  The previous
    # "\%" collapsed to a lone "%" and only printed correctly because an
    # invalid conversion is echoed verbatim.
    printf("%3d %6.2f (* %6.2fs, %4.2f%% *) \n",
           $resrec->{FACTOR}, $resrec->{DATA}, $resrec->{REPLACEMENT_COST},
           $resrec->{DIFF});
}
print "\n";
