#!/usr/local/bin/perl

# Usage : stats
# Generates a report of some useful or interesting statistics about the
# current state of the database.

# Directories the report reads from:
#   $dbdir   - contains the "datalist" file
#   $datadir - contains the raw *.txt data sets
#   $htmldir - contains db.log, db.bbl and db.lof
($dbdir, $datadir, $htmldir) = (
    "/home/vipera/detn_db/db",
    "/home/vipera/detn_db/data/plotdata",
    "/home/vipera/detn_db/html",
);

# ---- General statistics ----------------------------------------------------
# Gathers three figures from files in $htmldir and prints them:
#   $pages - page count extracted from db.log
#   $refs  - number of lines in db.bbl that mention "bibitem"
#   $figs  - number of lines in db.lof
# Each file is read through a lexical handle that is closed as soon as it has
# been consumed; a failed open aborts with the system error text.

# Search db.log for number of pages.  On a line containing "pages", the value
# kept is the text between the last "(" on that line and the next whitespace.
# If several lines qualify, the last one wins.  Lines that mention "pages"
# but carry no "(" are skipped so they cannot overwrite the count.
$pages = "";    # stays empty when no page count is found
{
    my $logfile = "$htmldir/db.log";
    open(my $log, '<', $logfile) or die "Couldn't open $logfile: $!\n";
    while (my $line = <$log>) {
        next unless $line =~ /pages/;
        # Greedy ".*" backtracks to the LAST "(" on the line.
        $pages = $1 if $line =~ /^.*\((\S+)/s;
    }
    close($log);
}

# Count entries in db.bbl file: one per line that mentions "bibitem".
$refs = 0;
{
    my $bblfile = "$htmldir/db.bbl";
    open(my $bbl, '<', $bblfile) or die "Couldn't open $bblfile: $!\n";
    while (my $line = <$bbl>) {
        $refs++ if $line =~ /bibitem/;
    }
    close($bbl);
}

# Count entries in db.lof file (number of figures / summary graphs).
# NOTE(review): every line of the file is counted, so any line that is not a
# figure entry (blank lines, spacing commands) inflates the total -- confirm
# that db.lof holds exactly one line per figure.
$figs = 0;
{
    my $loffile = "$htmldir/db.lof";
    open(my $lof, '<', $loffile) or die "Couldn't open $loffile: $!\n";
    while (defined(my $line = <$lof>)) {
        $figs++;
    }
    close($lof);
}

print "\n";
print "General Statistics (since last LaTeX build):\n";
print "Hardcopy pages:          $pages\n";
print "Number of references:    $refs\n";
print "Number of figures:       $figs  (summary graphs)\n";
print "\n";

# ---- Datalist statistics ---------------------------------------------------
# Reads $dbdir/datalist, a tab-separated file whose first field is a data
# file name.  A line whose second and third fields are both "Y" is counted as
# "used".  The first two lines of the file are skipped (presumably column
# headers -- TODO confirm).
#
# Results left in package globals for the rest of the script:
#   @files     - every non-empty file name seen
#   @usedfiles - names from lines flagged "Y" / "Y"
#   $nlines, $nfiles, $nused - the counters printed below
$nlines = 0;
$nfiles = 0;
$nused = 0;
{
    my $listfile = "$dbdir/datalist";
    open(my $list, '<', $listfile) or die "Couldn't open $listfile: $!\n";

    # Discard the two leading lines.
    for (1 .. 2) { my $header = <$list>; }

    # Read line by line instead of slurping the whole file into a list.
    while (my $line = <$list>) {
        $nlines++;

        # Strip the newline BEFORE splitting; otherwise it stays glued to the
        # last field, so a blank line looks like a file called "\n", a
        # name-only line is stored as "name\n", and a trailing "Y" flag never
        # equals "Y".
        chomp($line);
        my ($name, $flag1, $flag2) = split(/\t/, $line);
        $name = "" unless defined $name;

        if (defined $flag1 && defined $flag2
            && $flag1 eq "Y" && $flag2 eq "Y") {
            $nused++;
            push(@usedfiles, $name);
        }

        # Only lines that actually name a file count as files.
        if ($name ne "") {
            $nfiles++;
            push(@files, $name);
        }
    }
    close($list);
}

print "Datalist Statistics:\n";
print "Total lines in datalist: $nlines\n";
print "Total files in datalist: $nfiles\n";
print "Total files used:        $nused   (datasets in document)\n";
print "\n";

# ---- Raw data set statistics -----------------------------------------------
# Lists the *.txt files in $datadir and compares each name (with the ".txt"
# suffix removed) against the names collected from the datalist:
#   @notused      - data sets whose name is not in @usedfiles
#   @notmentioned - data sets whose name is not in @files
# Prints the totals followed by both lists.
$dfiles = 0;
@notused = ();
@notmentioned = ();
{
    # Hash lookups replace rescanning both arrays for every data file.
    my %mentioned = map { $_ => 1 } @files;
    my %used      = map { $_ => 1 } @usedfiles;

    # Read the directory directly rather than piping "cd DIR; ls *.txt"
    # through the shell: with the pipe, a failed "cd" let "ls" silently
    # report on the current directory instead.  Hidden files are excluded,
    # as the shell glob did; only plain files are considered.
    opendir(my $dir, $datadir) or die "Couldn't open $datadir: $!\n";
    my @datasets = sort
                   grep { !/^\./ && /\.txt\z/ && -f "$datadir/$_" }
                   readdir($dir);
    closedir($dir);

    $dfiles = scalar(@datasets);
    foreach my $file (@datasets) {
        # Remove only the trailing extension, not a ".txt" inside the name.
        (my $name = $file) =~ s/\.txt\z//;
        push(@notused, $name)      unless $used{$name};
        push(@notmentioned, $name) unless $mentioned{$name};
    }
}

# scalar(@array) is the element count; $#array is the last index (count - 1,
# or -1 for an empty array) and would under-report every total by one.
print "Raw Data Set Statistics:\n";
print "Total data sets:         $dfiles  (in data/plotdata)\n";
print "Total unused data sets:  " . scalar(@notused)
    . "  (not used in final document)\n";
print "Total unmentioned sets:  " . scalar(@notmentioned)
    . "  (not mentioned in datalist)\n";
print "\n";
print "Unused data sets: \n";
print join(" ",@notused), "\n";
print "\n";
print "Unmentioned data sets: \n";
print join(" ",@notmentioned), "\n";

