#!/usr/bin/perl

#=====================================================================
#
# spamgraph -- by greenfly <greenfly@greenfly.org>
#              version 0.10.0
#
# usage: spamgraph path/to/Maildir/.folder/cur/ [/path/to/mbox] [...]
#
# This script generates a graph of Spam scores versus # of messages
# with that score. Pass a Maildir or mbox as an argument on the command line
# and it will tally the spam scores for all messages in that folder
# and output the graph in <foldername>.png so if you pass
# /home/foo/Maildir/.quux/cur to the script, the graph will be quux.png
#
# The script works for both mbox and Maildir formats
#
# To run this on all of your Maildirs:
#    $ find Maildir/ -type d -name "*cur*" -exec spamgraph "{}" \;
#
#=====================================================================

use GD::Graph::linespoints;
use strict;

my $foldername;
my $filename;
my $outputdir;
my $file;
our %score;	# integer part of a spam score => number of messages with that score

# Main loop: every command-line argument is either a Maildir message
# directory (normally .../cur) or an mbox file.  For each one the spam
# scores are tallied into %score and one graph is written.
# A named loop variable is used instead of $_ so that code reading files
# into $_ cannot overwrite the aliased @ARGV element.
foreach my $arg (@ARGV)
{
   $foldername = $arg;
   $filename = $foldername;
   $outputdir = ".";	# The directory to put the .png file

# start every folder with an empty tally, otherwise the graph for the
# second folder would also contain the counts of the first one
   %score = ();

   if(-d $foldername)
   {
# strip out directory info to find the maildir name: drop a trailing
# /cur, keep only the last path component and remove its leading dot
      $filename =~ s{/cur/*\z}{};
      $filename =~ s{/+\z}{};
      $filename =~ s{\A.*/}{}s;
      $filename =~ s{\A\.}{};

# read all the regular files in the specified directory
# (this skips ".", ".." and any sub-directories)
      opendir(my $dir, $foldername) or die "can't opendir $foldername: $!";
      while(defined($file = readdir($dir)))
      {
         next unless -f "$foldername/$file";
         parse_file("$foldername/$file");
      }
      closedir($dir);
   }
   elsif(-f $foldername)
   {
# strip out directory info to find the mbox name
      $filename =~ s{\A.*/}{}s;
      $filename =~ s{\A\.}{};
      parse_file($foldername);
   }
   else
   {
      die "$foldername not a valid mail folder\n";
   }

   graph_data($outputdir, $filename);
}


#===========================================================
# subroutines
#===========================================================
# graph_data(DIR, NAME)
#
# Draws the tally held in %score (score => number of messages) as a
# lines-and-points graph and writes it to DIR/NAME.png.
#
#   DIR   directory the image is written to; "." when empty or undefined
#   NAME  folder name, used for the graph title and the image file name
#
# Dies with "No points to graph\n" when %score is empty, and dies when
# GD::Graph reports an error or the image file cannot be written.
sub graph_data
{
   my $dir = shift;
   my $filename = shift;
   our %score;	# shared tally, filled by parse_file

   $dir = "." unless defined $dir and length $dir;

# x coordinates are the scores in ascending order. "-0" and "0" are equal
# numerically, so fall back to a string compare to keep "-0" in front
   my @x = sort { $a <=> $b or $a cmp $b } keys %score;

# "defined @x" is a fatal error on current perls; test for emptiness instead
   unless(@x){ die "No points to graph\n"; }

# y coordinates are the message counts, in the same order as @x
   my @y = map { $score{$_} } @x;

# figure out the max y coordinate
   my $y_max = 0;
   foreach my $count (@y)
   {
      $y_max = $count if $count > $y_max;
   }
   $y_max = int($y_max * 1.1);	# set the graph to be a bit larger than the max y coordinate

   my @data = ( [@x], [@y] );

# width is 8 pixels per x coordinate, or 400 pixels minimum
   my $width = int($#x * 8);
   if($width < 400){ $width = 400; }

# label roughly 20 ticks on the x axis; never hand the library a skip of 0
# (1 labels every tick, which is the GD::Graph default)
   my $label_skip = int($#x / 20);
   if($label_skip < 1){ $label_skip = 1; }

   my $graph = GD::Graph::linespoints->new($width, 300);

   $graph->set(
      x_label         => 'Spam Score',
      y_label         => '# of messages',
      title           => "Spam Score for $filename",
      x_label_skip    => $label_skip,
      marker_size     => 2,
      y_max_value     => $y_max,
      y_tick_number   => 10,
      y_number_format => '%d',
   ) or die $graph->error;

   my $gd = $graph->plot(\@data) or die $graph->error;

# this outputs as .png but can be changed. Read GD::Graph docs for how
   my $path = "$dir/$filename.png";
   print "outputting to $filename.png\n";
   open(my $img, '>', $path) or die "can't write $path: $!\n";
   binmode $img;
   print {$img} $gd->png;
# buffered write errors only show up when the handle is closed
   close $img or die "can't close $path: $!\n";
}

# parse_file(FILE)
#
# Reads FILE (a single Maildir message or a whole mbox) line by line and,
# for every X-Spam-Status line carrying "score=N" or "hits=N", increments
# $score{I}, where I is the integer part of N exactly as written (so
# "-0.4" is counted under "-0" and "7.9" under "7").  The decimal part is
# optional, so "score=15" is counted under "15".
#
# Directories are skipped silently; a file that cannot be opened produces
# a warning and is skipped.  Returns nothing.
sub parse_file
{
   my $file = shift;
   our %score;	# shared tally, read by graph_data

   return if -d $file;

# three-argument open with a lexical handle: the file name is never
# interpreted as a mode or a pipe, and a failure is reported
   open(my $fh, '<', $file) or do
   {
      warn "can't open $file: $!\n";
      return;
   };

# read into a lexical instead of $_ so the caller's $_ is left alone
   while(my $line = <$fh>)
   {
# parse out the spam score and increment the counter for that score
      if($line =~ /^X-Spam-Status.*(?:score|hits)=(-?\d+)(?:\.\d+)?/)
      {
         $score{$1}++;
      }
   }
   close $fh;
   return;
}
