#!/usr/bin/perl
#
# 911Reportparse -- by greenfly <greenfly@greenfly.org>
#
# This script takes the 911 Report PDF (or .txt file converted by pdftotext)
# and splits it into text chapters, for easier eBook reading.
#
# you can get a copy of the report here:
# http://i.a.cnn.net/cnn/US/resources/9.11.report/911Report.pdf
#

use strict;
use warnings;

# Usage: 911Reportparse <report.pdf | report.txt>
#
# Splits the report text into one file per section, written next to the
# input and named after it:
#   <basename>-Preface.txt      everything before the first chapter heading
#   <basename>-ChapterNN.txt    one file per numbered chapter
#   <basename>-AppendixX.txt    one file per lettered appendix
# When given a PDF, it is first converted to <basename>.txt with pdftotext.

my $report = shift;
die "Usage: $0 <report.pdf|report.txt>\n" unless defined $report;

# Anchor the match so only a real trailing extension is accepted, and fail
# loudly instead of continuing with an empty basename.
my ($basename, $extension) = $report =~ /\A(.*)\.(txt|pdf)\z/is
    or die "$report: expected a file name ending in .pdf or .txt\n";

if (lc $extension eq 'pdf') {
    print "Converting PDF to TXT... ";

    # List-form system() bypasses the shell, so file names containing spaces
    # or shell metacharacters are passed through untouched.  The output name
    # is given explicitly so it always matches the file opened below.
    system('pdftotext', $report, "$basename.txt") == 0
        or die "\npdftotext failed for $report (wait status $?)\n";

    print "done.\n";
}

# Open $path for writing and return the handle; dies if it cannot be created.
sub open_section {
    my ($path) = @_;
    open my $fh, '>', $path or die "Cannot write $path: $!\n";
    return $fh;
}

# Chapters are recognised strictly in order: only a heading that carries the
# next expected number starts a new chapter file.
my $nextchapter = 1;

open my $in, '<', "$basename.txt" or die "Cannot read $basename.txt: $!\n";

# Everything before the first chapter heading goes into the preface file.
my $out = open_section("$basename-Preface.txt");

while (my $line = <$in>) {
    my $filename;

    # NOTE(review): inside the character class, '-? is a range (apostrophe
    # through question mark), so digits and punctuation such as ( ) : ; also
    # match.  It is kept unchanged so the same headings are detected as before.
    if ($line =~ /^$nextchapter [A-Z "'-?.,]+$/) {
        # Number the file from the chapter counter itself, so the numbering
        # stays numeric even for a chapter heading seen after an appendix.
        $filename = sprintf('%s-Chapter%02d.txt', $basename, $nextchapter);
        $nextchapter++;
    }
    elsif ($line =~ /^APPENDIX ([A-Z]) [A-Z "'-?.,]+$/) {
        $filename = "$basename-Appendix$1.txt";
    }

    if (defined $filename) {
        print "Creating $filename\n";
        # Buffered write errors only surface at close, so check it.
        close $out or die "Error closing output file: $!\n";
        $out = open_section($filename);
    }

    # The heading line itself is the first line of its new section file.
    print {$out} $line;
}

close $out or die "Error closing output file: $!\n";

close $in;

