#!/usr/bin/perl
#
# 911Reportparse -- by greenfly
#
# This script takes the 911 Report PDF (or .txt file converted by pdftotext)
# and splits it into text chapters, for easier eBook reading.
#
# you can get a copy of the report here:
# http://i.a.cnn.net/cnn/US/resources/9.11.report/911Report.pdf
#
# Usage: 911Reportparse <report.pdf | report.txt>
#
# Output files are written next to the input, named after its basename:
#   <basename>-Preface.txt      everything before the first chapter heading
#   <basename>-ChapterNN.txt    one file per numbered chapter
#   <basename>-Appendix<X>.txt  one file per "APPENDIX <X> ..." heading
#
use strict;
use warnings;

my $report = shift @ARGV;
die "Usage: $0 <report.pdf|report.txt>\n" unless defined $report;

# Everything before the ".txt"/".pdf" extension names the input and outputs.
my ($basename) = $report =~ /(.*)\.(?:txt|pdf)/i
    or die "$report: expected a .pdf or .txt file\n";

if ($report =~ /\.pdf$/i) {
    # Unbuffer STDOUT so the progress message is visible while pdftotext runs.
    local $| = 1;
    print "Converting PDF to TXT... ";

    # List-form system() bypasses the shell, so file names containing spaces
    # or shell metacharacters are passed through untouched.  The output name
    # is given explicitly so it always matches the file opened below.
    if (system('pdftotext', $report, "$basename.txt") != 0) {
        my $why = $? == -1 ? "could not run it: $!"
                           : 'exit status ' . ($? >> 8);
        die "\npdftotext failed ($why)\n";
    }
    print "done.\n";
}

# Characters allowed in a heading title.  Note that '-? inside the class is a
# range (apostrophe through question mark), so digits and most ASCII
# punctuation are accepted as well; it is kept as-is so that headings which
# matched before still match.
my $heading_chars = qr/[A-Z "'-?.,]+/;

# Number of the chapter heading we expect to see next.
my $next_chapter = 1;

open my $in, '<', "$basename.txt"
    or die "Cannot read $basename.txt: $!\n";

# Text preceding the first chapter heading goes to the preface file.
my $out = open_output("$basename-Preface.txt");

while (my $line = <$in>) {
    if ($line =~ /^$next_chapter $heading_chars$/) {
        # A line such as "3 SOME TITLE" starts chapter 3 once 1 and 2 are seen.
        my $filename = sprintf '%s-Chapter%02d.txt', $basename, $next_chapter;
        $next_chapter++;
        $out = switch_output($out, $filename);
    }
    elsif ($line =~ /^APPENDIX ([A-Z]) $heading_chars$/) {
        $out = switch_output($out, "$basename-Appendix$1.txt");
    }

    # The heading line itself is written to the section it opens.
    print {$out} $line;
}

close $out or die "Error closing output file: $!\n";
close $in;

# Open $path for writing (truncating it) and return the handle; dies on failure.
sub open_output {
    my ($path) = @_;
    open my $fh, '>', $path
        or die "Cannot write $path: $!\n";
    return $fh;
}

# Announce the new section file, finish the current one and return a handle
# on the new one.  Closing is checked because buffered write errors only
# surface at close time.
sub switch_output {
    my ($current, $path) = @_;
    print "Creating $path\n";
    close $current or die "Error closing output file: $!\n";
    return open_output($path);
}