#!/usr/bin/perl -w

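# Parse the HTML file named on the command line and print the text of its
# table cells (<td>/<th>), one output line per table row (<tr>).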

use HTML::Parser;

# Parser state shared with the handler subroutines below.
our $rowcount = -1;   # index of the current row in @table; -1 until the first <tr>
our $colcount = 0;    # number of <td>/<th> start tags seen in the current row
our @table;           # array of arrays: $table[$row] holds that row's cell texts
our @text;            # NOTE(review): not referenced anywhere in this file - confirm before removing

# The HTML file to parse is the first command-line argument.
my $file = shift;
die "usage: $0 file.html\n" unless defined $file && length $file;

# set up our html parser; only <tr>, <td> and <th> tags are reported
my $p = HTML::Parser->new(api_version => 3,
     start_h => [\&t_start_handler, "self,tagname,attr"],
     end_h => [\&t_end_handler, "self,tagname,attr"],
     report_tags => [qw(tr td th)],
    );

# parse_file returns undef (with the reason in $!) when the file cannot be opened
$p->parse_file($file) || die "cannot parse '$file': $!\n";
# at this point the @table array of arrays should be populated

# Print one line per row: "row N:" followed by each cell prefixed with a tab.
foreach my $row (0 ..  $#table)
{
   print "row $row:";
   # A row that collected no text has no entry in @table; treat it as empty.
   foreach my $col (@{ $table[$row] || [] })
   {
      print "\t$col";
   }
   print "\n";
}




############################################################
# subroutines start here
############################################################

# Start-tag handler for the reported tags.
#   <tr>        : advance the row index and restart the column counter.
#   <td> / <th> : count the cell and switch on text capture by installing
#                 hash_text as the parser's text handler.
# Arguments (per the "self,tagname,attr" argspec): the parser object, the
# tag name, and the attribute hash (unused here).
sub t_start_handler
{
    my ($parser, $tagname) = @_;

    if ($tagname eq 'tr')
    {
        $colcount = 0;
        $rowcount += 1;
        return;
    }

    # Anything that is not a table cell needs no further handling.
    return unless $tagname eq 'td' || $tagname eq 'th';

    $colcount += 1;
    $parser->handler(text => \&hash_text, "dtext");
    return;
}

# Text handler, installed by t_start_handler while the parser is inside a
# <td>/<th> cell.  Receives one chunk of text (registered with the "dtext"
# argspec), normalizes its whitespace and appends it to the current row of
# @table.  Chunks that are empty or whitespace-only are ignored.
sub hash_text
{
   my $text = shift;
   return unless defined $text;

   # Collapse every run of whitespace (including embedded newlines) to a
   # single space rather than deleting it, so words separated by a line
   # break plus indentation are not glued together; then trim both ends.
   $text =~ s/\s+/ /g;
   $text =~ s/^ //;
   $text =~ s/ $//;
   return if $text eq '';

   # A cell seen before any <tr> would index $table[-1] on an empty array,
   # which is a fatal error; file such text under row 0 instead.
   $rowcount = 0 if $rowcount < 0;

   push(@{ $table[$rowcount] }, $text);
   return;
}

# End-tag handler for the reported tags (tr, td, th).
# Switches text capture off so that text outside a cell is not collected.
# Arguments: the parser object and the name of the tag being closed.
sub t_end_handler
{
    my($self, $tag) = @_;

    # Stop collecting text until the next <td>/<th> start tag re-enables it.
    # The end handler itself must stay installed: removing it (as was done
    # previously) meant that after the first closing tag nothing ever turned
    # text capture off again, so stray text between cells or after the table
    # was appended to the last row.
    $self->handler("text", undef);
    return;
}

