#!/usr/bin/perl -w

######################################################################
# rss-arsforum -- by greenfly <greenfly@greenfly.org>
#
# This program parses the index page of a specified Ars forum,
# then writes an RSS feed for it to a file named <forum name>.rss
#
# usage: rss-arsforum [<forum name>]
# 
######################################################################

use LWP::UserAgent;
use HTTP::Request::Common;
use XML::RSS;
use Date::Manip qw(ParseDate UnixDate Date_Cmp);

# Forum used when no name is given on the command line.
my $default = 'Linux Kung Fu';
$ARGV[0] = $default unless $ARGV[0];

# Progress-output switch tested by the fetch code.  Nothing in this file
# assigns it, so it is only declared here (not initialised) to document it.
our $output_grab;

our $forum;                     # numeric forum id, looked up in %fora below
our $forum_title = $ARGV[0];    # human-readable forum name

# Forum name => forum id as it appears in the forum's index URL.
# Ids are kept as strings; several are too long for a 32-bit integer.
our %fora = (
   'Audio/Visual Club'                  => "67909965",
   'Case and Cooling Fetish'            => "77909585",
   'CPU & Motherboard Technologia'      => "77909774",
   'Mobile Computing Outpost'           => "579009962631",
   'Networking Matrix'                  => "469092836",
   'Other Hardware'                     => "24609792",
   'Agora Classifieds'                  => "57909216",
   'Battlefront'                        => "48409524",
   'Microsoft OS & Software Colloquium' => "99609816",
   'Linux Kung Fu'                      => "96509133",
   'NT, Win2K, & XP Technical Mojo'     => "12009443",
   'Distributed Computing Arcana'       => "122097561",
   'Macintoshian Achaia'                => "8300945231",
   "Programmer's Symposium"             => "6330927813",
   'Gaming, Extra Strength Caplets'     => "39309975",
   'The Lounge'                         => "34709834",
   'The Soap Box'                       => "28609695",
   'The Boardroom'                      => "599009962631",
   'Ars Technica News & Discussion'     => "174096756",
   'The Velvet Room'                    => "8390901411",
   'OpenForum Feedback & Suggestions'   => "51009562",
   'Ars PDF Technical Library'          => "5850957912",
   'Subscription Support & Service'     => "6490940022",
   'Ars Emporium Customer Service'      => "6550932203",
   "The Moderators' Quorum"             => "4680902032",
   "The Writers' Block"                 => "6220903374",
   "Dungeon Masters' back room"         => "103007483631",
);

# An unrecognised name used to leave $forum undefined and silently build a
# malformed index URL; stop early and show the names that are accepted.
unless (exists $fora{$forum_title})
{
   die "Unknown forum '$forum_title'\n"
     . "usage: rss-arsforum [<forum name>]\n"
     . "Known forums:\n"
     . join('', map { "   $_\n" } sort keys %fora);
}

$forum = $fora{$forum_title};

our $base = "http://episteme.arstechnica.com/eve/ubb.x/";
our $site = "50009562";         # not referenced anywhere else in this file
our $index = $base . "a/frm/f/$forum";   # URL of the forum's index page
our %index;                     # topic id => { title, author, lastpage, date, poster }


# Step 1: download the forum's index page (announce it first when
# $output_grab is set); give up if the fetch returned nothing usable.
if($output_grab)
{
   print "Get $ARGV[0] Index\n   ";
}
$page = grab_page($index);
die "\nCan't open $index, nothing to parse!\n" unless $page;

# Step 2: extract the topic entries from the HTML into %index
parse_index($page);

# Step 3: build the feed from %index and save it
create_rss();


#==================================================
# subroutines
#==================================================


# Sort comparator for topic ids.  The sort variables $a and $b are keys of
# %index; their 'date' strings are parsed with Date::Manip on every call
# and compared so that the more recent date sorts first.
sub by_date
{
   # Lexicals: the previous bare assignments created package globals.
   my $date_a = ParseDate($index{$a}{'date'});
   my $date_b = ParseDate($index{$b}{'date'});

   # $b is passed first on purpose -- this reverses the order (newest first).
   return Date_Cmp($date_b, $date_a);
}


# Issue an HTTP GET for the given URL.
#   returns: the response content when the request succeeded, 0 otherwise.
# Progress messages are printed only when $output_grab is true.
sub grab_page
{
   my ($url) = @_;

   # Switch on autoflush for the currently selected output handle so the
   # "Grabbing ..." text shows up before the request completes.
   $| = 1;
   if($output_grab)
   {
      print "Grabbing $url...";
   }

   my $agent    = LWP::UserAgent->new;
   my $response = $agent->request(GET("$url"));

   # Failure path first: callers test the return value for truth.
   return 0 unless $response->is_success;

   if($output_grab)
   {
      print "done\n";
   }
   my $body = $response->content;
   return $body;
}

# Scan the HTML of a forum index page and record every topic row found in
# the global %index, keyed by the topic's message id.
#   $page - the page's HTML as a single string
# For each topic the following fields are stored:
#   title    - link text of the topic, with every '&' rewritten to '&amp;'
#   author   - text of the ev_ubbx_frm_author cell
#   lastpage - highest page number linked after the title, or 1 if none
#   date     - last-post timestamp taken from the <noscript> element
#   poster   - name following "by" in the last-post cell
sub parse_index
{
   my $page = shift;
   # $date and $poster are declared here as well; they used to be assigned
   # without "my" and therefore leaked into package globals.
   my ($message, $title, $lastpage, $author, $date, $poster);

   # One iteration per topic row.  Captures, in order: topic href (including
   # its closing quote), title text, the markup between the title link and
   # the author cell, author, last-post date, last poster.
   while($page =~ /<td class="ev_ubbx_frm_topicicon".*?<td.*?class="ev_ubbx_frm_title".*?<a href="([^"]*")[^>]*>([^<]*)<\/a>(.*?)<td.*?class="ev_ubbx_frm_author">([^<]*)<\/td>.*?<noscript>(\w+ \d\d, \d\d\d\d.*?\d\d:\d\d)<\/noscript>.*?(?:<\/span>.*?by  ([^<]+)<\/a><\/td>)/igms)
   {
      ($message, $title, $lastpage, $author, $date, $poster) = ($1, $2, $3, $4, $5, $6);

      # Reduce the href to the digits that follow "m/".
      $message =~ s/.*m\/(\d+)"/$1/;
      # Drop the first &nbsp; entity from the timestamp.
      $date =~ s/\&nbsp;//;
      $title =~ s/\&/&amp;/g;

      # The greedy .* picks the last "p/<n>" link, i.e. the final page.
      if($lastpage =~ /.*p\/(\d+)"/){ $lastpage = $1; }
      else{ $lastpage = 1; }

      $index{$message}{'title'} = $title;
      $index{$message}{'author'} = $author;
      $index{$message}{'lastpage'} = $lastpage;
      $index{$message}{"date"} = $date;
      $index{$message}{"poster"} = $poster;
   }

   return;
}

# Build an RSS 1.0 document from the topics collected in %index and save it
# to "<forum title>.rss" in the current directory.  Any '/' in the forum
# title is replaced by '_' in the file name.  Items are emitted in by_date
# order; each links to the last page of its topic.
# Reads the globals $forum_title, $index (channel link), $base, $forum and
# %index; takes no arguments.
sub create_rss
{
   # Current timestamp for the channel.  The substitution inserts a ':'
   # before the final two digits of the string that UnixDate produced.
   my $temp_date = UnixDate("today", "%Y-%m-%dT%H:%M%z");
   $temp_date =~ s/(.*)(\d\d)$/$1:$2/;

   my $rss = XML::RSS->new(version => '1.0');
   $rss->channel(title => $forum_title,
	 link  => $index,
	 description => "$forum_title",
	 dc => {
	    language => "en-us",
	    date => "$temp_date",
	    lastBuildDate => "$temp_date",
	    creator => "greenfly"
	    },
	 );

   # "sort by_date(LIST)" is the sort SUBNAME LIST form: by_date is the
   # comparator, not a function called with the keys.
   foreach my $message (sort by_date(keys %index))
   {
      $rss->add_item(
	    title => $index{$message}{'title'},
	    link  => $base . "a/tpc/f/$forum/m/$message/p/$index{$message}{'lastpage'}",
	    description => "Last Post $index{$message}{'date'} by $index{$message}{'poster'}"
	    );
   }

   # A title such as 'Audio/Visual Club' would otherwise be taken as a path
   # into a directory named "Audio" and the save would fail.
   (my $file_name = $forum_title) =~ tr{/}{_};
   $rss->save("$file_name.rss");

   return;
}
