", i18n("page_title"), " ", join(", ", map { s/^\d+,//; s/,.*//; $_ } @rsspaths), "

blocks instead. # # v0.2.9 - bre # Allowed null resources, for skipping slots in generic CSSes. # Improved the blurb at bottom a little bit. # # v0.2.8 - bre # Made RSS ID's into plain digits signifying where in the URL order # they occured. This facilitates re-use of stylesheets for different # RSS combinations. Added "refresh" link. # # v0.2.7 - bre # Added &refresh=N option, which allows the caller to specify the # delay (in minutes) for the meta-refresh tag. 0 disables refreshing. # # v0.2.6 - bre # Added info page. # Improved comments. # use strict; use LWP::Simple; use MIME::Base64; use File::stat; # Declare variables my @rsspaths = ( ); my @headlines = ( ); my $stylesheet = ""; my $cachedir; my $selfurl = undef; my $config = { policy => "open", # Default policy is open - any RSS/CSS is allowed. sortby => "time", # Default sort order. refresh => 15, # Default refresh is after 15 minutes. jsrefresh => 1, # Use Javascript reloading code (default on). sortable => 0, # Print fancy header for sorting. notables => 0, # Use

etc. instead of tables. showdesc => 0, # Display channel descriptions? cache_ttl => 10, # Default cache TTL is 10 minutes. lang => "en", # Default is English... justlist => 0, # Display HTML header/footer by default. deadline => 15, # How long is this allowed to take? target => "_top", # Default URL target. headlines => 500, # Maximum headlines per page. ad_dir => 0, # Directory containing advertisements ad_pos => 4, # How many lines down do we insert ads? ad_rss => "", # RSS added to all URLs, for advertising. cnt_every => 0, # Interval between updating user counter. cnt_file => undef, # User-counter log file. }; my $urlalias = { }; my $descalias = { }; my $sourceinfo = { }; # Translation data... my $msg = { is => { btn_chooser => "V", des_chooser => "FIXME", btn_close => "X", des_close => "Fela þessa valkosti.", btn_info => "uppl.", des_info => "Birta upplýsingar um stillingar rssticker o.fl.", btn_refresh => "uppfæra", des_refresh => "Uppfæra þennan glugga", btn_sortable => "R", des_sortable => "Birta röðunarhnappa.", Error => "Villa", invalid_res => "Ógild veituskilgreining: ", invalid_root => "Ógild rót!", invalid_url => "Ógild vefslóð: ", no_conf => "Gat ekki lesið stillingaskrá!", not_a_cgi => "Ekki í CGI umhverfi?", page_title => "Fyrirsagnalisti:", policy_closed => "Þú mátt bara nota fyrirfram skilgreindar fréttaveitur.", powered => "framreitt af", sep => "::", sort_order => "Raða eftir: ", sort_id => "Nafni+", sort_rev_id => "Nafni-", sort_time => "Tíma+", sort_rev_time => "Tíma-", }, en => { btn_chooser => "C", des_chooser => "FIXME", btn_close => "X", des_close => "Hide these options.", btn_info => "info", des_info => "Information about this rssticker.", btn_refresh => "refresh list", des_refresh => "Reload the contents of this list.", btn_sortable => "S", des_sortable => "Display sorting toolbar.", Error => "Error", invalid_res => "Invalid resource specification: ", invalid_root => "Invalid root!", invalid_url => "Invalid URL in: ", no_conf => "Failed to read configuration!", not_a_cgi => "Not running via CGI?", page_title => "Headline listing:", policy_closed => "Policy is closed, you must use predefined URLs.", powered => "powered by", sep => "::", sort_order => "Sort by: ", sort_id => "ID+", sort_rev_id => "ID-", sort_time => "Time+", sort_rev_time => "Time-", } }; # Check environment... panic(i18n("not_a_cgi")) unless ($ENV{"SERVER_NAME"}); $cachedir = $ENV{"PATH_TRANSLATED"} || "."; $selfurl = $ENV{"SCRIPT_URI"} || $ENV{"SCRIPT_URL"} || "./"; panic(i18n("invalid_root") . $cachedir) unless (-e $cachedir."/rssticker.cfg"); # Read configuration file, set "conveniance" variables. open (CFG, $cachedir."/rssticker.cfg") || panic(i18n("no_conf")); while () { s/\s*#.*$//g; if (/^\s*$/) { # Do nothing, is a comment or blank line } elsif (/^([^\s=:%>]+)\s*%\s*(.*)\s*$/) { # msg % translation $msg->{ $config->{lang} }->{lc($1)} = $2; } elsif (/^([^\s=:%>]+)\s*:\s*(.*)\s*$/) { # variable : setting $config->{lc($1)} = $2; } elsif (/^([^\s=:%>]+)\s*=\s*(.*)\s*$/) { # alias = url $urlalias->{lc($1)} = $2; } elsif (/^([^\s=:%>]+)\s*>\s*(.*)\s*$/) { # alias > description $descalias->{lc($1)} .= $2 . " "; } } close(CFG); my $target = $config->{target}; my $refresh = $config->{refresh} * 60; my $cache_ttl = $config->{cache_ttl} * 60; my $runtime = int(time() / $cache_ttl) * $cache_ttl; my $deadline = time() + $config->{deadline}; # Load variables... if (my $qs = $ENV{"QUERY_STRING"}) { $qs =~ s/&?submit=[^&]*//gsi; if ($qs =~ s/&css=([^&]+)//is) { $qs = $1."&".$qs; } while ($qs =~ s/&refresh=(\d+)(j|m)?//is) { $refresh = $1 * 60; $config->{jsrefresh} = 0 if ($2 =~ /m/i); } if ($qs =~ s/¬ables(=[^&]+)?//gis) { $config->{notables} = 1; } if ($qs =~ s/&noads(=[^&]+)?//gis) { $config->{ad_dir} = 0; } while ($qs =~ s/&locale=(en|is)//gis) { $config->{lang} = 1; } while ($qs =~ s/&showdesc=(1|0)//is) { $config->{showdesc} = $1; } while ($qs =~ s/&sortable=(0|1)//is) { $config->{sortable} = $1; } while ($qs =~ s/&justlist=(0|1)//is) { $config->{justlist} = $1; } while ($qs =~ s/&max=(\d+)//is) { $config->{headlines} = $1; } while ($qs =~ s/&sort=(id|time|rev-time|rev-id)//is) { $config->{sortby} = $1; } while ($qs =~ s/&target=([^&]+)//is) { $target = urldecode($1); } $qs =~ s/\s*$//s; $qs =~ s/^\s*//s; $qs .= $config->{ad_rss} if ($config->{ad_dir}); @rsspaths = split(/\s*&r?=/, $qs); $stylesheet = urldecode(shift @rsspaths); # Look up alias for stylesheet, if necessary. if (($stylesheet) && ($stylesheet !~ /^http:\/\//i)) { $stylesheet = $urlalias->{lc($stylesheet)}; } } # Display a help page and quit if no resources were specified. show_help_page() unless (@rsspaths); # Import RSS data into headline listing... foreach my $path (unsort(@rsspaths)) { my ($i, $u); $path = urldecode($path); if ($path =~ /^((?:\d+,)?[^,]+),(.*)$/i) { ($i, $u) = ($1,$2); if (defined $urlalias->{lc($u)}) { $u = $urlalias->{lc($u)}; $path = "$i,$u"; } elsif (($config->{policy} !~ /^open$/) && ($u ne "")) { panic(i18n("policy_closed")); } elsif (($u !~ /^http:\/\//) && ($u ne "")) { panic(i18n("invalid_url").$path); } } else { panic(i18n("invalid_res").$path); } my $max = 10; my @args = split(/,/, $path, 3); $max = shift @args if (@args == 3); $sourceinfo->{$args[0]} = { }; if ($u ne "") { my @data = get_data(@args); push @headlines, splice(@data, 0, $max); } } # Assign ID numbers sequentially. my $ids = { }; my $idn = 1; foreach my $path (@rsspaths) { $path = urldecode($path); my @args = split(/,/, $path, 3); my $max = shift @args if (@args == 3); $ids->{$args[0]} = $idn++; } # Everything is OK, render the RSS data as HTML. # print "Content-Type: text/html\n", user_counter(), "\n"; if (!$config->{justlist}) { print "\n"; if ($refresh) { if (time() >= $deadline) { # We probably haven't refreshed everything, so tell browser # to try again a bit sooner than usual. $refresh /= 3; } if ($config->{jsrefresh}) { my $t = time().$$; print "\n"; } else { print "\n"; } } if ($stylesheet ne "") { print "\n"; } print "", i18n("page_title"), " ", join(", ", map { s/^\d+,//; s/,.*//; $_ } @rsspaths), " - rssticker $version\n", "

\n"; if ($config->{sortable}) { my $baseurl = $selfurl ."?". $ENV{"QUERY_STRING"}; $baseurl =~ s/&sort=(id|time|rev-time|rev-id)//i; $baseurl =~ s/&sortable=(0|1)//i; my ($burl1, $burl2) = split(/\s*&=/,$baseurl,2); print "

\n", "", i18n("btn_close"), "\n", i18n("sort_order"); print "", i18n("sort_id"), "\n" if ($config->{sortby} ne "id"); print "", i18n("sort_rev_id"), "\n" if ($config->{sortby} ne "rev-id"); print "", i18n("sort_time"), "\n" if ($config->{sortby} ne "time"); print "", i18n("sort_rev_time"), "\n" if ($config->{sortby} ne "rev-time"); print "

\n"; } print "

"; } my $table = "table"; my $tr = "tr"; my $td = "td"; my $sep = i18n("sep"); if ($config->{notables}) { $table = "div"; $tr = "div"; $td = "span"; $sep = ""; } print "<$table class=\"list\">\n"; my $count = 0; foreach my $headline (sorted_data()) { my ($age, $id, $url, $subject, $description) = @{ $headline }; $subject = make_safe($subject); $subject =~ s/^\s*$/(no subject)/s; $url = make_safe($url); $url =~ s/^\s*$/\//s; display_ad($tr, $td) if (($config->{ad_dir}) && (($config->{ad_pos} == $count) || ($config->{headlines}+$config->{ad_pos} == $count))); print "<$tr class=\"ch_", $ids->{$id}, "\">\n\t"; print "<$td class=\"id\">{$id}->{url}, "\">$id\n\t" if ($#rsspaths); print "<$td class=\"title\">", $subject, ""; if ($config->{showdesc}) { my $d = make_safe($description); $d =~ s/", $d, "" if ($d !~ /^\s*$/); } print "\n\t\n"; last if ($count++ > $config->{headlines}); } print "\n"; if (!$config->{justlist}) { print "\n"; } exit 0; ##[ FUNCTIONS ]############################################################### sub display_ad { my ($tr, $td) = @_; my $file = undef; my @text = ( "Error reading ad-dir." ); if (opendir(DIR, $config->{ad_dir})) { my @files = grep(/\.html$/, readdir(DIR)); closedir(DIR); return undef unless (@files); $file = $config->{ad_dir}."/".$files[rand() * @files]; } @text = ( "Error reading $file !" ) if ($file); if ($file && open(AD, "<$file")) { @text = ; close(AD); } print "<$tr>"; print "<$td class=\"id\"> " if ($#rsspaths); print "<$td class=\"title\" align=\"center\">", join('', @text), "\n"; } sub btn_sortable { my $sep = shift; if ($config->{sortable} ne "1") { my $baseurl = $selfurl."?".$ENV{"QUERY_STRING"}; $baseurl =~ s/&sortable=0//gi; my ($burl1, $burl2) = split(/\s*&=/,$baseurl,2); return "". i18n("btn_sortable")." $sep\n"; } } sub btn_refresh { my $sep = shift; return "". i18n("btn_refresh"). " $sep\n"; } sub btn_info { my $sep = shift; return "". i18n("btn_info"). " $sep\n"; } sub i18n { my $string = shift; return $msg->{ $config->{lang} }->{$string} || $msg->{"en"}->{$string}; } sub sorted_data { if (lc($config->{sortby}) eq "id") { return sort { ($$b[1] cmp $$a[1]) || ($$b[0] <=> $$a[0]) || ($$a[2] cmp $$b[2]) } @headlines; } elsif (lc($config->{sortby}) eq "rev-id") { return sort { ($$a[1] cmp $$b[1]) || ($$a[0] <=> $$b[0]) || ($$b[2] cmp $$a[2]) } @headlines; } elsif (lc($config->{sortby}) eq "rev-time") { return sort { ($$a[0] <=> $$b[0]) || ($$b[2] cmp $$a[2]) } @headlines; } return sort { ($$b[0] <=> $$a[0]) || ($$a[2] cmp $$b[2]) } @headlines; } sub get_data { my ($id, $path) = @_; # First we check our cache - if it is recent (< 10 minutes) we # just use it... my ($age, @allart) = load_cached_data($id, $path); my @new = ( ); if ($age >= ($cache_ttl * (1 + rand()))) { # Otherwise we update our cache from the original source... @new = sort { ($$b[0] <=> $$a[0]) } get_rss_data($id, $path); } if (@new) { my @articles; push @allart, splice(@new, 0, 50); # Trim duplicate entries (favouring the older ones) my $last = undef; my @uniqart = grep { ($$last[2] ne $$_[2]) && ($last = $_) || # This updates old data with new... (($$last[4] = $$_[4]) && 0) || (($$last[3] = $$_[3]) && 0) } sort { ($$a[2] cmp $$b[2]) || ($$a[0] <=> $$b[0]) } @allart; @allart = sort { ($$b[0] <=> $$a[0]) } @uniqart; @articles = splice(@allart, 0, 50); # Cache new headline list. save_cached_data($id, $path, @articles) if ((@articles) && ($age >= $cache_ttl)); return @articles; } else { return @allart; } } sub get_rss_data { my ($id, $path) = @_; my @articles = ( ); my $rss = undef; # Check if we have any time left... my $timeout = $deadline - time(); return ( ) unless ($timeout > 0); $rss = forkget($path, $timeout); return ( ) unless ($rss); # First we want to figure out our charset... my $encoding = "ISO-8859-1"; if ($rss =~ /<\?xml.*encoding=\"?([^\">]+)/i) { $encoding = $1; } # This is a really bad RSS parser. :-) my $i = 0; while ($rss =~ s/]*)?>(.*?)<\/item>//is) { my $item = $1; my $title = "no title"; my $link = undef; my $desc = undef; $title = $1 if ($item =~ /]*)?>(.*?)<\/title>/is); $link = $1 if ($item =~ /]*)?>(.*?)<\/link>/is); $desc = $1 if ($item =~ /]*)?>(.*?)<\/description>/is); if ($encoding =~ /utf-?8/i) { # Quick hack, ick, ick! $title =~ s/([\xC0-\xDF])([\x80-\xBF])/chr(ord($1)<<6&0xC0|ord($2)&0x3F)/esg; $desc =~ s/([\xC0-\xDF])([\x80-\xBF])/chr(ord($1)<<6&0xC0|ord($2)&0x3F)/esg; $link =~ s/([\xC0-\xDF])([\x80-\xBF])/chr(ord($1)<<6&0xC0|ord($2)&0x3F)/esg; } # Line breaks bad. $desc =~ s/\n/ /sg; $desc =~ s/\s+/ /sg; # Found something? push @articles, [ $runtime - $i, $id, xmldecode($link), xmldecode($title), xmldecode($desc) ] if (($link) && ($i < 16)); # Ensure that our time stamps reflect the order of the RSS input. $i++; } if ($rss =~ s/(.*?)<\/link>//is) { $sourceinfo->{$id}->{url} = $1; } return @articles; } sub make_safe { my $string = shift; # This allows , and tags. That should be enough for now. $string =~ s/<\/?[c-hj-z][^>]*>//gi; $string =~ s/<\/\S[^\s>][^>]*>//gi; $string =~ s/<[^\s\/>][^\s\/>][^>]*>//gi; # Chop off trailing, unclosed tags. $string =~ s/<+[^>]*$//s; # Next - map all valid pairs to Something Else, kill the leftovers. $string =~ s/á/á/g; $string =~ s/é/é/g; while ($string =~ s/<(\S+)([^>]*)>(.*?)<(\/\1)>/á$1$2é$3á$4é/gi) { }; while ($string =~ s/<+[^>]*>+//g) { }; $string =~ s/á//g; return $string; } sub xmldecode { my $string = shift; # Decode stuff which has been encoded/escaped out. $string =~ s/'/'/g; $string =~ s/<//g; $string =~ s/&#(\d+);/$_=chr($1)/ge; $string =~ s/&/&/g; return $string; } sub xmlencode { my $string = shift; # Encode stuff. $string =~ s/&/&/g; $string =~ s/\012/ /gs; $string =~ s/\015/ /gs; $string =~ s/>/>/g; $string =~ s/) { chomp $l; if ($l =~ /^si:\s*(.*?)=(.*)$/) { $sourceinfo->{$id}->{$1} = $2; } elsif (my @a = split(/<ó>/, $l, 5)) { push @articles, [ $a[0], $id, xmldecode($a[2]), xmldecode($a[3]), xmldecode($a[4]) ]; } elsif (my @a = split(/;;/, $l, 4)) { push @articles, [ $a[0], $id, xmldecode($a[2]), xmldecode($a[3]), "" ]; } } close(CF); my $cs = stat($cachefile); return (time() - $cs->mtime ,@articles); } sub save_cached_data { my ($id, $path, @articles) = @_; my $cachefile = cache_file($path); open(CF, ">$cachefile") || return undef; foreach my $key (keys(%{ $sourceinfo->{$id} })) { print CF "si: $key=", $sourceinfo->{$id}->{$key}, "\n"; } foreach my $a (@articles) { print CF join('<ó>', map { xmlencode($_) } @{ $a }), "\n"; } close(CF); return 1; } sub show_help_page { if ($config->{help} =~ /^redirect:\s*(.*)$/i) { print "Location: $1\n\n"; return; } print "Content-type: text/html\n\n"; print "rssticker $version \@ ", $ENV{"SERVER_NAME"}; print "\n"; print "This is rssticker $version \@ ", $ENV{"SERVER_NAME"}, "
\n"; if ($config->{help} !~ /^quiet$/) { my $rss = undef; print "
Available URL aliases (RSS):
\n"; foreach my $key (sort grep(!/css$/i, keys(%{ $urlalias }))) { $rss = $key unless ($rss); my $u = $urlalias->{$key}; print "
$key, $u\n"; } print "
"; print "
Available URL aliases (CSS):
\n"; foreach my $key (sort grep(/css$/i, keys(%{ $urlalias }))) { my $u = $urlalias->{$key}; print "
$key, $u\n"; } print "
"; if ($config->{policy} =~ /^open$/i) { print "
This ticker is open, you may specify your own resources.
\n"; } else { print "
This ticker is closed, you may only use the above aliases.
\n"; } print "
Default cache TTL is $cache_ttl seconds.
\n"; print "
Default page refresh time is $refresh seconds.
\n"; print "
View configuration file.
\n"; } else { print "
This ticker's configuration is a secret. Please visit\n"; print "the rssticker home page for more info.
\n"; } print "\n"; exit 0; } sub panic { my $msg = shift; print "Content-type: text/html\n\n"; print "rssticker $version: ", i18n("Error"), ", $msg\n"; print "
", i18n("Error"), ":
\n", $msg, "\n"; print "
"; if ($config->{"footer"}) { print $config->{"footer"}; } else { print "rssticker $version\n"; } exit 0; } sub urldecode { my $text = shift; $text =~ tr/+/ /; $text =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; return $text; } sub urlencode { my $text = shift; $text =~ s/([^A-Za-z0-9\s])/ $_=sprintf("%%%2.2X", ord($1))/ge; return $text; } sub unsort { return sort { rand() <=> rand() } @_; } sub forkget { my $url = shift; my $timeout = shift; my $sleeps = 1; my $pid = undef; do { $pid = open(GET, "-|"); unless (defined $pid) { warn "cannot fork: $!"; die "bailing out" if ($sleeps++ > 6); sleep 2; } } until(defined $pid); if ($pid) { my $ret = undef; eval { local $SIG{ALRM} = sub { die "timed out\n" }; alarm $timeout; $ret = join('', ); alarm 0; close(GET) || return undef; }; if ($@) { kill 'KILL' => $pid; close(GET); return undef; alarm 0; } return $ret; } else { # KID - here we should use something smarter than get and # abort if someone tries to feed us Too Much Data(tm). print get($url); exit(0); } } # This implements a two-stage cookie counter, which only counts people who # have cookies enabled and haven't been counted within the last counting # interval. # sub user_counter { return "" if ($ENV{HTTP_COOKIE} =~ /rssticker=COUNTED/i); return "" unless ($config->{cnt_every} && $config->{cnt_file}); if (open (CNT, ">>".$config->{cnt_file})) { if ($ENV{HTTP_COOKIE} =~ /rssticker=COUNTING/i) { if (my $r = $ENV{HTTP_REFERER}) { print CNT scalar localtime(), " New from: $r\n"; } else { print CNT scalar localtime(), " New user!\n"; } close(CNT); return "Set-Cookie: rssticker=COUNTED; path=/; expires=". http_time(time() + $config->{cnt_every})."\n"; } return "Set-Cookie: rssticker=COUNTING; path=/; expires=". http_time(time() + $config->{cnt_every})."\n"; } return ""; } sub http_time { my $time = shift; my $ht = gmtime($time); $ht =~ s/^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+).*$/$1, $3-$2-$5 $4 GMT/; return $ht; }