Yes, Virginia, you can filter HTML files too.
#!/usr/local/bin/perl
# Script: naughty2.pl
package HTML::Parser::FixNaughty;
# HTML::Parser subclass built on the version-2 style callback methods.
# It copies the parsed document to STDOUT, leaving markup untouched and
# masking every four-character word in prose text as "X**X".
require HTML::Parser;
our @ISA = ('HTML::Parser');

# Start-tag callback: echo the tag exactly as it appeared in the input.
# While inside <script> or <style> a flag is kept on the parser object so
# that text() leaves code and style sheets alone.
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;
    $self->{fixnaughty_verbatim} = 1 if $tag =~ /^(?:script|style)$/i;
    print $origtext;
}

# End-tag callback: echo the original source text when the parser supplies
# it (preserves case and spacing); otherwise rebuild the tag from its name.
sub end {
    my ($self, $tag, $origtext) = @_;
    $self->{fixnaughty_verbatim} = 0 if $tag =~ /^(?:script|style)$/i;
    print defined $origtext ? $origtext : "</$tag>";
}

# Text callback: mask four-character words, then print.
# Character entities such as &nbsp; or &#1234; are matched first and
# re-emitted untouched; masking them would corrupt the markup.
sub text {
    my ($self, $text) = @_;
    unless ($self->{fixnaughty_verbatim}) {
        $text =~ s/(&\#?\w+;)|\b(\w)\w{2}(\w)\b/defined $1 ? $1 : "$2**$3"/ge;
    }
    print $text;
}

# Comment callback: receives the comment body without its delimiters.
# Passed through unfiltered so comments are not lost from the output.
sub comment {
    my ($self, $comment) = @_;
    print "<!--$comment-->";
}

# Declaration callback (e.g. DOCTYPE): receives the text between "<!" and
# ">". Passed through so the output keeps its document type.
sub declaration {
    my ($self, $decl) = @_;
    print "<!$decl>";
}

# Processing-instruction callback: echo the original text when available.
sub process {
    my ($self, $token, $origtext) = @_;
    print defined $origtext ? $origtext : "<?$token>";
}
package main;
use strict;
use warnings;
use CGI qw/header path_info redirect path_translated/;

# Entry point. The document to filter is named by the extra path
# information on the request URL, translated to a filesystem path.
my $file = path_translated()
    or die "must be called with additional path info";

# A path ending in "/" refers to that directory's index page.
$file .= "index.html" if $file =~ m!/$!;

# this part is a hack
# Anything that is not .htm/.html is not filtered: the client is redirected
# to the extra path info instead.
# NOTE(review): presumably that URL is served directly by the web server;
# confirm it does not route back to this script.
unless ($file =~ /\.html?$/) {
    print redirect(path_info());
    exit 0;
}

# Report a missing or unreadable document before any success header goes
# out; otherwise the client would receive a 200 response with an empty body.
unless (-f $file && -r _) {
    print header(-status => '404 Not Found', -type => 'text/plain'),
          "Not found\n";
    exit 0;
}

my $parser = HTML::Parser::FixNaughty->new;
print header();

# parse_file returns undef when the file cannot be opened.
defined $parser->parse_file($file)
    or die "cannot read $file: $!";
|
Declare naughty2.pl to be the Action handler for text/html files in order to achieve automatic filtering:
<Location /clean_docs/>
Action text/html /cgi-bin/naughty2.pl
</Location>
http://localhost/clean_docs/handout.html
|
| Contents | Next |