tag if ($sect_level == 0) { print "<h1 align=\"center\">"; } elsif ($sect_level == 1) { $sect_1_count++; print "<a name=\"section_$sect_1_count\">\n"; print "<h2> $sect_1_count. "; } elsif ($sect_level == 2) { $sect_2_count++; print "<a name=\"subsection_$sect_1_count.$sect_2_count\">\n"; print "<h3> $sect_1_count.$sect_2_count "; } } elsif ($el =~ /\bauthor\b/i) { # the <author> tag print "<h2 align=\"center\">"; } elsif ($el =~ /\bversion\b/i) { # the <version> tag print "<h3 align=\"center\">Version "; } elsif ($el =~ /\bpara\b/i) { # the <para> tag print "<p>"; } elsif ($el =~ /\bprogramlisting\b/i) { # the <para> tag print "<pre><code>"; } elsif ($el =~ /\bfuncsynopsis\b/i) { # the <para> tag print "<pre><code>"; } elsif ($el =~ /\bitemizedlist\b/i) { print "<ul>\n"; } elsif ($el =~ /\borderedlist\b/i) { print "<ol>\n"; } elsif ($el =~ /\blistitem\b/i) { print "<li>"; } elsif ($el =~ /\bemphasis\b/i) { print "<em>"; } elsif ($el =~ /\bulink\b/i) { print "<a "; foreach my $attrib (keys(%attribs)) { print " $attrib = \"$attribs{$attrib}\""; } print ">"; } else { # for all unrecognized tags, we simply print them. # I did this so I can easily include HTML tags. # One problem is that it doesn't print the attributes # in order, but that's ok here. print "<$el"; foreach my $attrib (keys(%attribs)) { print " $attrib = \"$attribs{$attrib}\""; } print ">"; } }

# NOTE(review): the line above is the tail of a definition that starts
# outside this excerpt; it is kept exactly as found and still needs its
# line breaks restored together with the part that is not visible here.

# here we handle all non-tag strings. All we need to do here is print
# whatever is passed.
#
# Arguments:
#   $p    - parser object; only its current_element method is used
#   $data - the chunk of character data to emit
#
# Output goes to the currently selected filehandle.
sub handle_char {
    my ($p, $data) = @_;

    my $tag = $p->current_element;
    $tag = '' unless defined $tag;    # no enclosing element reported

    # The headings stuff later needs to have all stuff in a section title
    # on one line, so a chunk that is nothing but a newline is dropped
    # while inside a title.  The match is case-insensitive, like the
    # element tests in handle_end.
    return if $tag =~ /\btitle\b/i && $data eq "\n";

    print $data;
    return;
}

# here we handle ending tags.
# Ending-tag handler: prints the closing HTML for the element that has just
# ended.
#
# Arguments:
#   $p  - parser object (not used here)
#   $el - name of the element being closed
#
# Reads the file-level $sect_level to decide which heading a </title>
# closes.  Output goes to the currently selected filehandle.
#
# NOTE(review): handle_final also prints "</html>", so a document whose
# root is <faq> may end up with two of them - confirm which handler is
# meant to own the closing tag.
sub handle_end {
    my ($p, $el) = @_;

    if ($el =~ /\bfaq\b/i) {
        print "</html>";
    } elsif ($el =~ /\bbook\b/i) {
        # nothing to close
    } elsif ($el =~ /\btitle\b/i) {
        if ($sect_level == 0) {
            print "</h1>";
        } elsif ($sect_level == 1) {
            print "</h2>\n";
            print "</a>\n";
        } elsif ($sect_level == 2) {
            print "</h3>\n";
            print "</a>\n";
        }
    } elsif ($el =~ /\bauthor\b/i) {
        print "</h2>\n";
    } elsif ($el =~ /\bversion\b/i) {
        print "</h3>\n";
    } elsif ($el =~ /\bpara\b/i) {
        print "</p>\n";
    } elsif ($el =~ /\bprogramlisting\b/i) {
        print "</code></pre>\n";
    } elsif ($el =~ /\bfuncsynopsis\b/i) {
        print "</code></pre>\n";
    } elsif ($el =~ /\bitemizedlist\b/i) {
        print "</ul>\n";
    } elsif ($el =~ /\borderedlist\b/i) {
        print "</ol>\n";
    } elsif ($el =~ /\blistitem\b/i) {
        print "</li>\n";
    } elsif ($el =~ /\bemphasis\b/i) {
        print "</em>";
    } elsif ($el =~ /\bsect1\b/i) {
        # nothing to close
    } elsif ($el =~ /\bsect2\b/i) {
        # Swallowed on purpose: the start handler emits no markup for
        # <sect2>, so falling through to the default branch would leave a
        # stray "</sect2>" in the HTML.
    } elsif ($el =~ /\bchapter\b/i) {
        # nothing to close
    } elsif ($el =~ /\bchapintro\b/i) {
        print "</em>\n</blockquote>\n";
    } elsif ($el =~ /\bulink\b/i) {
        print "</a>";
    } else {
        # Unrecognized elements are passed through so that plain HTML can
        # be embedded in the source document.
        print "</$el>";
    }
    return;
}

# here we handle document type.
# Document-type handler: prints the opening HTML skeleton.
#
# Arguments:
#   $p    - parser object (not used here)
#   $name - document type name (not used here)
#
# The string used to be split with C-style backslash continuations; Perl
# does not need them and they put a stray whitespace character after every
# "\n", so the pieces are simply concatenated.
sub handle_doc_type {
    my ($p, $name) = @_;

    print "<html>\n"
        . "<head>\n"
        . "</head>\n"
        . "<body>\n";
    return;
}

# Prints the closing HTML skeleton that pairs with handle_doc_type.
sub handle_final {
    print "</body>\n</html>\n";
    return;
}

# External-entity handler: returns the whole content of the file named by
# the system identifier.
#
# Arguments:
#   $p     - parser object (not used here)
#   $base  - base for resolving the identifier (not used here)
#   $sysid - system identifier, used as a file name
#   $pubid - public identifier (not used here)
#
# Returns the file content as one string.  If the file cannot be opened the
# error text itself is returned, as before, so it ends up where the entity
# content would have gone.
sub handle_extern_ent {
    my ($p, $base, $sysid, $pubid) = @_;

    # Three-argument open keeps characters such as "|", "<" or ">" in the
    # identifier from being read as an open mode.  Two-argument open
    # ignored surrounding whitespace, so that is trimmed by hand.
    my $path = defined $sysid ? $sysid : '';
    $path =~ s/^\s+|\s+$//g;

    open(my $input, '<', $path)
        or return "can't open external file $sysid";

    my $extern = do { local $/; <$input> };
    $extern = '' unless defined $extern;    # empty file
    close($input);

    return $extern;
}

# Processing-instruction handler.
#
# Arguments:
#   $p      - parser object (not used here)
#   $target - processing-instruction target
#   $data   - processing-instruction data, used as a file name once any
#             double quotes are removed
#
# A target matching /program/ copies the named file to the output with
# "&", "<" and ">" replaced by HTML entities so the listing is shown
# literally.  A target matching /headings/ currently produces no output;
# its old implementation is kept below, commented out.
#
# If the file cannot be opened, "cant open <file>" is printed to the output
# and nothing else is done.
sub handle_proc {
    my ($p, $target, $data) = @_;

    $data = '' unless defined $data;
    $data =~ s/\"//g;

    # See handle_extern_ent for why the name is trimmed and opened with an
    # explicit read mode.
    my $path = $data;
    $path =~ s/^\s+|\s+$//g;

    my $input;
    unless (open($input, '<', $path)) {
        print "cant open $data\n";
        return;
    }

    if ($target =~ /program/) {
        # sanitize awkward characters; "&" has to go first so that the
        # entities produced for "<" and ">" are not escaped again
        while (my $line = <$input>) {
            $line =~ s/&/&amp;/g;
            $line =~ s/</&lt;/g;
            $line =~ s/>/&gt;/g;
            print $line;
        }
    } elsif ($target =~ /headings/) {
        # THERE HAS GOTTA BE A BETTER WAY OF DOING THIS
        # this has the problem of requiring both XML and HTML files
        #
        # Disabled implementation, kept for reference.  It would need
        # "my ($anchor, $heading, $in_name);" and the handle name updated
        # before it could be switched back on.
        #
        #search for subheadings
        #print " <ol>\n";
        #while (<INPUT>) {
        #    if (/<a name=/) {
        #        $_ =~ s/<a name=\"//;
        #        $anchor = $_;
        #        $anchor =~ s/\">.*//;
        #        chop $anchor;
        #$heading = $_;
        #$heading =~ s/\"><h2>//;
        #$heading =~ s/<\/h2>//;
        #print " <li> <a href=\"$data#$anchor\"> $heading </a></li>\n";
        #        print " <li> <a href=\"$data#$anchor\">\n";
        #        $heading </a></li>\n";
        #        # set this to distinguish between <a name = ...> and <a href = ...>
        #        $in_name = 1;
        #    } elsif (/<\/a>/) {
        #        if ($in_name) {
        #            $in_name = "";
        #            print " </a>\n </li>\n";
        #        }
        #    } elsif (/\<h2\>/) {
        #        $_ =~ s/^.*[.]//;
        #        $_ =~ s/<\/h2>//;
        #        print " ", $_;
        #    } elsif (/\<h3\>/) {
        #        $_ =~ s/^.*[.]//;
        #        $_ =~ s/^[^ ]* //;
        #        $_ =~ s/<\/h3>//;
        #        print " ", $_;
        #    }
        #}
        #close(INPUT);
        #print " </ol>\n";
    }

    close($input);
    return;
}

#############
# FUNCTIONS #
#############

# Section-numbering state shared by the start and end handlers.
my $sect_level   = 0;    # 0 after <chapter>, 1 after <sect1>, 2 after <sect2>
my $sect_1_count = 0;    # bumped by each level-1 title, reset at <chapter>
my $sect_2_count = 0;    # bumped by each level-2 title, reset at <sect1>

# Formats attribute name/value pairs as they appear in an opening tag.
# Pairs are taken in the order given, so document order is preserved.
sub _attrib_string {
    my @pairs = @_;

    my $out = '';
    while (@pairs) {
        my ($name, $value) = splice(@pairs, 0, 2);
        $value = '' unless defined $value;
        $out .= " $name = \"$value\"";
    }
    return $out;
}

# Starting-tag handler: prints the opening HTML for an element and keeps
# the section counters up to date.
#
# Arguments:
#   $p       - reference to the parser object (not used here)
#   $el      - name of the element the parser found
#   @attribs - everything else: attribute name/value pairs
#
# Output goes to the currently selected filehandle.
sub handle_start {
    my ($p, $el, @attribs) = @_;

    # Here is where we start identifying each tag, and printing something.
    if ($el =~ /\bfaq\b/i) {
        # Root tag: nothing is emitted.
        # NOTE(review): the statement that used to be here survives only as
        # 'print "";' (its literal looks lost, and it may have been
        # commented out), while handle_end still prints "</html>" for this
        # element - confirm what was intended.
    } elsif ($el =~ /\bbook\b/i) {
        # no markup of its own
    } elsif ($el =~ /\bchapter\b/i) {
        $sect_level   = 0;
        $sect_1_count = 0;
    } elsif ($el =~ /\bchapintro\b/i) {
        # Opening half of the "</em>\n</blockquote>\n" that handle_end
        # prints for this element.
        print "<blockquote>\n<em>\n";
    } elsif ($el =~ /\bsect1\b/i) {
        $sect_level   = 1;
        $sect_2_count = 0;
    } elsif ($el =~ /\bsect2\b/i) {
        $sect_level = 2;
    } elsif ($el =~ /\btitle\b/i) {
        # The heading level follows the innermost section seen so far.
        if ($sect_level == 0) {
            print "<h1 align=\"center\">";
        } elsif ($sect_level == 1) {
            $sect_1_count++;
            print "<a name=\"section_$sect_1_count\">\n";
            print "<h2> $sect_1_count. ";
        } elsif ($sect_level == 2) {
            $sect_2_count++;
            print "<a name=\"subsection_$sect_1_count.$sect_2_count\">\n";
            print "<h3> $sect_1_count.$sect_2_count ";
        }
    } elsif ($el =~ /\bauthor\b/i) {
        print "<h2 align=\"center\">";
    } elsif ($el =~ /\bversion\b/i) {
        print "<h3 align=\"center\">Version ";
    } elsif ($el =~ /\bpara\b/i) {
        print "<p>";
    } elsif ($el =~ /\bprogramlisting\b/i) {
        print "<pre><code>";
    } elsif ($el =~ /\bfuncsynopsis\b/i) {
        print "<pre><code>";
    } elsif ($el =~ /\bitemizedlist\b/i) {
        print "<ul>\n";
    } elsif ($el =~ /\borderedlist\b/i) {
        print "<ol>\n";
    } elsif ($el =~ /\blistitem\b/i) {
        print "<li>";
    } elsif ($el =~ /\bemphasis\b/i) {
        print "<em>";
    } elsif ($el =~ /\bulink\b/i) {
        # Attributes are copied through unchanged, so the element has to
        # carry whatever attribute the browser needs (e.g. href) itself.
        print "<a ";
        print _attrib_string(@attribs);
        print ">";
    } else {
        # for all unrecognized tags, we simply print them.
        # I did this so I can easily include HTML tags.
        # Attributes are printed in the order they were passed in.
        print "<$el";
        print _attrib_string(@attribs);
        print ">";
    }
    return;
}

# here we handle all non-tag strings. All we need to do here is print
# whatever is passed.
#
# Arguments:
#   $p    - parser object; only its current_element method is used
#   $data - the chunk of character data to emit
sub handle_char {
    my ($p, $data) = @_;

    my $tag = $p->current_element;
    $tag = '' unless defined $tag;    # no enclosing element reported

    # The headings stuff later needs to have all stuff in a section title
    # on one line, so a chunk that is nothing but a newline is dropped
    # while inside a title.  The match is case-insensitive, like the
    # element tests in handle_start.
    return if $tag =~ /\btitle\b/i && $data eq "\n";

    print $data;
    return;
}

# here we handle ending tags.
# Ending-tag handler: prints the closing HTML for the element that has just
# ended.
#
# Arguments:
#   $p  - parser object (not used here)
#   $el - name of the element being closed
#
# Reads the file-level $sect_level to decide which heading a </title>
# closes.  Output goes to the currently selected filehandle.
#
# NOTE(review): handle_final also prints "</html>", so a document whose
# root is <faq> may end up with two of them - confirm which handler is
# meant to own the closing tag.
sub handle_end {
    my ($p, $el) = @_;

    if ($el =~ /\bfaq\b/i) {
        print "</html>";
    } elsif ($el =~ /\bbook\b/i) {
        # nothing to close
    } elsif ($el =~ /\btitle\b/i) {
        if ($sect_level == 0) {
            print "</h1>";
        } elsif ($sect_level == 1) {
            print "</h2>\n";
            print "</a>\n";
        } elsif ($sect_level == 2) {
            print "</h3>\n";
            print "</a>\n";
        }
    } elsif ($el =~ /\bauthor\b/i) {
        print "</h2>\n";
    } elsif ($el =~ /\bversion\b/i) {
        print "</h3>\n";
    } elsif ($el =~ /\bpara\b/i) {
        print "</p>\n";
    } elsif ($el =~ /\bprogramlisting\b/i) {
        print "</code></pre>\n";
    } elsif ($el =~ /\bfuncsynopsis\b/i) {
        print "</code></pre>\n";
    } elsif ($el =~ /\bitemizedlist\b/i) {
        print "</ul>\n";
    } elsif ($el =~ /\borderedlist\b/i) {
        print "</ol>\n";
    } elsif ($el =~ /\blistitem\b/i) {
        print "</li>\n";
    } elsif ($el =~ /\bemphasis\b/i) {
        print "</em>";
    } elsif ($el =~ /\bsect1\b/i) {
        # nothing to close
    } elsif ($el =~ /\bsect2\b/i) {
        # Swallowed on purpose: handle_start emits no markup for <sect2>,
        # so falling through to the default branch would leave a stray
        # "</sect2>" in the HTML.
    } elsif ($el =~ /\bchapter\b/i) {
        # nothing to close
    } elsif ($el =~ /\bchapintro\b/i) {
        print "</em>\n</blockquote>\n";
    } elsif ($el =~ /\bulink\b/i) {
        print "</a>";
    } else {
        # Unrecognized elements are passed through so that plain HTML can
        # be embedded in the source document.
        print "</$el>";
    }
    return;
}

# here we handle document type.
# Document-type handler: prints the opening HTML skeleton.
#
# Arguments:
#   $p    - parser object (not used here)
#   $name - document type name (not used here)
#
# The string used to be split with C-style backslash continuations; Perl
# does not need them and they put a stray whitespace character after every
# "\n", so the pieces are simply concatenated.
sub handle_doc_type {
    my ($p, $name) = @_;

    print "<html>\n"
        . "<head>\n"
        . "</head>\n"
        . "<body>\n";
    return;
}

# Prints the closing HTML skeleton that pairs with handle_doc_type.
sub handle_final {
    print "</body>\n</html>\n";
    return;
}

# External-entity handler: returns the whole content of the file named by
# the system identifier.
#
# Arguments:
#   $p     - parser object (not used here)
#   $base  - base for resolving the identifier (not used here)
#   $sysid - system identifier, used as a file name
#   $pubid - public identifier (not used here)
#
# Returns the file content as one string.  If the file cannot be opened the
# error text itself is returned, as before, so it ends up where the entity
# content would have gone.
sub handle_extern_ent {
    my ($p, $base, $sysid, $pubid) = @_;

    # Three-argument open keeps characters such as "|", "<" or ">" in the
    # identifier from being read as an open mode.  Two-argument open
    # ignored surrounding whitespace, so that is trimmed by hand.
    my $path = defined $sysid ? $sysid : '';
    $path =~ s/^\s+|\s+$//g;

    open(my $input, '<', $path)
        or return "can't open external file $sysid";

    my $extern = do { local $/; <$input> };
    $extern = '' unless defined $extern;    # empty file
    close($input);

    return $extern;
}

# Processing-instruction handler.
#
# Arguments:
#   $p      - parser object (not used here)
#   $target - processing-instruction target
#   $data   - processing-instruction data, used as a file name once any
#             double quotes are removed
#
# A target matching /program/ copies the named file to the output with
# "&", "<" and ">" replaced by HTML entities so the listing is shown
# literally.  A target matching /headings/ currently produces no output;
# its old implementation is kept below, commented out.
#
# If the file cannot be opened, "cant open <file>" is printed to the output
# and nothing else is done.
sub handle_proc {
    my ($p, $target, $data) = @_;

    $data = '' unless defined $data;
    $data =~ s/\"//g;

    # See handle_extern_ent for why the name is trimmed and opened with an
    # explicit read mode.
    my $path = $data;
    $path =~ s/^\s+|\s+$//g;

    my $input;
    unless (open($input, '<', $path)) {
        print "cant open $data\n";
        return;
    }

    if ($target =~ /program/) {
        # sanitize awkward characters; "&" has to go first so that the
        # entities produced for "<" and ">" are not escaped again
        while (my $line = <$input>) {
            $line =~ s/&/&amp;/g;
            $line =~ s/</&lt;/g;
            $line =~ s/>/&gt;/g;
            print $line;
        }
    } elsif ($target =~ /headings/) {
        # THERE HAS GOTTA BE A BETTER WAY OF DOING THIS
        # this has the problem of requiring both XML and HTML files
        #
        # Disabled implementation, kept for reference.  It would need
        # "my ($anchor, $heading, $in_name);" and the handle name updated
        # before it could be switched back on.
        #
        #search for subheadings
        #print " <ol>\n";
        #while (<INPUT>) {
        #    if (/<a name=/) {
        #        $_ =~ s/<a name=\"//;
        #        $anchor = $_;
        #        $anchor =~ s/\">.*//;
        #        chop $anchor;
        #$heading = $_;
        #$heading =~ s/\"><h2>//;
        #$heading =~ s/<\/h2>//;
        #print " <li> <a href=\"$data#$anchor\"> $heading </a></li>\n";
        #        print " <li> <a href=\"$data#$anchor\">\n";
        #        $heading </a></li>\n";
        #        # set this to distinguish between <a name = ...> and <a href = ...>
        #        $in_name = 1;
        #    } elsif (/<\/a>/) {
        #        if ($in_name) {
        #            $in_name = "";
        #            print " </a>\n </li>\n";
        #        }
        #    } elsif (/\<h2\>/) {
        #        $_ =~ s/^.*[.]//;
        #        $_ =~ s/<\/h2>//;
        #        print " ", $_;
        #    } elsif (/\<h3\>/) {
        #        $_ =~ s/^.*[.]//;
        #        $_ =~ s/^[^ ]* //;
        #        $_ =~ s/<\/h3>//;
        #        print " ", $_;
        #    }
        #}
        #close(INPUT);
        #print " </ol>\n";
    }

    close($input);
    return;
}