#!/usr/bin/perl
# use with perl man2html.pl whatfile > whatfile.htm
# works only on files that are already in formatted format like the
# ones in man1 and end on .1
# other files can be formatted with nroff before transfering
# Stefan Bleeck 20.2.2003
# manServer - Unix man page to HTML converter
# Rolf Howarth, rolf@squarebox.co.uk
# Version 1.07 16 July 2001
$version = "1.07";
$manServerUrl = "manServer $version";
use Socket;
$ENV{'PATH'} = "/bin:/usr/bin";
initialise();
$request = shift @ARGV;
# Usage: manServer [-dn] filename | manServer [-s port]
$root = "";
$cgiMode = 0;
$bodyTag = "BODY bgcolor=#F0F0F0 text=#000000 link=#0000ff vlink=#C000C0 alink=#ff0000";
if ($ENV{'GATEWAY_INTERFACE'} ne "")
{
*OUT = *STDOUT;
open(LOG, ">>/tmp/manServer.log");
chmod(0666, '/tmp/manServer.log');
$root = $ENV{'SCRIPT_NAME'};
$url = $ENV{'PATH_INFO'};
if ($ENV{'REQUEST_METHOD'} eq "POST")
{ $args = ; chop $args; }
else
{ $args = $ENV{'QUERY_STRING'}; }
$url .= "?".$args if ($args);
$cgiMode = 1;
$date = &fmtTime(time);
$remoteHost = $ENV{'REMOTE_HOST'} || $ENV{'REMOTE_ADDR'};
$referer = $ENV{'HTTP_REFERER'};
$userAgent = $ENV{'HTTP_USER_AGENT'};
print LOG "$date\t$remoteHost\t$url\t$referer\t$userAgent\n";
processRequest($url);
}
elsif ($request eq "-s" || $request eq "")
{
*LOG = *STDERR;
startServer();
}
else
{
$cmdLineMode = 1;
if ($request =~ m/^-d(\d)/)
{
$debug = $1;
$request = shift @ARGV;
}
*OUT = *STDOUT;
*LOG = *STDERR;
$file = findPage($request);
man2html($file);
}
exit(0);
##### Mini HTTP Server ####
sub startServer
{
($port) = @ARGV;
$port = 8888 unless $port;
$sockaddr = 'S n a4 x8';
($name, $aliases, $proto) = getprotobyname('tcp');
($name, $aliases, $port) = getservbyname($port, 'tcp')
unless $port =~ /^\d+$/;
while(1)
{
$this = pack($sockaddr, AF_INET, $port, "\0\0\0\0");
select(NS); $| = 1; select(stdout);
socket(S, PF_INET, SOCK_STREAM, $proto) || die "socket: $!";
if (bind(S, $this))
{
last;
}
else
{
print STDERR "Failed to bind to port $port: $!\n";
++$port;
}
}
listen(S, 5) || die "connect: $!";
select(S); $| = 1; select(stdout);
while(1)
{
print LOG "Waiting for connection on port $port\n";
($addr = accept(NS,S)) || die $!;
#print "accept ok\n";
($af,$rport,$inetaddr) = unpack($sockaddr,$addr);
@inetaddr = unpack('C4',$inetaddr);
print LOG "Got connection from ", join(".",@inetaddr), "\n";
while ()
{
if (m/^GET (\S+)/) { $url = $1; }
last if (m/^\s*$/);
}
*OUT = *NS;
processRequest($url);
close NS ;
}
}
sub processRequest
{
$url = $_[0];
print LOG "Request = $url, root = $root\n";
if ( ($url =~ m/^([^?]*)\?(.*)$/) || ($url =~ m/^([^&]*)&(.*)$/) )
{
$request = $1;
$args = $2;
}
else
{
$request = $url;
$args = "";
}
@params = split(/[=&]/, $args);
for ($i=0; $i<=$#params; ++$i)
{
$params[$i] =~ tr/+/ /;
$params[$i] =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack("C",hex($1))/eg;
}
%params = @params;
$request = $params{'q'} if ($params{'q'});
$searchType = $params{'t'};
$debug = $params{'d'};
$processed = 0;
$file = "";
if ($searchType)
{
print OUT "HTTP/1.0 200 Ok\n" unless ($cgiMode);
print OUT "Content-type: text/html\n\n";
print OUT "
Searching not yet implemented
\n";
print LOG "Searching not implemented\n";
$processed = 1;
}
elsif ($request eq "/" || $request eq "")
{
print OUT "HTTP/1.0 200 Ok\n" unless ($cgiMode);
print OUT "Content-type: text/html\n\n";
print LOG "Home page\n";
homePage();
$processed = 1;
}
elsif ($request =~ m,^/.*/$,)
{
print OUT "HTTP/1.0 200 Ok\n" unless ($cgiMode);
print OUT "Content-type: text/html\n\n";
print LOG "List directory\n";
listDir($request);
$processed = 1;
}
elsif (-f $request || -f "$request.gz" || -f "$request.bz2")
{
# Only allow fully specified files if they're in our manpath
foreach $md (@manpath)
{
$dir = $md;
if (substr($request,0,length($dir)) eq $dir)
{
print OUT "HTTP/1.0 200 Ok\n" unless ($cgiMode);
print OUT "Content-type: text/html\n\n";
man2html($request);
$processed = 1;
last;
}
}
}
else
{
$file = findPage($request);
if (@multipleMatches)
{
print OUT "HTTP/1.0 200 Ok\n" unless ($cgiMode);
print OUT "Content-type: text/html\n\n";
print LOG "Multiple matches\n";
printMatches();
$processed = 1;
}
elsif ($file)
{
print OUT "HTTP/1.0 301 Redirected\n" unless ($cgiMode);
$file .= "&d=$debug" if ($debug);
print OUT "Location: $root$file\n\n";
print LOG "Redirect to $root$file\n";
$processed = 1;
}
}
unless ($processed)
{
print OUT "HTTP/1.0 404 Not Found\n" unless ($cgiMode);
print OUT "Content-type: text/html\n\n";
print OUT "\nNot Found\n<$bodyTag>\n";
print OUT "
Not Found
\nFailed to find man page /$request\n";
print OUT "
Main Index\n\n";
print STDERR "Failed to find /$request\n" unless ($cgiMode);
}
}
sub homePage
{
print OUT "Manual Pages - Main Index
<$bodyTag>
Manual Reference Pages - Main Index
\n";
$uname = `uname -s -r`;
if (! $?)
{
$hostname = `hostname`;
print OUT "$uname pages on $hostname
\n";
}
# print OUT " Command name
# Keyword search Full text search\n";
print OUT "Command name:
\n";
loadManDirs();
foreach $dir (@mandirs)
{
($section) = ($dir =~ m/man([0-9A-Za-z]+)$/);
print OUT "$dir" ;
print OUT "- $sectionName{$section}" if ($sectionName{$section});
print OUT "
\n";
}
print OUT "
Generated by $manServerUrl from local unix man pages.\n\n";
}
sub listDir
{
foreach $md (@manpath)
{
$dir = $md;
if (substr($request,0,length($dir)) eq $dir)
{
$request =~ s,/$,,;
($section) = ($request =~ m/man([0-9A-Za-z]+)$/);
$sectionName = $sectionName{$section};
$sectionName = "Manual Reference Pages" unless ($sectionName);
print OUT "Contents of $request\n<$bodyTag>\n";
print OUT "
$sectionName - Index of $request
\n";
print OUT "\n";
print OUT "Command name:
\n";
if (opendir(DIR, $request))
{
@files = sort readdir DIR;
foreach $f (@files)
{
next if ($f eq "." || $f eq ".." || $f !~ m/\./);
$f =~ s/\.(gz|bz2)$//;
# ($name) = ($f =~ m,/([^/]*)$,);
print OUT "$f \n";
}
closedir DIR;
}
print OUT "
Main Index\n\n";
print OUT "
Generated by $manServerUrl from local unix man pages.\n\n";
return;
}
}
print OUT "
Directory $request not known
\n";
}
sub printMatches
{
print OUT "Ambiguous Request '$request'\n<$bodyTag>\n";
print OUT "
Ambiguous Request '$request'
\nPlease select one of the following pages:
";
foreach $f (@multipleMatches)
{
print OUT "$f
\n";
}
print OUT "
Main Index\n\n";
}
##### Process troff input using man macros into HTML #####
sub man2html
{
$file = $_[0];
$srcfile = $file;
$zfile = $file;
if (! -f $file)
{
if (-f "$file.gz")
{
$zfile = "$file.gz";
$zcat = "/usr/bin/zcat";
$zcat = "/bin/zcat" unless (-x $zcat);
$srcfile = "$zcat $zfile |";
$srcfile =~ m/^(.*)$/;
$srcfile = $1; # untaint
}
elsif (-f "$file.bz2")
{
$zfile = "$file.bz2";
$srcfile = "/usr/bin/bzcat $zfile |";
$srcfile =~ m/^(.*)$/;
$srcfile = $1; # untaint
}
}
print LOG "man2html $file\n";
$foundNroffTag = 0;
loadContents($file);
unless (open(SRC, $srcfile))
{
print OUT "
Failed to open $file
\n";
print STDERR "Failed to open $srcfile\n";
return;
}
($dir,$page,$sect) = ($file =~ m,^(.*)/([^/]+)\.([^.]+)$,);
$troffTable = 0;
%macro = ();
%renamedMacro = ();
%deletedMacro = ();
@indent = ();
@tabstops = ();
$indentLevel = 0;
$prevailingIndent = 6;
$trapLine = 0;
$blockquote = 0;
$noSpace = 0;
$firstSection = 0;
$eqnStart = "";
$eqnEnd = "";
$eqnMode = 0;
%eqndefs = ();
$defaultNm = "";
$title = $file;
$title = "Manual Page - $page($sect)" if ($page && $sect);
$_ = getLine();
if (m/^.so (man.*)$/)
{
# An .so include on the first line only is replaced by the referenced page.
# (See elsewhere for processing of included sections that occur later in document.)
man2html("$dir/../$1");
return;
}
$perlPattern = "";
if ($file =~ m/perl/)
{
&loadPerlPages();
$perlPattern = join('|', grep($_ ne $page, keys %perlPages));
}
print OUT "\n$title\n<$bodyTag>\n";
if ($foundNroffTag)
{
do
{
preProcessLine();
processLine();
}
while(getLine());
endNoFill();
endParagraph();
}
else
{
# Special case where input is not nroff at all but is preformatted text
$sectionName = "Manual Reference Pages";
$sectionNumber = $sect;
$left = "Manual Page";
$right = "Manual Page";
$macroPackage = "(preformatted text)";
$pageName = "$page($sect)";
$saveCurrentLine = $_;
outputPageHead();
$_ = $saveCurrentLine;
print OUT "\n";
do
{
print OUT $_;
}
while(getLine());
print OUT "\n";
}
outputPageFooter();
}
sub outputPageHead
{
plainOutput( "\n" );
outputLine( "
$sectionName - $pageName
\n" );
plainOutput( "\n" );
}
sub outputPageFooter
{
if ($pageName)
{
unless ($cmdLineMode)
{
plainOutput( "\n" );
plainOutput( "Jump to page or go to Top of page | \n" );
plainOutput( "Section $sectionNumber | \n" );
plainOutput( "Main Index.\n" );
plainOutput( "\n" );
}
endBlockquote();
outputLine("
\n
$left | $pageName | $right |
");
}
plainOutput("Generated by $manServerUrl from $zfile $macroPackage.\n\n");
}
sub outputContents
{
print OUT "
CONTENTS
\n";
blockquote();
for ($id=1; $id<=$#contents; ++$id)
{
$name = $contents[$id];
$pre = "";
$pre = " " if ($name =~ m/^ /);
$pre .= " " if ($name =~ m/^ /);
$name =~ s,^\s+,,;
next if ($name eq "" || $name =~ m,^/,);
unless ($name =~ m/[a-z]/)
{
$name = "\u\L$name";
$name =~ s/ (.)/ \u\1/g;
}
outputLine("$pre$name
\n");
}
endBlockquote();
}
# First pass to extract table of contents
sub loadContents
{
@contents = ();
%contents = ();
# print STDERR "SRCFILE = $srcfile\n";
open(SRC, $srcfile) || return;
while ()
{
preProcessLine();
$foundNroffTag = $foundNroffTag || (m/^\.(\\\"|TH|so) /);
if (m/^\.(S[HShs]) ([A-Z].*)\s*$/)
{
$foundNroffTag = 1;
$c = $1;
$t = $2;
$t =~ s/"//g;
$id = @contents;
if ($c eq "SH" || $c eq "Sh")
{
push(@contents, $t);
}
elsif ($t =~ m/\\f/)
{
$t =~ s/\\f.//g;
push(@contents, " $t");
}
else
{
push(@contents, " $t");
}
$contents{"\U$t"} = $id;
}
}
close SRC;
}
# Preprocess $_
sub preProcessLine
{
# Remove spurious white space to canonicise the input
chop;
$origLine = $_;
s, $,,g;
s,^',.,; # treat non breaking requests as if there was a dot
s,^\.\s*,\.,;
if ($eqnMode == 1)
{
if (m/$eqnEnd/)
{
s,^(.*?)$eqnEnd,&processEqnd($1),e;
$eqnMode = 0;
}
else
{
&processEqns($_);
}
}
if ($eqnStart && $eqnMode==0)
{
s,$eqnStart(.*?)$eqnEnd,&processEqnd($1),ge;
if (m/$eqnStart/)
{
s,$eqnStart(.*)$,&processEqns($1),e;
$eqnMode = 1;
}
}
# XXX Note: multiple levels of escaping aren't handled properly, eg. \\*.. as a macro argument
# should get interpolated as string but ends up with a literal '\' being copied through to output.
s,\\\\\*q,",g; # treat mdoc \\*q as special case
s,\\\\,_DBLSLASH_,g;
s,\\ ,_SPACE_,g;
s,\s*\\".*$,,;
s,\\$,,;
# Then apply any variable substitutions and escape < and >
# (which has to be done before we start inserting tags...)
s,\\\*\((..),$vars{$1},ge;
s/\\\*([*'`,^,:~].)/$vars{$1}||"\\*$1"/ge;
s,\\\*(.),$vars{$1},ge;
# Expand special characters for the first time (eg. \(<-
s,\\\((..),$special{$1}||"\\($1",ge;
s,<,<,g;
s,>,>,g;
# Interpolate width and number registers
s,\\w(.)(.*?)\1,&width($2),ge;
s,\\n\((..),&numreg($1),ge;
s,\\n(.),&numreg($1),ge;
}
# Undo slash escaping, normally done at output stage, also in macro defn
sub postProcessLine
{
s,_DBLSLASH_,\\,g;
s,_SPACE_, ,g;
}
# Rewrite the line, expanding escapes such as font styles, and output it.
# The line may be a plain text troff line, or it might be the expanded output of a
# macro in which case some HTML tags may already have been inserted into the text.
sub outputLine
{
$_ = $_[0];
print OUT "\n" if ($debug>1);
if ($needBreak)
{
plainOutput("
\n");
lineBreak();
}
if ($textSinceBreak && !$noFill && $_ =~ m/^\s/)
{
plainOutput("
\n");
lineBreak();
}
s,\\&\.,.,g; # \&. often used to escape dot at start of line
s,\\\.,.,g;
s,\\\^,,g;
s,\\\|,,g;
s,\\c,,g;
s,\\0, ,g;
s,\\t,\t,g;
s,\\%, ,g;
s,\\{,,g;
s,\\},,g;
s,\\$,,g;
s,\\e,\,g;
s,\\([-+_~#[]),\1,g;
# Can't implement local motion tags
s,\\[hv](.).*?\1,,g;
s,\\z,,g;
# Font changes, super/sub-scripts and font size changes
s,\\(f[^(]|f\(..|u|d|s[-+]?\d),&inlineStyle($1),ge;
# Overstrike
if (m/\\o/)
{
# handle a few special accent cases we know how to deal with
s,\\o(.)([aouAOU])"\1,\\o\1\2:\1,g;
s,\\o(.)(.)\\(.)\1,\\o\1\2\3\1,g;
s;\\o(.)([A-Za-z])(['`:,^~])\1;\\o\1\3\2\1;g;
#s,\\o(.)(.*?)\1,"".($vars{$2}||$2)."",ge;
s,\\o(.)(.*?)\1,$vars{$2}||$2,ge;
}
# Bracket building (ignore)
s,\\b(.)(.*?)\1,\2,g;
s,\\`,`,g;
s,\\',',g;
s,',,g;
s,`,,g;
# Expand special characters introduced by eqn
s,\\\((..),$special{$1}||"\\($1",ge;
s,\\\((..),\\($1,g unless (m,^\.,);
# Don't know how to handle other escapes
s,(\\[^&]),\1,g unless (m,^\.,);
postProcessLine();
# Insert links for http, ftp and mailto URLs
# Recognised URLs are sequence of alphanumerics and special chars like / and ~
# but must finish with an alphanumeric rather than punctuation like "."
s,\b(http://[-\w/~:@.%#+$?=]+\w),\1,g;
s,\b(ftp://[-\w/~:@.%#+$?=]+),\1,g;
s,([-_A-Za-z0-9.]+@[A-Za-z][-_A-Za-z0-9]*\.[-_A-Za-z0-9.]+),\1,g;
# special case for things like 'perlre' as it's so useful but the
# pod-generated pages aren't very parser friendly...
if ($perlPattern && ! m/\1,g;
}
# Do this late so \& can be used to suppress conversion of URLs etc.
s,\\&,,g;
# replace tabs with spaces to next multiple of 8
if (m/\t/)
{
$tmp = $_;
$tmp =~ s/<[^>]*>//g;
$tmp =~ s/&[^;]*;/@/g;
@tmp = split(/\t/, $tmp);
$pos = 0;
for ($i=0; $i<=$#tmp; ++$i)
{
$pos += length($tmp[$i]);
$tab[$i] = 0;
$tab[$i] = 8 - $pos%8 unless (@tabstops);
foreach $ts (@tabstops)
{
if ($pos < $ts)
{
$tab[$i] = $ts-$pos;
last;
}
}
$pos += $tab[$i];
}
while (m/\t/)
{
s,\t," " x (shift @tab),e;
}
}
$textSinceBreak = $_ unless ($textSinceBreak);
print OUT $_;
}
# Output a line consisting purely of HTML tags which shouldn't be regarded as
# a troff output line.
sub plainOutput
{
print OUT $_[0];
}
# Output the original line for debugging
sub outputOrigLine
{
print OUT "\n";
}
# Use this to read the next input line (buffered to implement lookahead)
sub getLine
{
$lookaheadPtr = 0;
if (@lookahead)
{
$_ = shift @lookahead;
return $_;
}
$_ = ;
}
# Look ahead to peek at the next input line
sub _lookahead
{
# set lookaheadPtr to 0 to re-read the lines we've looked ahead at
if ($lookaheadPtr>=0 && $lookaheadPtr <= $#lookahead)
{
return $lookahead[$lookaheadPtr++];
}
$lookaheadPtr = -1;
$ll = ;
push(@lookahead, $ll);
return $ll;
}
# Consume the last line that was returned by lookahead
sub consume
{
--$lookaheadPtr;
if ($lookaheadPtr>=0 && $lookaheadPtr <= $#lookahead)
{
$removed = $lookahead[$lookaheadPtr];
@lookahead = (@lookahead[0..$lookaheadPtr-1],@lookahead[$lookaheadPtr+1..$#lookahead]);
}
else
{
$removed = pop @lookahead;
}
chop $removed;
plainOutput("\n");
}
# Look ahead skipping comments and other common non-text tags
sub lookahead
{
$ll = _lookahead();
while ($ll =~ m/^\.(\\"|PD|IX|ns)/)
{
$ll = _lookahead();
}
return $ll;
}
# Process $_, expaning any macros into HTML and calling outputLine().
# If necessary, this method can read more lines of input from (.ig & .de)
# The following state variables are used:
# ...
sub processLine
{
$doneLine = 1; # By default, this counts as a line for trap purposes
s,^\.if t ,,;
s,^\.el ,,; # conditions assumed to evaluate false, so else must be true...
if ($troffTable)
{
processTable();
}
elsif ($eqnMode == 2)
{
plainOutput("\n");
processEqns($_);
}
elsif (m/^\./)
{
processMacro();
}
else
{
processPlainText();
}
if ($doneLine)
{
# Called after processing (most) input lines to decrement trapLine. This is needed
# to implement the .it 1 trap after one line for .TP, where the first line is outdented
if ($trapLine > 0)
{
--$trapLine;
if ($trapLine == 0)
{
&$trapAction;
}
}
}
}
# Process plain text lines
sub processPlainText
{
if ($_ eq "")
{
lineBreak();
plainOutput("
\n");
return;
}
s,(\\f[23BI])([A-Z].*?)(\\f.),$1.($contents{"\U$2"}?"$2":$2).$3,ge;
if ($currentSection eq "SEE ALSO" && ! $cmdLineMode)
{
# Some people don't use BR or IR for see also refs
s,(^|\s)([-.A-Za-z_0-9]+)\s?\(([0-9lL][0-9a-zA-Z]*)\),\1$2($3),g;
}
outputLine("$_\n");
}
# Process macros and built-in directives
sub processMacro
{
outputOrigLine();
# Place macro arguments (space delimited unless within ") into @p
# Remove " from $_, place command in $c, remainder in $joined
@p = grep($_ !~ m/^\s*$/, split(/("[^"]*"|\s+)/) );
grep(s/"//g, @p);
$_ = join(" ", @p);
$p[0] =~ s/^\.//;
$c = $p[0];
$joined = join(" ", @p[1..$#p]);
$joined2 = join(" ", @p[2..$#p]);
$joined3 = join(" ", @p[3..$#p]);
if ($macro{$c}) # Expand macro
{
# Get full macro text
$macro = $macro{$c};
# Interpolate arguments
$macro =~ s,\\\$(\d),$p[$1],ge;
#print OUT "\n";
foreach $_ (split(/\n/, $macro))
{
$_ .= "\n";
preProcessLine();
processLine();
}
$doneLine = 0;
return;
}
elsif ($renamedMacro{$c})
{
$c = $renamedMacro{$c};
}
if ($c eq "ds") # Define string
{
$vars{$p[1]} = $joined2;
$doneLine = 0;
}
elsif ($c eq "nr") # Define number register
{
$number{$p[1]} = evalnum($joined2);
$doneLine = 0;
}
elsif ($c eq "ti") # Temporary indent
{
plainOutput(" ");
}
elsif ($c eq "rm")
{
$macroName = $p[1];
if ($macro{$macroName})
{
delete $macro{$macroName};
}
else
{
$deletedMacro{$macroName} = 1;
}
}
elsif ($c eq "rn")
{
$oldName = $p[1];
$newName = $p[2];
$macro = $macro{$oldName};
if ($macro)
{
if ($newName =~ $reservedMacros && ! $deletedMacro{$newName})
{
plainOutput("\n");
}
else
{
$macro{$newName} = $macro;
delete $deletedMacro{$newName};
}
delete $macro{$oldName};
}
else
{
# Support renaming of reserved macros by mapping occurrences of new name
# to old name after macro expansion so that built in definition is still
# available, also mark the name as deleted to override reservedMacro checks.
plainOutput("\n");
$renamedMacro{$newName} = $oldName;
$deletedMacro{$oldName} = 1;
}
}
elsif ($c eq "de" || $c eq "ig") # Define macro or ignore
{
$macroName = $p[1];
if ($c eq "ig")
{ $delim = ".$p[1]"; }
else
{ $delim = ".$p[2]"; }
$delim = ".." if ($delim eq ".");
# plainOutput("\n");
$macro = "";
$_ = getLine();
preProcessLine();
while ($_ ne $delim)
{
postProcessLine();
outputOrigLine();
$macro .= "$_\n";
$_ = getLine();
last if ($_ eq "");
preProcessLine();
}
outputOrigLine();
# plainOutput("\n");
if ($c eq "de")
{
if ($macroName =~ $reservedMacros && ! $deletedMacro{$macroName})
{
plainOutput("\n");
}
else
{
$macro{$macroName} = $macro;
delete $deletedMacro{$macroName};
}
}
}
elsif ($c eq "so") # Source
{
plainOutput("
\n");
}
elsif ($c eq "TH" || $c eq "Dt") # Man page title
{
endParagraph();
$sectionNumber = $p[2];
$sectionName = $sectionName{"\L$sectionNumber"};
$sectionName = "Manual Reference Pages" unless ($sectionName);
$pageName = "$p[1] ($sectionNumber)";
outputPageHead();
if ($c eq "TH")
{
$right = $p[3];
$left = $p[4];
$left = $osver unless ($left);
$macroPackage = "using man macros";
}
else
{
$macroPackage = "using doc macros";
}
}
elsif ($c eq "Nd")
{
outputLine("- $joined\n");
}
elsif ($c eq "SH" || $c eq "SS" || $c eq "Sh" || $c eq "Ss") # Section/subsection
{
lineBreak();
endNoFill();
endParagraph();
$id = $contents{"\U$joined"};
$currentSection = $joined;
if ($c eq "SH" || $c eq "Sh") \n\n \n
{
endBlockquote();
if ($firstSection++==1) # after first 'Name' section
{
outputContents();
}
outputLine( "\n\n
$joined
blockquote();
}
elsif ($joined =~ m/\\f/)
{
$joined =~ s/\\f.//g;
$id = $contents{"\U$joined"};
outputLine( "\n
$joined
\n" );
}
else
{
endBlockquote();
outputLine( "\n\n
$joined
blockquote();
}
lineBreak();
}
elsif ($c eq "TX" || $c eq "TZ") # Document reference
{
$title = $title{$p[1]};
$title = "Document [$p[1]]" unless ($title);
outputLine( "\\fI$title\\fP$joined2\n" );
}
elsif ($c eq "PD") # Line spacing
{
$noSpace = ($p[1] eq "0");
$doneLine = 0;
}
elsif ($c eq "TS") # Table start
{
unless ($macroPackage =~ /tbl/)
{
if ($macroPackage =~ /eqn/)
{ $macroPackage =~ s/eqn/eqn & tbl/; }
else
{ $macroPackage .= " with tbl support"; }
}
resetStyles();
endNoFill();
$troffTable = 1;
$troffSeparator = "\t";
plainOutput( "
\n" );
}
elsif ($c eq "EQ") # Eqn start
{
unless ($macroPackage =~ /eqn/)
{
if ($macroPackage =~ /tbl/)
{ $macroPackage =~ s/tbl/tbl & eqn/; }
else
{ $macroPackage .= " with eqn support"; }
}
$eqnMode = 2;
}
elsif ($c eq "ps") # Point size
{
plainOutput(&sizeChange($p[1]));
}
elsif ($c eq "ft") # Font change
{
plainOutput(&fontChange($p[1]));
}
elsif ($c eq "I" || $c eq "B") # Single word font change
{
$id = $contents{"\U$joined"};
if ($id && $joined =~ m/^[A-Z]/)
{ $joined = "$joined"; }
outputLine( "\\f$c$joined\\fP " );
plainOutput("\n") if ($noFill);
}
elsif ($c eq "SM") # Single word smaller
{
outputLine("\\s-1$joined\\s0 ");
$doneLine = 0 unless ($joined);
}
elsif ($c eq "SB") # Single word bold and small
{
outputLine("\\fB\\s-1$joined\\s0\\fP ");
}
elsif (m/^\.[BI]R (\S+)\s?\(\s?([0-9lL][0-9a-zA-Z]*)\s?\)(.*)$/)
{
# Special form, .BR is generally used for references to other pages
# Annoyingly, some people have more than one per line...
# Also, some people use .IR ...
for ($i=1; $i<=$#p; $i+=2)
{
$pair = $p[$i]." ".$p[$i+1];
if ($p[$i+1] eq "(")
{
$pair .= $p[$i+2].$p[$i+3];
$i += 2;
}
if ($pair =~ m/^(\S+)\s?\(\s?([0-9lL][0-9a-zA-Z]*)\s?\)(.*)$/)
{
if ($cmdLineMode)
{ outputLine( "\\fB$1\\fR($2)$3\n" ); }
else
{ outputLine( "$1($2)$3\n" ); }
}
else
{ outputLine( "$pair\n" ); }
}
}
elsif ($c eq "BR" || $c eq "BI" || $c eq "IB" ||
$c eq "IR" || $c eq "RI" || $c eq "RB")
{
$f1 = (substr($c ,0,1));
$f2 = (substr($c,1,1));# Check if first param happens to be a section name
$id = $contents{"\U$p[1]"};
if ($id && $p[1] =~ m/^[A-Z]/)
{
$p[1] = "$p[1]";
}for ($i=1; $i<=$#p; ++$i)
{
$f = ($i%2 == 1) ? $f1 : $f2;
outputLine("\\f$f$p[$i]");
}
outputLine("\\fP ");
plainOutput("\n") if ($noFill);
}
elsif ($c eq "nf" || $c eq "Bd") # No fill
{
startNoFill();
}
elsif ($c eq "fi" || $c eq "Ed") # Fill
{
endNoFill();
}
elsif ($c eq "HP")
{
$indent = evalnum($p[1]);
if ($trapOnBreak)
{
plainOutput("
\n");
}
else
{
# Outdent first line, ie. until next break
$trapOnBreak = 1;
$trapAction = *trapHP;
newParagraph($indent);
plainOutput( "
\n" );
$colState = 2;
}
}
elsif ($c eq "IP")
{
$trapOnBreak = 0;
$tag = $p[1];
$indent = evalnum($p[2]);
newParagraph($indent);
outputLine("\n$tag\n \n");
$colState = 1;
lineBreak();
}
elsif ($c eq "TP")
{
$trapOnBreak = 0;
$trapLine = 1; # Next line is tag, then next column
$doneLine = 0; # (But don't count this line)
$trapAction = *trapTP;
$indent = evalnum($p[1]);
$tag = lookahead();
chop $tag;
$i = ($indent ? $indent : $prevailingIndent) ;
$w = width($tag);
if ($w > $i)
{
plainOutput("\n") if ($debug);
newParagraph($indent);
$trapAction = *trapHP;
plainOutput( "\n" );
$colState = 2;
}
else
{
newParagraph($indent);
plainOutput( "\n" );
$colState = 0;
}
$body = lookahead();
$lookaheadPtr = 0;
if ($body =~ m/^\.[HILP]?P/)
{
chop $body;
plainOutput("\n");
$trapLine = 0;
}
}
elsif ($c eq "LP" || $c eq "PP" || $c eq "P" || $c eq "Pp") # Paragraph
{
$trapOnBreak = 0;
$prevailingIndent = 6;
if ($indent[$indentLevel] > 0 && $docListStyle eq "")
{
$line = lookahead();
if ($line =~ m/^\.(TP|IP|HP)/)
{
plainOutput("\n");
}
elsif ($line =~ m/^\.RS/)
{
plainOutput("\n");
}
else
{
endRow();
$foundTag = "";
$lookaheadPtr = 0;
do
{
$line = lookahead();
if ($line =~ m/^\.(TP|HP|IP|RS)( \d+)?/)
{
$indent = $2;
$indent = $prevailingIndent unless ($2);
if ($indent == $indent[$indentLevel])
{ $foundTag = $1; }
$line = "";
}
}
while ($line ne "" && $line !~ m/^\.(RE|SH|SS|PD)/);
$lookaheadPtr = 0;
if ($foundTag)
{
plainOutput("\n");
plainOutput("\n");
$colState = 2;
}
else
{
plainOutput("\n");
setIndent(0);
}
}
}
else
{
plainOutput("\n");
}
lineBreak();
}
elsif ($c eq "br") # Break
{
if ($trapOnBreak)
{
# Should this apply to all macros that cause a break?
$trapOnBreak = 0;
&$trapAction();
}
$needBreak = 1 if ($textSinceBreak);
}
elsif ($c eq "sp") # Space
{
lineBreak();
plainOutput("\n");
}
elsif ($c eq "RS") # Block indent start
{
if ($indentLevel==0 && $indent[0]==0)
{
blockquote();
}
else
{
$indent = $p[1];
$indent = $prevailingIndent unless ($indent);
if ($indent > $indent[$indentLevel] && !$extraIndent)
{
$extraIndent = 1;
++$indentLevel;
$indent[$indentLevel] = 0;
setIndent($indent-$indent[$indentLevel-1]);
plainOutput("\n");
$colState = 1;
}
elsif ($indent < $indent[$indentLevel] || $colState==2)
{
endRow();
setIndent($indent);
plainOutput("\n");
$colState = 1;
}
++$indentLevel;
$indent[$indentLevel] = 0;
}
$prevailingIndent = 6;
}
elsif ($c eq "RE") # Block indent end
{
if ($extraIndent)
{
endRow();
setIndent(0);
--$indentLevel;
$extraIndent = 0;
}
if ($indentLevel==0)
{
endParagraph();
if ($blockquote>0)
{
plainOutput("\n");
--$blockquote;
}
}
else
{
endRow();
setIndent(0);
--$indentLevel;
}
$prevailingIndent = $indent[$indentLevel];
$prevailingIndent = 6 unless($prevailingIndent);
}
elsif ($c eq "DT") # default tabs
{
@tabstops = ();
}
elsif ($c eq "ta") # Tab stops
{
@tabstops = ();
for ($i=0; $i<$#p; ++$i)
{
$ts = $p[$i+1];
$tb = 0;
if ($ts =~ m/^\+/)
{
$tb = $tabstops[$i-1];
$ts =~ s/^\+//;
}
$ts = evalnum($ts);
$tabstops[$i] = $tb + $ts;
}
plainOutput("\n") if ($debug);
}
elsif ($c eq "It") # List item (mdoc)
{
lineBreak();
if ($docListStyle eq "-tag")
{
endRow() unless($multilineIt);
if ($tagWidth)
{
setIndent($tagWidth);
}
else
{
setIndent(6);
$width = ""; # let table take care of own width
}
if ($p[1] eq "Xo")
{
plainOutput("");
}
else
{
$tag = &mdocStyle(@p[1..$#p]);
$body = lookahead();
if ($body =~ m/^\.It/)
{ $multilineItNext = 1; }
else
{ $multilineItNext = 0; }
if ($multilineIt)
{
outputLine("
\n$tag\n");
}
elsif ($multilineItNext || $tagWidth>0 && width($tag)>$tagWidth)
{
outputLine("$tag\n");
$colState = 2;
}
else
{
outputLine("$tag\n");
$colState = 1;
}
if ($multilineItNext)
{
$multilineIt = 1;
}
else
{
$multilineIt = 0;
if ($colState==2)
{ plainOutput("\n"); }
else
{ plainOutput("\n"); }
}
}
}
else
{
plainOutput("");
}
lineBreak();
}
elsif ($c eq "Xc")
{
if ($docListStyle eq "-tag")
{
plainOutput("\n");
}
}
elsif ($c eq "Bl") # Begin list (mdoc)
{
push @docListStyles, $docListStyle;
if ($p[1] eq "-enum")
{
plainOutput("\n");
$docListStyle = $p[1];
}
elsif($p[1] eq "-bullet")
{
plainOutput("\n");
$docListStyle = $p[1];
}
else
{
$docListStyle = "-tag";
if ($p[2] eq "-width")
{
$tagWidth = width($p[3]);
if ($tagWidth < 6) { $tagWidth = 6; }
}
else
{
$tagWidth = 0;
}
$multilineIt = 0;
}
}
elsif ($c eq "El") # End list
{
if ($docListStyle eq "-tag")
{
endRow();
setIndent(0);
}
elsif ($docListStyle eq "-bullet")
{
plainOutput("\n");
}
else
{
plainOutput("\n");
}
$docListStyle = pop @docListStyles;
}
elsif ($c eq "Os")
{
$right = $joined;
}
elsif ($c eq "Dd")
{
$left = $joined;
}
elsif ($c eq "Sx") # See section
{
$id = $contents{"\U$joined"};
if ($id && $joined =~ m/^[A-Z]/)
{
outputLine("".&mdocStyle(@p[1..$#p])."\n");
}
else
{
my $x = &mdocStyle(@p[1..$#p]);
$x =~ s/^ //;
outputLine($x."\n");
}
}
elsif (&mdocCallable($c))
{
my $x = &mdocStyle(@p);
$x =~ s/^ //;
outputLine($x."\n");
}
elsif ($c eq "Bx")
{
outputLine("BSD $joined\n");
}
elsif ($c eq "Ux")
{
outputLine("Unix $joined\n");
}
elsif ($c eq "At")
{
outputLine("AT&T $joined\n");
}
elsif ($c =~ m/[A-Z][a-z]/) # Unsupported doc directive
{
outputLine("
.$c $joined\n");
}
elsif ($c eq "") # Empty line (eg. troff comment)
{
$doneLine = 0;
}
else # Unsupported directive
{
# Unknown macros are ignored, and don't count as a line as far as trapLine goes
$doneLine = 0;
plainOutput("\n");
}
}sub trapTP
{
$lookaheadPtr = 0;
$body = lookahead();
if ($body =~ m/^\.TP/)
{
consume();
$trapLine = 1; # restore TP trap
$doneLine = 0; # don't count this line
plainOutput("
\n");
}
else
{
plainOutput("\n");
$colState = 1;
}
lineBreak();
}sub trapHP
{
$lookaheadPtr = 0;
$body = lookahead();
if ($body =~ m/^\.([TH]P)/)
{
consume();
# Restore appropriate type of trap
if ($1 eq "TP")
{
$trapLine = 1;
$doneLine = 0; # don't count this line
}
else
{
$trapOnBreak = 1;
}
plainOutput("
\n");
}
else
{
plainOutput("\n");
$colState = 1;
}
lineBreak();
}sub newParagraph
{
$indent = $_[0];
endRow();
startRow($indent);
}sub startRow
{
$indent = $_[0];
$indent = $prevailingIndent unless ($indent);
$prevailingIndent = $indent;
setIndent($indent);
plainOutput( "" );
}# End an existing HP/TP/IP/RS row
sub endRow
{
if ($indent[$indentLevel] > 0)
{
lineBreak();
plainOutput( "\n" );
}
}# Called when we output a line break tag. Only needs to be called once if
# calling plainOutput, but should call before and after if using outputLine.
sub lineBreak
{
$needBreak = 0;
$textSinceBreak = 0;
}# Called to reset all indents and pending paragraphs (eg. at the start of
# a new top level section).
sub endParagraph
{
++$indentLevel;
while ($indentLevel > 0)
{
--$indentLevel;
if ($indent[$indentLevel] > 0)
{
endRow();
setIndent(0);
}
}
}# Interpolate a number register (possibly autoincrementing)
sub numreg
{
return 0 + $number{$_[0]};
}# Evaluate a numeric expression
sub evalnum
{
$n = $_[0];
return "" if ($n eq "");
if ($n =~ m/i$/) # inches
{
$n =~ s/i//;
$n *= 10;
}
return 0+$n;
}sub setIndent
{
$tsb = $textSinceBreak;
$indent = evalnum($_[0]);
if ($indent==0 && $_[0] !~ m/^0/)
{
$indent = 6;
}
plainOutput("\n") if ($debug);
if ($indent[$indentLevel] != $indent)
{
lineBreak();
if ($indent[$indentLevel] > 0)
{
plainOutput("
") unless ($noSpace);
plainOutput("");
}
if ($indent > 0)
{
endNoFill();
$border = "";
$border = " border=1" if ($debug>2);
#plainOutput("") unless ($indent[$indentLevel] > 0);
plainOutput("0);
if ($noSpace)
{
plainOutput(" cellpadding=0 cellspacing=0>\n");
}
else
{
plainOutput(" cellpadding=3>".($tsb ? "\n
\n" : "\n") );
}
#$width = " width=".($indent*5); # causes text to be chopped if too big
$percent = $indent;
if ($indentLevel > 0)
{ $percent = $indent * 100 / (100-$indentLevel[0]); }
$width = " width=$percent%";
}
$indent[$indentLevel] = $indent;
}
}# Process mdoc style macros recursively, as one of the macro arguments
# may itself be the name of another macro to invoke.
sub mdocStyle
{
return "" unless @_;
my ($tag, @param) = @_;
my ($rest, $term);# Don't format trailing punctuation
if ($param[$#param] =~ m/^[.,;:]$/)
{
$term = pop @param;
}
if ($param[$#param] =~ m/^[)\]]$/)
{
$term = (pop @param).$term;
}if ($param[0] =~ m,\\\\,)
{
print STDERR "$tag: ",join(",", @param),"\n";
}
$rest = &mdocStyle(@param);if ($tag eq "Op")
{
$rest =~ s/ //; # remove first space
return " \\fP[$rest]$term";
}
elsif ($tag eq "Xr") # cross reference
{
my $p = shift @param;
my $url = $p;
if (@param==1)
{
$url .= ".".$param[0];
$rest = "(".$param[0].")";
}
else
{
$rest = &mdocStyle(@param);
}
if ($cmdLineMode)
{
return " ".$p."".$rest.$term;
}
else
{
return " ".$p."".$rest.$term;
}
}
elsif ($tag eq "Fl")
{
my ($sofar);
while (@param)
{
$f = shift @param;
if ($f eq "Ns") # no space
{
chop $sofar;
}
elsif (&mdocCallable($f))
{
unshift @param, $f;
return $sofar.&mdocStyle(@param).$term;
}
else
{
$sofar .= "-$f "
}
}
return $sofar.$term;
}
elsif ($tag eq "Pa" || $tag eq "Er" || $tag eq "Fn" || $tag eq "Dv")
{
return "\\fC$rest\\fP$term";
}
elsif ($tag eq "Ad" || $tag eq "Ar" || $tag eq "Em" || $tag eq "Fa" || $tag eq "St" ||
$tag eq "Ft" || $tag eq "Va" || $tag eq "Ev" || $tag eq "Tn" || $tag eq "%T")
{
return "\\fI$rest\\fP$term";
}
elsif ($tag eq "Nm")
{
$defaultNm = $param[0] unless ($defaultNm);
$rest = $defaultNm unless ($param[0]);
return "\\fB$rest\\fP$term";
}
elsif ($tag eq "Ic" || $tag eq "Cm" || $tag eq "Sy")
{
return "\\fB$rest\\fP$term";
}
elsif ($tag eq "Ta") # Tab
{
# Tabs are used inconsistently so this is the best we can do. Columns won't line up. Tough.
return " $rest$term";
}
elsif ($tag eq "Ql")
{
$rest =~ s/ //;
return "`$rest'$term";
}
elsif ($tag eq "Dl")
{
return "$rest$term
\n";
}
elsif ($tag =~ m/^[ABDEOPQS][qoc]$/)
{
$lq = "";
$rq = "";
if ($tag =~ m/^A/)
{ $lq = "<"; $rq = ">"; }
elsif ($tag =~ m/^B/)
{ $lq = "["; $rq = "]"; }
elsif ($tag =~ m/^D/)
{ $lq = "\""; $rq = "\""; }
elsif ($tag =~ m/^P/)
{ $lq = "("; $rq = ")"; }
elsif ($tag =~ m/^Q/)
{ $lq = "\""; $rq = "\""; }
elsif ($tag =~ m/^S/)
{ $lq = "\\'"; $rq = "\\'"; }
elsif ($tag =~ m/^O/)
{ $lq = "["; $rq = "]"; }
if ($tag =~ m/^.o/)
{ $rq = ""; }
if ($tag =~ m/^.c/)
{ $lq = ""; }
$rest =~ s/ //;
return $lq.$rest.$rq.$term ;
}
elsif (&mdocCallable($tag)) # but not in list above...
{
return $rest.$term;
}
elsif ($tag =~ m/^[.,;:()\[\]]$/) # punctuation
{
return $tag.$rest.$term;
}
elsif ($tag eq "Ns")
{
return $rest.$term;
}
else
{
return " ".$tag.$rest.$term;
}
}# Determine if a macro is mdoc parseable/callable
sub mdocCallable
{
return ($_[0] =~ m/^(Op|Fl|Pa|Er|Fn|Ns|No|Ad|Ar|Xr|Em|Fa|Ft|St|Ic|Cm|Va|Sy|Nm|Li|Dv|Ev|Tn|Pf|Dl|%T|Ta|Ql|[ABDEOPQS][qoc])$/);
}# Estimate the output width of a string
sub width
{
local($word) = $_[0];
$word =~ s,<[/A-Z][^>]*>,,g; # remove any html tags
$word =~ s/^\.\S+\s//;
$word =~ s/\\..//g;
$x = length($word);
$word =~ s/[ ()|.,!;:"']//g; # width of punctuation is about half a character
return ($x + length($word)) / 2;
}# Process a tbl table (between TS/TE tags)
sub processTable
{
if ($troffTable == "1")
{
@troffRowDefs = ();
@tableRows = ();
$hadUnderscore = 0;
while(1)
{
outputOrigLine();
if (m/;\s*$/)
{
$troffSeparator = quotemeta($1) if (m/tab\s*\((.)\)/);
}
else
{
s/\.\s*$//;
s/\t/ /g;
s/^[^lrcan^t]*//; # remove any 'modifiers' coming before tag
# delimit on tags excluding s (viewed as modifier of previous column)
s/([lrcan^t])/\t$1/g;
s/^\t//;
push @troffRowDefs, $_;
last if ($origLine =~ m/\.\s*$/);
}
$_ = getLine();
preProcessLine();
}
$troffTable = 2;
return;
}s/$troffSeparator/\t/g;
if ($_ eq ".TE")
{
endTblRow();
flushTable();
$troffTable = 0;
plainOutput("
\n");
}
elsif ($_ eq ".T&")
{
endTblRow();
flushTable();
$troffTable = 1;
}
elsif (m/[_=]/ && m/^[_=\t]*$/ && $troffCol==0)
{
if (m/^[_=]$/)
{
flushTable();
plainOutput("
\n");
$hadUnderscore = 1;
}
elsif ($troffCol==0 && @troffRowDefs)
{
# Don't output a row, but this counts as a row as far as row defs go
$rowDef = shift @troffRowDefs;
@troffColDefs = split(/\t/, $rowDef);
}
}
elsif (m/^\.sp/ && $troffCol==0 && !$hadUnderscore)
{
flushTable();
plainOutput("
\n");
}
elsif ($_ eq ".br" && $troffMultiline)
{
$rowref->[$troffCol] .= "
\n";
}
elsif ($_ !~ m/^\./)
{
$rowref = $tableRows[$#tableRows]; # reference to current row (last row in array)
if ($troffCol==0 && @troffRowDefs)
{
$rowDef = shift @troffRowDefs;
if ($rowDef =~ m/^[_=]/)
{
$xxx = $_;
flushTable();
plainOutput("
\n");
$hadUnderscore = 1;
$_ = $xxx;
$rowDef = shift @troffRowDefs;
}
@troffColDefs = split(/\t/, $rowDef);
}
if ($troffCol == 0 && !$troffMultiline)
{
$rowref = [];
push(@tableRows, $rowref);
#plainOutput("
}
#{
if (m/T}/)
{
$troffMultiline = 0;
}
if ($troffMultiline)
{
$rowref->[$troffCol] .= "$_\n";
return;
}
@columns = split(/\t/, $_);
plainOutput("\n") if ($debug);
while ($troffCol <= $#troffColDefs && @columns > 0)
{
$def = $troffColDefs[$troffCol];
$col = shift @columns;
$col =~ s/\s*$//;
$align = "";
$col = "\\^" if ($col eq "" && $def =~ m/\^/);
$col = " " if ($col eq "");
$style1 = "";
$style2 = "";
if ($col ne "\\^")
{
if ($def =~ m/[bB]/ || $def =~ m/f3/)
{ $style1 = "\\fB"; $style2 = "\\fP"; }
if ($def =~ m/I/ || $def =~ m/f2/)
{ $style1 = "\\fI"; $style2 = "\\fP"; }
}
if ($def =~ m/c/) { $align = " align=center"; }
if ($def =~ m/[rn]/) { $align = " align=right"; }
$span = $def;
$span =~ s/[^s]//g;
if ($span) { $align.= " colspan=".(length($span)+1); }
#{
if ($col =~ m/T}/)
{
$rowref->[$troffCol] .= "$style2";
++$troffCol;
}
elsif ($col =~ m/T{/) #}
{
$col =~ s/T{//; #}
$rowref->[$troffCol] = "
$troffMultiline = 1;
}
else
{
$rowref->[$troffCol] = "
";
++$troffCol;
}
}
endTblRow() unless ($troffMultiline);
}
}
sub endTblRow
{
return if ($troffCol == 0);
while ($troffCol <= $#troffColDefs)
{
$rowref->[$troffCol] = "
";
#print OUT "
";
++$troffCol;
}
$troffCol = 0;
#print OUT "
\n"
}
sub flushTable
{
plainOutput("\n") if ($debug);
# Treat rows with first cell blank or with more than one vertically
# spanned row as a continuation of the previous line.
# Note this is frequently a useful heuristic but isn't foolproof.
for($r=0; $r<$#tableRows; ++$r)
{
$vspans = 0;
for ($c=0; $c<=$#{$tableRows[$r+1]}; ++$c)
{++$vspans if ($tableRows[$r+1][$c] =~ m,
,);}
if ((($vspans>1) || ($tableRows[$r+1][0] =~ m,
,)) &&
$#{$tableRows[$r]} == $#{$tableRows[$r+1]} && 0)
{
if ($debug)
{
plainOutput("\n");
plainOutput("\n");
plainOutput("\n");
}
for ($c=0; $c<=$#{$tableRows[$r]}; ++$c)
{
$tableRows[$r][$c] .= $tableRows[$r+1][$c];
$tableRows[$r][$c] =~ s,\\\^,,g; # merging is stronger than spanning!
$tableRows[$r][$c] =~ s,
,;
}
@tableRows = (@tableRows[0..$r], @tableRows[$r+2 .. $#tableRows]);
--$r; # process again
}
}
# Turn \^ vertical span requests into rowspan tags
for($r=0; $r<$#tableRows; ++$r)
{
for ($c=0; $c<=$#{$tableRows[$r]}; ++$c)
{
$r2 = $r+1;
while ( $r2<=$#tableRows && ($tableRows[$r2][$c] =~ m,
,) )
{
++$r2;
}
$rs = $r2-$r;
if ($rs > 1)
{
plainOutput("\n") if ($debug);
$tableRows[$r][$c] =~ s/
{
if ($tableRows[$r][$c] =~ m/
@$rowref = (@{$rowref}[0..$c-1], @{$rowref}[$c+1..$#$rowref]);
}
}
}
}
# Finally, output the cells that are left
for($r=0; $r<=$#tableRows; ++$r)
{
plainOutput("
for ($c=0; $c <= $#{$tableRows[$r]}; ++$c)
{
outputLine($tableRows[$r][$c]);
}
plainOutput("
\n");
}
@tableRows = ();
$troffCol = 0;
plainOutput("\n") if ($debug);
}
# Use these for all font changes, including .ft, .ps, .B, .BI, .SM etc.
# Need to add a mechanism to stack up these changes so tags match: ... etc.
sub pushStyle
{
$result = "";
$type = $_[0];
$tag = $_[1];
print OUT "1);
@oldItems = ();
if (grep(m/^$type/, @styleStack))
{
print OUT "undoing up to old $type " if ($debug>1);
while (@styleStack)
{
# search back, undoing intervening tags in reverse order
$oldItem = pop @styleStack;
($oldTag) = ($oldItem =~ m/^.(\S+)/);
$result .= "";
if (substr($oldItem,0,1) eq $type)
{
print OUT "found $oldItem " if ($debug>1);
while (@oldItems)
{
# restore the intermediates again
$oldItem = shift @oldItems;
push(@styleStack, $oldItem);
$result .= "<".substr($oldItem,1).">";
}
last;
}
else
{
unshift(@oldItems, $oldItem);
}
}
}
print OUT "oldItems=(@oldItems) " if ($debug>1);
push(@styleStack, @oldItems); # if we didn't find anything of type
if ($tag)
{
$result .= "<$tag>";
push(@styleStack, $type.$tag);
}
print OUT "-> '$result' -->\n" if ($debug>1);
return $result;
}
sub resetStyles
{
if (@styleStack)
{
print OUT "\n";
print OUT "
resetStyles [".join(",", @styleStack)."]
\n" if ($debug);
}
while (@styleStack)
{
$oldItem = pop @styleStack;
($oldTag) = ($oldItem =~ m/^.(\S+)/);
print OUT "";
}
$currentSize = 0;
$currentShift = 0;
}
sub blockquote
{
print OUT "
\n";
++$blockquote;
}sub endBlockquote
{
resetStyles();
while ($blockquote > 0)
{
print OUT "
\n";
--$blockquote;
}
}
sub indent
{
plainOutput(pushStyle("I", "TABLE"));
$width = $_[0];
$width = " width=$width%" if ($width);
plainOutput("
}
sub outdent
{
plainOutput("
\n");
plainOutput(pushStyle("I"));
}
sub inlineStyle
{
$_[0] =~ m/^(.)(.*)$/;
if ($1 eq "f")
{ fontChange($2); }
elsif ($1 eq "s" && ! $noFill)
{ sizeChange($2); }
else
{ superSub($1); }
}
sub fontChange
{
$fnt = $_[0];
$fnt =~ s/^\(//;
if ($fnt eq "P" || $fnt eq "R" || $fnt eq "1" || $fnt eq "")
{ $font = ""; }
elsif ($fnt eq "B" || $fnt eq "3")
{ $font = "B"; }
elsif ($fnt eq "I" || $fnt eq "2")
{ $font = "I"; }
else
{ $font = "TT"; }
return pushStyle("F", $font);
}
sub sizeChange
{
$size= $_[0];
if ($size =~ m/^[+-]/)
{ $currentSize += $size; }
else
{ $currentSize = $size-10; }
$currentSize = 0 if (! $size);
$sz = $currentSize;
$sz = -2 if ($sz < -2);
$sz = 2 if ($sz > 2);
if ($currentSize eq "0")
{ $size = ""; }
else
{ $size = "FONT size=$sz"; }
return pushStyle("S", $size);
}
sub superSub
{
$sub = $_[0];
++$currentShift if ($sub eq "u");
--$currentShift if ($sub eq "d");
$tag = "";
$tag = "SUP" if ($currentShift > 0);
$tag = "SUB" if ($currentShift < 0);
return pushStyle("D", $tag);
}
sub startNoFill
{
print OUT "\n" unless($noFill);
$noFill = 1;
}
sub endNoFill
{
print OUT "\n" if ($noFill);
$noFill = 0;
}
sub processEqns
{
if ($eqnMode==2 && $_[0] =~ m/^\.EN/)
{
$eqnMode = 0;
outputLine(flushEqn());
plainOutput("\n");
return;
}
$eqnBuffer .= $_[0]." ";
}
sub processEqnd
{
processEqns(@_);
return flushEqn();
}
sub flushEqn
{
@p = grep($_ !~ m/^ *$/, split(/("[^"]*"|\s+|[{}~^])/, $eqnBuffer) );
$eqnBuffer = "";
#return "[".join(',', @p)." -> ".&doEqn(@p)."]\n";
$res = &doEqn(@p);
#$res =~ s,\\\((..),$special{$1}||"\\($1",ge;
#$res =~ s,<,<,g;
#$res =~ s,>,>,g;
return $res;
}
sub doEqn
{
my @p = @_;
my $result = "";
my $res;
my $c;
while (@p)
{
($res, @p) = doEqn1(@p);
$result .= $res;
}
return $result;
}
sub doEqn1
{
my @p = @_;
my $res = "";
my $c;
$c = shift @p;
if ($eqndefs{$c})
{
@x = split(/\0/, $eqndefs{$c});
unshift @p, @x;
$c = shift @p;
}
if ($c =~ m/^"(.*)"$/)
{
$res = $1;
}
elsif ($c eq "delim")
{
$c = shift @p;
if ($c eq "off")
{
$eqnStart = "";
$eqnEnd = "";
}
else
{
$c =~ m/^(.)(.)/;
$eqnStart = quotemeta($1);
$eqnEnd = quotemeta($2);
}
}
elsif ($c eq "define" || $c eq "tdefine" || $c eq "ndefine")
{
$t = shift @p;
$d = shift @p;
$def = "";
if (length($d) != 1)
{
$def = $d;
$def =~ s/^.(.*)./\1/;
}
else
{
while (@p && $p[0] ne $d)
{
$def .= shift @p;
$def .= "\0";
}
chop $def;
shift @p;
}
$eqndefs{$t} = $def unless ($c eq "ndefine");
}
elsif ($c eq "{")
{
my $level = 1;
my $i;
for ($i=0; $i<=$#p; ++$i)
{
++$level if ($p[$i] eq "{");
--$level if ($p[$i] eq "}");
last if ($level==0);
}
$res = doEqn(@p[0..$i-1]);
@p = @p[$i+1..$#p];
}
elsif ($c eq "sup")
{
($c,@p) = &doEqn1(@p);
$res = "\\u$c\\d";
}
elsif ($c eq "to")
{
($c,@p) = &doEqn1(@p);
$res = "\\u$c\\d ";
}
elsif ($c eq "sub" || $c eq "from")
{
($c,@p) = &doEqn1(@p);
$res = "\\d$c\\u";
}
elsif ($c eq "matrix")
{
($c,@p) = &doEqn1(@p);
$res = "matrix ( $c )";
}
elsif ($c eq "bold")
{
($c,@p) = &doEqn1(@p);
$res = "\\fB$c\\fP";
}
elsif ($c eq "italic")
{
($c,@p) = &doEqn1(@p);
$res = "\\fI$c\\fP";
}
elsif ($c eq "roman")
{
}
elsif ($c eq "font" || $c eq "gfont" || $c eq "size" || $c eq "gsize")
{
shift @p;
}
elsif ($c eq "mark" || $c eq "lineup")
{
}
elsif ($c eq "~" || $c eq "^")
{
$res = " ";
}
elsif ($c eq "over")
{
$res = " / ";
}
elsif ($c eq "half")
{
$res = "\\(12";
}
elsif ($c eq "prime")
{
$res = "\\' ";
}
elsif ($c eq "dot")
{
$res = "\\u.\\d ";
}
elsif ($c eq "dotdot")
{
$res = "\\u..\\d ";
}
elsif ($c eq "tilde")
{
$res = "\\u~\\d ";
}
elsif ($c eq "hat")
{
$res = "\\u^\\d ";
}
elsif ($c eq "bar" || $c eq "vec")
{
$res = "\\(rn ";
}
elsif ($c eq "under")
{
$res = "_ ";
}
elsif ( $c eq "sqrt" || $c eq "lim" || $c eq "sum" || $c eq "pile" || $c eq "lpile" ||
$c eq "rpile" || $c eq "cpile" || $c eq "int" || $c eq "prod" )
{
$res = " $c ";
}
elsif ($c eq "cdot")
{
$res = " . ";
}
elsif ($c eq "inf")
{
$res = "\\(if";
}
elsif ($c eq "above" || $c eq "lcol" || $c eq "ccol")
{
$res = " ";
}
elsif ($c eq "sin" || $c eq "cos" || $c eq "tan" || $c eq "log" || $c eq "ln" )
{
$res = " $c ";
}
elsif ($c eq "left" || $c eq "right" || $c eq "nothing")
{
}
elsif ($c =~ m/^[A-Za-z]/)
{
$res = "\\fI$c\\fP";
}
else
{
$res = $c;
}
return ($res, @p);
}
##### Search manpath and initialise special char array #####
sub initialise
{
# Parse the macro definition file for section names
if (open(MACRO, "/usr/lib/tmac/tmac.an") ||
open(MACRO, "/usr/lib/tmac/an") ||
open(MACRO, "/usr/lib/groff/tmac/tmac.an") ||
open(MACRO, "/usr/share/tmac/tmac.an") ||
open(MACRO, "/usr/share/groff/tmac/tmac.an") )
{
while ()
{
chop;
if (m/\$2'([0-9a-zA-Z]+)' .ds ]D (.*)$/)
{
$sn = $2;
unless ($sn =~ m/[a-z]/)
{
$sn = "\u\L$sn";
$sn =~ s/ (.)/ \u\1/g;
}
$sectionName{"\L$1"} = $sn;
}
if (m/\$1'([^']+)' .ds Tx "?(.*)$/)
{
$title{"$1"} = $2;
}
if (m/^.ds ]W (.*)$/)
{
$osver = $1;
}
}
}
else
{
print STDERR "Failed to read tmac.an definitions\n" unless ($cgiMode);
}
if (open(MACRO, "/usr/lib/tmac/tz.map"))
{
while ()
{
chop;
if (m/\$1'([^']+)' .ds Tz "?(.*)$/)
{
$title{"$1"} = $2;
}
}
}
# Prevent redefinition of macros that have special meaning to us
$reservedMacros = '^(SH|SS|Sh|Ss)$';
# Predefine special number registers
$number{'.l'} = 75;
# String variables defined by man package
$vars{'lq'} = '';
$vars{'rq'} = '';
$vars{'R'} = '\\(rg';
$vars{'S'} = '\\s0';
# String variables defined by mdoc package
$vars{'Le'} = '\\(<=';
$vars{'<='} = '\\(<=';
$vars{'Ge'} = '\\(>=';
$vars{'Lt'} = '<';
$vars{'Gt'} = '>';
$vars{'Ne'} = '\\(!=';
$vars{'>='} = '\\(>=';
$vars{'q'} = '"'; # see also special case in preProcessLine
$vars{'Lq'} = '';
$vars{'Rq'} = '';
$vars{'ua'} = '\\(ua';
$vars{'ga'} = '\\(ga';
$vars{'Pi'} = '\\(*p';
$vars{'Pm'} = '\\(+-';
$vars{'Na'} = 'NaN';
$vars{'If'} = '\\(if';
$vars{'Ba'} = '|';
# String variables defined by ms package (access to accented characters)
$vars{'bu'} = '»';
$vars{'66'} = '';
$vars{'99'} = '';
$vars{'*!'} = '¡';
$vars{'ct'} = '¢';
$vars{'po'} = '£';
$vars{'gc'} = '¤';
$vars{'ye'} = '¥';
#$vars{'??'} = '¦';
$vars{'sc'} = '§';
$vars{'*:'} = '¨';
$vars{'co'} = '©';
$vars{'_a'} = 'ª';
$vars{'<<'} = '«';
$vars{'no'} = '¬';
$vars{'hy'} = '';
$vars{'rg'} = '®';
$vars{'ba'} = '¯';
$vars{'de'} = '°';
$vars{'pm'} = '±';
#$vars{'??'} = '²';
#$vars{'??'} = '³';
$vars{'aa'} = '´';
$vars{'mu'} = 'µ';
$vars{'pg'} = '¶';
$vars{'c.'} = '·';
$vars{'cd'} = '¸';
#$vars{'??'} = '¹';
$vars{'_o'} = 'º';
$vars{'>>'} = '»';
$vars{'14'} = '¼';
$vars{'12'} = '½';
#$vars{'??'} = '¾';
$vars{'*?'} = '¿';
$vars{'`A'} = 'À';
$vars{"'A"} = 'Á';
$vars{'^A'} = 'Â';
$vars{'~A'} = 'Ã';
$vars{':A'} = 'Ä';
$vars{'oA'} = 'Å';
$vars{'AE'} = 'Æ';
$vars{',C'} = 'Ç';
$vars{'`E'} = 'È';
$vars{"'E"} = 'É';
$vars{'^E'} = 'Ê';
$vars{':E'} = 'Ë';
$vars{'`I'} = 'Ì';
$vars{"'I"} = 'Í';
$vars{'^I'} = 'Î';
$vars{':I'} = 'Ï';
$vars{'-D'} = 'Ð';
$vars{'~N'} = 'Ñ';
$vars{'`O'} = 'Ò';
$vars{"'O"} = 'Ó';
$vars{'^O'} = 'Ô';
$vars{'~O'} = 'Õ';
$vars{':O'} = 'Ö';
#$vars{'mu'} = '×';
$vars{'NU'} = 'Ø';
$vars{'`U'} = 'Ù';
$vars{"'U"} = 'Ú';
$vars{'^U'} = 'Û';
$vars{':U'} = 'Ü';
#$vars{'??'} = 'Ý';
$vars{'Th'} = 'Þ';
$vars{'*b'} = 'ß';
$vars{'`a'} = 'à';
$vars{"'a"} = 'á';
$vars{'^a'} = 'â';
$vars{'~a'} = 'ã';
$vars{':a'} = 'ä';
$vars{'oa'} = 'å';
$vars{'ae'} = 'æ';
$vars{',c'} = 'ç';
$vars{'`e'} = 'è';
$vars{"'e"} = 'é';
$vars{'^e'} = 'ê';
$vars{':e'} = 'ë';
$vars{'`i'} = 'ì';
$vars{"'i"} = 'í';
$vars{'^i'} = 'î';
$vars{':i'} = 'ï';
#$vars{'??'} = 'ð';
$vars{'~n'} = 'ñ';
$vars{'`o'} = 'ò';
$vars{"'o"} = 'ó';
$vars{'^o'} = 'ô';
$vars{'~o'} = 'õ';
$vars{':o'} = 'ö';
$vars{'di'} = '÷';
$vars{'nu'} = 'ø';
$vars{'`u'} = 'ù';
$vars{"'u"} = 'ú';
$vars{'^u'} = 'û';
$vars{':u'} = 'ü';
#$vars{'??'} = 'ý';
$vars{'th'} = 'þ';
$vars{':y'} = 'ÿ';
# troff special characters and their closest equivalent
$special{'em'} = '';
$special{'hy'} = '-';
$special{'\-'} = ''; # was -
$special{'bu'} = 'o';
$special{'sq'} = '[]';
$special{'ru'} = '_';
$special{'14'} = '¼';
$special{'12'} = '½';
$special{'34'} = '¾';
$special{'fi'} = 'fi';
$special{'fl'} = 'fl';
$special{'ff'} = 'ff';
$special{'Fi'} = 'ffi';
$special{'Fl'} = 'ffl';
$special{'de'} = '°';
$special{'dg'} = ''; # was 182, para symbol
$special{'fm'} = "\\'";
$special{'ct'} = '¢';
$special{'rg'} = '®';
$special{'co'} = '©';
$special{'pl'} = '+';
$special{'mi'} = '-';
$special{'eq'} = '=';
$special{'**'} = '*';
$special{'sc'} = '§';
$special{'aa'} = '´'; # was '
$special{'ga'} = '`'; # was `
$special{'ul'} = '_';
$special{'sl'} = '/';
$special{'*a'} = 'a';
$special{'*b'} = 'ß';
$special{'*g'} = 'y';
$special{'*d'} = 'd';
$special{'*e'} = 'e';
$special{'*z'} = 'z';
$special{'*y'} = 'n';
$special{'*h'} = 'th';
$special{'*i'} = 'i';
$special{'*k'} = 'k';
$special{'*l'} = 'l';
$special{'*m'} = 'µ';
$special{'*n'} = 'v';
$special{'*c'} = '3';
$special{'*o'} = 'o';
$special{'*p'} = 'pi';
$special{'*r'} = 'p';
$special{'*s'} = 's';
$special{'*t'} = 't';
$special{'*u'} = 'u';
$special{'*f'} = 'ph';
$special{'*x'} = 'x';
$special{'*q'} = 'ps';
$special{'*w'} = 'o';
$special{'*A'} = 'A';
$special{'*B'} = 'B';
$special{'*G'} = '|\\u_\\d';
$special{'*D'} = '/\';
$special{'*E'} = 'E';
$special{'*Z'} = 'Z';
$special{'*Y'} = 'H';
$special{'*H'} = 'TH';
$special{'*I'} = 'I';
$special{'*K'} = 'K';
$special{'*L'} = 'L';
$special{'*M'} = 'M';
$special{'*N'} = 'N';
$special{'*C'} = 'Z';
$special{'*O'} = 'O';
$special{'*P'} = '||';
$special{'*R'} = 'P';
$special{'*S'} = 'S';
$special{'*T'} = 'T';
$special{'*U'} = 'Y';
$special{'*F'} = 'PH';
$special{'*X'} = 'X';
$special{'*Q'} = 'PS';
$special{'*W'} = 'O';
$special{'ts'} = 's';
$special{'sr'} = 'v/';
$special{'rn'} = '\\u\\d'; # was 175
$special{'>='} = '>=';
$special{'<='} = '<=';
$special{'=='} = '==';
$special{'~='} = '~=';
$special{'ap'} = '~'; # was ~
$special{'!='} = '!=';
$special{'->'} = '->';
$special{'<-'} = '<-';
$special{'ua'} = '^';
$special{'da'} = 'v';
$special{'mu'} = '×';
$special{'di'} = '÷';
$special{'+-'} = '±';
$special{'cu'} = 'U';
$special{'ca'} = '^';
$special{'sb'} = '(';
$special{'sp'} = ')';
$special{'ib'} = '(=';
$special{'ip'} = '=)';
$special{'if'} = 'oo';
$special{'pd'} = '6';
$special{'gr'} = 'V';
$special{'no'} = '¬';
$special{'is'} = 'I';
$special{'pt'} = '~';
$special{'es'} = 'Ø';
$special{'mo'} = 'e';
$special{'br'} = '|';
$special{'dd'} = ''; # was 165, yen
$special{'rh'} = '=>';
$special{'lh'} = '<=';
$special{'or'} = '|';
$special{'ci'} = 'O';
$special{'lt'} = '(';
$special{'lb'} = '(';
$special{'rt'} = ')';
$special{'rb'} = ')';
$special{'lk'} = '|';
$special{'rk'} = '|';
$special{'bv'} = '|';
$special{'lf'} = '|';
$special{'rf'} = '|';
$special{'lc'} = '|';
$special{'rc'} = '|';
# Not true troff characters but very common typos
$special{'cp'} = '©';
$special{'tm'} = '®';
$special{'en'} = '-';
# Build a list of directories containing man pages
@manpath = ();
if (open(MPC, "/etc/manpath.config") || open(MPC, "/etc/man.config"))
{
while ()
{
if (m/^(MANDB_MAP|MANPATH)\s+(\S+)/)
{
push(@manpath, $2);
}
}
}
@manpath = split(/:/, $ENV{'MANPATH'}) unless (@manpath);
@manpath = ("/usr/man") unless (@manpath);
}
# Search through @manpath and construct @mandirs (non-empty subsections)
sub loadManDirs
{
return if (@mandirs);
print STDERR "Searching ",join(":", @manpath)," for mandirs\n" unless($cgiMode);
foreach $tld (@manpath)
{
$tld =~ m/^(.*)$/;
$tld = $1; # untaint manpath
if (opendir(DIR, $tld))
{
# foreach $d (<$tld/man[0-9a-z]*>)
foreach $d (sort readdir(DIR))
{
if ($d =~ m/^man\w/ && -d "$tld/$d")
{
push (@mandirs, "$tld/$d");
}
}
closedir DIR;
}
}
}
##### Utility to search manpath for a given command #####
sub findPage
{
$request = $_[0];
$request =~ s,^/,,;
@multipleMatches = ();
$file = $_[0];
return $file if (-f $file || -f "$file.gz" || -f "$file.bz2");
# Search the path for the requested man page, which may be of the form:
# "/usr/man/man1/ls.1", "ls.1" or "ls".
($page,$sect) = ($request =~ m/^(.+)\.([^.]+)$/);
$sect = "\L$sect";
# Search the specified section first (if specified)
if ($sect)
{
foreach $md (@manpath)
{
$dir = $md;
$file = "$dir/man$sect/$page.$sect";
push(@multipleMatches, $file) if (-f $file || -f "$file.gz" || -f "$file.bz2");
}
}
else
{
$page = $request;
}
if (@multipleMatches == 1)
{
return pop @multipleMatches;
}
# If not found need to search through each directory
loadManDirs();
foreach $dir (@mandirs)
{
($s) = ($dir =~ m/man([0-9A-Za-z]+)$/);
$file = "$dir/$page.$s";
push(@multipleMatches, $file) if (-f $file || -f "$file.gz" || -f "$file.bz2");
$file = "$dir/$request";
push(@multipleMatches, $file) if (-f $file || -f "$file.gz" || -f "$file.bz2");
if ($sect && "$page.$sect" ne $request)
{
$file = "$dir/$page.$sect";
push(@multipleMatches, $file) if (-f $file || -f "$file.gz" || -f "$file.bz2");
}
}
if (@multipleMatches == 1)
{
return pop @multipleMatches;
}
if (@multipleMatches > 1)
{
return "";
}
# Ok, didn't find it using section numbers. Perhaps there's a page with the
# right name but wrong section number lurking there somewhere. (This search is slow)
# eg. page.1x in man1 (not man1x) directory
foreach $dir (@mandirs)
{
opendir(DIR, $dir);
foreach $f (readdir DIR)
{
if ($f =~ m/^$page\./)
{
$f =~ s/\.(gz|bz2)$//;
push(@multipleMatches, "$dir/$f");
}
}
}
if (@multipleMatches == 1)
{
return pop @multipleMatches;
}
return "";
}
sub loadPerlPages
{
my ($dir,$f,$name,@files);
loadManDirs();
return if (%perlPages);
foreach $dir (@mandirs)
{
if (opendir(DIR, $dir))
{
@files = sort readdir DIR;
foreach $f (@files)
{
next if ($f eq "." || $f eq ".." || $f !~ m/\./);
next unless ("$dir/$f" =~ m/perl/);
$f =~ s/\.(gz|bz2)$//;
($name) = ($f =~ m,(.+)\.[^.]*$,);
$perlPages{$name} = "$dir/$f";
}
closedir DIR;
}
}
delete $perlPages{'perl'}; # too ubiquitous to be useful
}
sub fmtTime
{
my $time = $_[0];
my @days = qw (Sun Mon Tue Wed Thu Fri Sat);
my @months = qw (Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$istdst) = localtime($time);
return sprintf ("%s, %02d %s %4d %02d:%02d:%02d GMT",
$days[$wday],$mday,$months[$mon],1900+$year,$hour,$min,$sec);
}