#!/usr/bin/perl -w
# 
# Input: the wiz_nnnn.txt files from http://h71000.www7.hp.com/wizard/wizard.zip
#        in $SRCDIR
# Can be called with -v in which case it will be more verbose about the conversion
# Can be called with a single (or multiple space-separated) wiz_nnnn.txt filenames
# in which case it only processes these
# 
# Output: wiz_nnnn.html and index.html in $TGTDIR
# 
# will make remarks about empty articles, topics, questions, answers, etc.
# 
# Author: Martin Vorlaender <mv@pdv-systeme.de> <martin@radiogaga.harz.de>
# 
# This program is free software. It may be used, redistributed and/or modified under
# the terms of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html)
# 
# The author takes no reliability whatsoever for it ;-)

use strict;
use Getopt::Std;
use Time::Local ();
use Text::Wrap ();

# input and output directories
my $SRCDIR = '.';
my $TGTDIR = '..';

# Pattern for generated filenames
my $filename_pattern = 'wiz_%05d.html';

# top title
my $title = 'Ask The Wizard';

# replacement topic for articles with an empty topic
my %replacement_topic = (
	1086 => 'Multithreading on a single CPU',
	1269 => 'PPP DIAL_OUT as plugin?',
	1272 => 'Error message strings from C',
	1313 => 'Ctrl-Y and C signals',
);

# replacements in the question parts
# (evaluable perl code!)
my %replacement_question = (
	1140 => 's|the file name is:\nwiz_(1020).html|topic ($1)|',
	1888 => 's|In this "Ask the Wizard" exchange:\n\nhttp://www.openvms.digital.com/wizard/wiz_(0580).html\n\n|In topic ($1) |; s|\([12]\) ||g',
	1918 => 's|articles wiz_(0817), (1089) and (1122)|topics ($1), ($2) and ($3)|',
	2320 => 's| \nhttp://www.openvms.digital.com/wizard/wiz_(0047).html\n\n|topic ($1) |',
	2394 => 's|\nhttp://www.openvms.digital.com/wizard/wiz_(1540).html| topic ($1)|',
	2681 => 's|wiz_(1661).html|($1)|',
	2697 => 's|A.T.W. wiz_(2655).html|topic ($1)|',
	2789 => 's|wiz_(1192)|($1)|g',
	2929 => 's|URL:\nhttp://www.openvms.digital.com/wizard/wiz_(1271).html|topic ($1)|',
	3223 => 's|wiz_(2821)|topic ($1)|',
	3285 => 's|wiz_(1139) &lt;&lt;http://www.openvms.digital.com/wizard/wiz_1139.html&gt;&gt; (addresses)\n|Topic ($1) $2 |',
	6099 => 's|WIZ_(5843)|($1)|',
	6984 => 's|wiz_(2681)|($1)|',
	7383 => 's|\((6984)\)|($1)|',
	8149 => 's|(6984) (&amp;) (7383)|($1) $2 ($3)|',
);

# replacements in the answer parts
# (evaluable perl code!)
my %replacement_answer = (
	1140 => 's|wiz_(1020)|Topic ($1)|',
	9144 => 's|\(6945\)|(6965)|',
);

# Template for the head of index.html
my $template_index_top = q(
<html>
<head>
<title><!--!title!--></title>
</head>
<body>
<h1><!--!title!--></h1>
<p>
An asterisk appended to the topic number denotes an incomplete
article. If no title or link is provided, the article file was empty.
</p>
<table border="0">
  <tr>
    <th>Number</th>
    <th>Topic</th>
    <th>Date</th>
  </tr>
);

# Template for an entry in index.html
my $template_index_entry = q(
  <tr>
    <td><!--!number!--></td>
    <td><a href="<!--!filename!-->"><!--!topic!--></a></td>
    <td><!--!date!--></td>
  </tr>
);

# Template for an entry in index.html without a link
my $template_index_entry_nolink = q(
  <tr>
    <td><!--!number!--></td>
    <td><!--!topic!--></td>
    <td><!--!date!--></td>
  </tr>
);

# Template for the bottom of index.html
my $template_index_bottom = q(
</table>
<hr>
Generated by wiztxt2html.pl on <!--!currdate!--> from 
HP's <a href="http://www.hp.com/go/openvms/wizard/">OpenVMS Ask The Wizard</a> page.
</body>
</html>
); #'

# Template for the Q&A page
my $template_qanda = q(
<html>
<head>
<title><!--!title!--> topic #<!--!number!-->: <!--!topic!--></title>
<link rel="stylesheet" type="text/css" href="style.css"> 
</head>
<body>
<h1><!--!title!--> topic #<!--!number!-->:<br><!--!topic!--></h1>
<p class="header">The Question is:</p>
<pre><!--!question!--></pre>
<p class="header">The Answer is:</p>
<pre><!--!answer!--></pre>
<p class="header">&nbsp;</p>
<p>Answer written or last revised on <!--!date!--></p>
</body>
</html>
);

# Stylesheet
my $stylesheet = q(
p.header {
	color:#FFFFFF;
	background-color:#0000CC;
}
);

#---------------------------------------------------------------------

my @mon = (qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC));
my %mon = map { ($mon[$_] => $_) } 0..$#mon;

sub epoch2vmsdate ($) {
    my ($d,$m,$y) = (localtime $_[0])[3,4,5];
    return $d . '-' . $mon[$m] . '-' . (1900+$y);
}

sub vmsdate2epoch ($) {
    my ($d,$m,$y) = split /-/,$_[0],3;
    return 0 unless $d && $m && $y && exists $mon{$m} && $y >= 1900;
    return Time::Local::timelocal(0,0,0,$d,$mon{$m},$y-1900);
}

sub trimleft_multiline ($) {
    # "Finding the longest common prefix over a list of strings"
    # cf. news:comp.lang.perl.misc around Nov 27, 1997

    my @lines = split "\n", $_[0];
    return unless @lines > 1;
    my $idx = 0;
    my $prefix;
    do {
	$prefix = $lines[$idx++];
	return unless defined $prefix;
    } while $prefix =~ /^\s*$/;
    my $len = length($prefix);
    for (@lines[$idx..$#lines]) {
	next if /^\s*$/;
	while (substr($_, 0, $len) ne $prefix) {
	    --$len;
	    $prefix = substr($prefix, 0, $len);
	}
	return if $prefix eq '';
    }
    $prefix =~ s/^([ \t]*).*$/$1/; # Restrict to leading whitespace
    return if $prefix eq '';
    $_[0] =~ s/^$prefix//mg;
}

use constant THRESHOLD => 90; # characters in line

sub wraplines ($) {
    my @para = split "\n\n", $_[0];
    return unless @para;

    for my $para (@para) {
	my @lines = split "\n", $para;
	my $haslonglines = 0;
	for my $line (@lines) {
	    $haslonglines = 1
		if length $line > THRESHOLD;
	}
	if ($haslonglines) {
	    $para = join "\n", Text::Wrap::wrap('','',@lines);
	}
    }

    $_[0] = join "\n\n", @para;
}

sub fillin_template ($;@) {
    my $result = shift;
    my $href = (@_ == 1) ? shift() : { @_ };
    while (my ($search, $replace) = each %$href) {
	$result =~ s/<!--!$search!-->/$replace/g;
    }
    return $result;
}

# --- main program starts HERE ---

my (@wiz, $file, $single);

use constant {
    EXPECTING_TOPIC => 0,
    AFTER_TOPIC     => 1,
    IN_QUESTION     => 2,
    IN_ANSWER       => 3,
    AFTER_DATE      => 4,
};

my %opts;
Getopt::Std::getopts('v', \%opts);
$opts{v} = 0 unless exists $opts{v} && $opts{v};

print STDERR "Reading...\n"
    unless $opts{v};

my @files;
if ($ARGV[0]) {
    @files = @ARGV;
    $single = 1;
} else {
    opendir DIR, $SRCDIR or die "opendir($SRCDIR): $!";
    @files = sort grep /(?i)\.txt$/, readdir DIR;
    closedir DIR;
    $single = 0;
}

foreach $file (@files) {
    unless ($file =~ /^wiz_(\d+)\.txt$/i) {
	print STDERR "Skipping $file\n";
	next;
    }
    print "Reading $file\n"
	if $opts{v};

    my $number = 0 + $1;

    if (-z $file) {
	print STDERR "$file is empty\n";
	push @wiz, {
	    number     => $number,
	    topic      => '',
	    question   => '',
	    answer     => '',
	    date       => (stat _)[9],
	    incomplete => '*',
	    empty      => 1,
	};
	next;
    }

    my ($topic,$question,$answer,$vmsdate) = ('','','','','');
    my ($date,$empty,$incomplete) = (0,0,'');

    my $infile = "$SRCDIR/$file";
    open FILE, $infile or die "open($infile): $!";
    my $state = EXPECTING_TOPIC;
    while (<FILE>) {
	if (/^\s*The Question is[\s:]+$/) {
	    $state = IN_QUESTION;
	} elsif (/^\s*The Answer is[\s:]+$/) {
	    $state = IN_ANSWER;
	} elsif (/^\s*Answer written(?: or last revised)? on\s+([-\d\w]+)/) {
	    $vmsdate = $1;
	    $state = AFTER_DATE;
	} else {
	    if ($state == IN_QUESTION) {
		$question .= $_;
	    } elsif ($state == IN_ANSWER) {
		$answer .= $_;
	    } elsif ($state == EXPECTING_TOPIC) {
		$topic = $_;
		$state = AFTER_TOPIC;
	    }
	}
    }
    close FILE;

    chomp $topic;
    $topic =~ s/^\s+//;
    $topic =~ s/\s+$//;
    unless ($topic) {
	if (exists $replacement_topic{$number}) {
	    print STDERR "$file: no topic; using replacement\n";
	    $topic = $replacement_topic{$number};
	} else {
	    print STDERR "$file: no topic\n";
	    ($topic) = $question =~ /^[\s\n]*([^\n]{1,30})/;
	    $topic .= '...';
	}
    }

    print STDERR "$file: no question\n" unless $question;
    print STDERR "$file: no answer\n" unless $answer;
    $incomplete = '*' unless $question && $answer;
    $empty = 1 unless $question || $answer;

    # Trim question and answer
    eval "for (\$question) { $replacement_question{$number} }"
	if exists $replacement_question{$number};
    trimleft_multiline($question);
    $question =~ s/[ \t]+$//mg;
    $question =~ s/\A\n+//s;
    $question =~ s/\n+\z//s;
    wraplines($question);

    eval "for (\$answer) { $replacement_answer{$number} }"
	if exists $replacement_answer{$number};
    trimleft_multiline($answer);
    $answer =~ s/[ \t]+$//mg;
    $answer =~ s/\A\n+//s;
    $answer =~ s/\n+\z//s;

    # If question or answer contains crossrefs, index them
    $question =~ s|\((\d+)\)|sprintf '<a href="%s">(%d)</a>',sprintf($filename_pattern,$1),0+$1|eg;
    $answer =~ s|\((\d+)\)|sprintf '<a href="%s">(%d)</a>',sprintf($filename_pattern,$1),0+$1|eg;
    $question =~ s|(topic\s+)(\d+)|$1.sprintf '<a href="%s">%d</a>',sprintf($filename_pattern,$2),0+$2|eg;
    $answer =~ s|(topic\s+)(\d+)|$1.sprintf '<a href="%s">%d</a>',sprintf($filename_pattern,$2),0+$2|eg;

    # If file contains a date, use it
    if ($vmsdate) {
	$date = vmsdate2epoch($vmsdate);
	print STDERR "$file: '$vmsdate' is not a valid VMS date string\n"
	    if $date == 0;
    }

    # If no date yet, use the file's timestamp
    $date = (stat $file)[9]
	unless $date;

    push @wiz, {
	number     => $number,
	topic      => $topic,
	question   => $question,
	answer     => $answer,
	date       => $date,
	incomplete => $incomplete,
	empty      => $empty,
    };
}

print STDERR "Writing"
    unless $opts{v};

unless ($single) {
    $file = "$TGTDIR/index.html";
    open IDX, ">$file" or die "open($file): $!";

    print IDX fillin_template($template_index_top, title => $title);
}

sub by_date_and_number {
    $a->{date} <=> $b->{date}
    ||
    $a->{number} <=> $b->{number}
}

my $i = 0;
my $inc1 = int(@wiz / 10);
my $inc2 = int($inc1 / 4);
my $goal1 = $inc1;
my $goal2 = $inc2;
for my $wiz (sort by_date_and_number @wiz) {
    unless ($opts{v}) {
	if  (++$i == $goal1) {print STDERR 10*int($goal1/$inc1),'%'; $goal1 += $inc1; $goal2 += $inc2 }
	elsif ($i == $goal2) {print STDERR '.'; $goal2 += $inc2; }
    }

    if ($wiz->{empty}) {
	print IDX fillin_template($template_index_entry_nolink,
	    number => $wiz->{number},
	    topic  => $wiz->{topic},
	    date   => epoch2vmsdate($wiz->{date}),
	)
	    unless $single;
    } else {
	$file = sprintf $filename_pattern, $wiz->{number};
	print "Writing $file\n"
	    if $opts{v};

	print IDX fillin_template($template_index_entry,
	    number   => $wiz->{number} . $wiz->{incomplete},
	    topic    => $wiz->{topic},
	    date     => epoch2vmsdate($wiz->{date}),
	    filename => $file,
	)
	    unless $single;

	open FILE, ">$TGTDIR/$file" or die "open($file): $!";
	print FILE fillin_template($template_qanda,
	    title    => $title,
	    number   => $wiz->{number},
	    topic    => $wiz->{topic},
	    date     => epoch2vmsdate($wiz->{date}),
	    question => $wiz->{question},
	    answer   => $wiz->{answer},
	);
	close FILE;
    }
}

unless ($single) {
    print IDX fillin_template($template_index_bottom, currdate => epoch2vmsdate(time()));
    close IDX;
}

open FILE, ">$TGTDIR/style.css" or die "open(style.css): $!";
print FILE $stylesheet;
close FILE;

print STDERR "\n";

__END__
