#!/usr/bin/perl -w # # Input: the wiz_nnnn.txt files from http://h71000.www7.hp.com/wizard/wizard.zip # in $SRCDIR # Can be called with -v in which case it will be more verbose about the conversion # Can be called with a single (or multiple space-separated) wiz_nnnn.txt filenames # in which case it only processes these # # Output: wiz_nnnn.html and index.html in $TGTDIR # # will make remarks about empty articles, topics, questions, answers, etc. # # Author: Martin Vorlaender # # This program is free software. It may be used, redistributed and/or modified under # the terms of the Perl Artistic License (see http://www.perl.com/perl/misc/Artistic.html) # # The author takes no reliability whatsoever for it ;-) use strict; use Getopt::Std; use Time::Local (); use Text::Wrap (); # input and output directories my $SRCDIR = '.'; my $TGTDIR = '..'; # Pattern for generated filenames my $filename_pattern = 'wiz_%05d.html'; # top title my $title = 'Ask The Wizard'; # replacement topic for articles with an empty topic my %replacement_topic = ( 1086 => 'Multithreading on a single CPU', 1269 => 'PPP DIAL_OUT as plugin?', 1272 => 'Error message strings from C', 1313 => 'Ctrl-Y and C signals', ); # replacements in the question parts # (evaluable perl code!) my %replacement_question = ( 1140 => 's|the file name is:\nwiz_(1020).html|topic ($1)|', 1888 => 's|In this "Ask the Wizard" exchange:\n\nhttp://www.openvms.digital.com/wizard/wiz_(0580).html\n\n|In topic ($1) |; s|$[12]$ ||g', 1918 => 's|articles wiz_(0817), (1089) and (1122)|topics ($1), ($2) and ($3)|', 2320 => 's| \nhttp://www.openvms.digital.com/wizard/wiz_(0047).html\n\n|topic ($1) |', 2394 => 's|\nhttp://www.openvms.digital.com/wizard/wiz_(1540).html| topic ($1)|', 2681 => 's|wiz_(1661).html|($1)|', 2697 => 's|A.T.W. wiz_(2655).html|topic ($1)|', 2789 => 's|wiz_(1192)|($1)|g', 2929 => 's|URL:\nhttp://www.openvms.digital.com/wizard/wiz_(1271).html|topic ($1)|', 3223 => 's|wiz_(2821)|topic ($1)|', 3285 => 's|wiz_(1139) <<http://www.openvms.digital.com/wizard/wiz_1139.html>> (addresses)\n|Topic ($1) $2 |', 6099 => 's|WIZ_(5843)|($1)|', 6984 => 's|wiz_(2681)|($1)|', 7383 => 's|$(6984)$|($1)|', 8149 => 's|(6984) (&) (7383)|($1) $2 ($3)|', ); # replacements in the answer parts # (evaluable perl code!) my %replacement_answer = ( 1140 => 's|wiz_(1020)|Topic ($1)|', 9144 => 's|$6945$|(6965)|', ); # Template for the head of index.html my $template_index_top = q(

An asterisk appended to the topic number denotes an incomplete article. If no title or link is provided, the article file was empty.

); # Template for an entry in index.html my $template_index_entry = q( ); # Template for an entry in index.html without a link my $template_index_entry_nolink = q( ); # Template for the bottom of index.html my $template_index_bottom = q(

Number	Topic	Date

Generated by wiztxt2html.pl on from HP's OpenVMS Ask The Wizard page. ); #' # Template for the Q&A page my $template_qanda = q(  topic #:

topic #:

Answer written or last revised on

); # Stylesheet my $stylesheet = q( p.header { color:#FFFFFF; background-color:#0000CC; } ); #--------------------------------------------------------------------- my @mon = (qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC)); my %mon = map { ($mon[$_] => $_) } 0..$#mon; sub epoch2vmsdate ($) { my ($d,$m,$y) = (localtime $_[0])[3,4,5]; return $d . '-' . $mon[$m] . '-' . (1900+$y); } sub vmsdate2epoch ($) { my ($d,$m,$y) = split /-/,$_[0],3; return 0 unless $d && $m && $y && exists $mon{$m} && $y >= 1900; return Time::Local::timelocal(0,0,0,$d,$mon{$m},$y-1900); } sub trimleft_multiline ($) { # "Finding the longest common prefix over a list of strings" # cf. news:comp.lang.perl.misc around Nov 27, 1997 my @lines = split "\n", $_[0]; return unless @lines > 1; my $idx = 0; my $prefix; do { $prefix = $lines[$idx++]; return unless defined $prefix; } while $prefix =~ /^\s*$/; my $len = length($prefix); for (@lines[$idx..$#lines]) { next if /^\s*$/; while (substr($_, 0, $len) ne $prefix) { --$len; $prefix = substr($prefix, 0, $len); } return if $prefix eq ''; } $prefix =~ s/^([ \t]*).*$/$1/; # Restrict to leading whitespace return if $prefix eq ''; $_[0] =~ s/^$prefix//mg; } use constant THRESHOLD => 90; # characters in line sub wraplines ($) { my @para = split "\n\n", $_[0]; return unless @para; for my $para (@para) { my @lines = split "\n", $para; my $haslonglines = 0; for my $line (@lines) { $haslonglines = 1 if length $line > THRESHOLD; } if ($haslonglines) { $para = join "\n", Text::Wrap::wrap('','',@lines); } } $_[0] = join "\n\n", @para; } sub fillin_template ($;@) { my $result = shift; my $href = (@_ == 1) ? shift() : { @_ }; while (my ($search, $replace) = each %$href) { $result =~ s//$replace/g; } return $result; } # --- main program starts HERE --- my (@wiz, $file, $single); use constant { EXPECTING_TOPIC => 0, AFTER_TOPIC => 1, IN_QUESTION => 2, IN_ANSWER => 3, AFTER_DATE => 4, }; my %opts; Getopt::Std::getopts('v', \%opts); $opts{v} = 0 unless exists $opts{v} && $opts{v}; print STDERR "Reading...\n" unless $opts{v}; my @files; if ($ARGV[0]) { @files = @ARGV; $single = 1; } else { opendir DIR, $SRCDIR or die "opendir($SRCDIR): $!"; @files = sort grep /(?i)\.txt$/, readdir DIR; closedir DIR; $single = 0; } foreach $file (@files) { unless ($file =~ /^wiz_(\d+)\.txt$/i) { print STDERR "Skipping $file\n"; next; } print "Reading $file\n" if $opts{v}; my $number = 0 + $1; if (-z $file) { print STDERR "$file is empty\n"; push @wiz, { number => $number, topic => '', question => '', answer => '', date => (stat _)[9], incomplete => '*', empty => 1, }; next; } my ($topic,$question,$answer,$vmsdate) = ('','','','',''); my ($date,$empty,$incomplete) = (0,0,''); my $infile = "$SRCDIR/$file"; open FILE, $infile or die "open($infile): $!"; my $state = EXPECTING_TOPIC; while () { if (/^\s*The Question is[\s:]+$/) { $state = IN_QUESTION; } elsif (/^\s*The Answer is[\s:]+$/) { $state = IN_ANSWER; } elsif (/^\s*Answer written(?: or last revised)? on\s+([-\d\w]+)/) { $vmsdate = $1; $state = AFTER_DATE; } else { if ($state == IN_QUESTION) { $question .= $_; } elsif ($state == IN_ANSWER) { $answer .= $_; } elsif ($state == EXPECTING_TOPIC) { $topic = $_; $state = AFTER_TOPIC; } } } close FILE; chomp $topic; $topic =~ s/^\s+//; $topic =~ s/\s+$//; unless ($topic) { if (exists $replacement_topic{$number}) { print STDERR "$file: no topic; using replacement\n"; $topic = $replacement_topic{$number}; } else { print STDERR "$file: no topic\n"; ($topic) = $question =~ /^[\s\n]*([^\n]{1,30})/; $topic .= '...'; } } print STDERR "$file: no question\n" unless $question; print STDERR "$file: no answer\n" unless $answer; $incomplete = '*' unless $question && $answer; $empty = 1 unless $question || $answer; # Trim question and answer eval "for (\$question) { $replacement_question{$number} }" if exists $replacement_question{$number}; trimleft_multiline($question); $question =~ s/[ \t]+$//mg; $question =~ s/\A\n+//s; $question =~ s/\n+\z//s; wraplines($question); eval "for (\$answer) { $replacement_answer{$number} }" if exists $replacement_answer{$number}; trimleft_multiline($answer); $answer =~ s/[ \t]+$//mg; $answer =~ s/\A\n+//s; $answer =~ s/\n+\z//s; # If question or answer contains crossrefs, index them $question =~ s|$(\d+)$|sprintf '(%d)',sprintf($filename_pattern,$1),0+$1|eg; $answer =~ s|$(\d+)$|sprintf '(%d)',sprintf($filename_pattern,$1),0+$1|eg; $question =~ s|(topic\s+)(\d+)|$1.sprintf '%d',sprintf($filename_pattern,$2),0+$2|eg; $answer =~ s|(topic\s+)(\d+)|$1.sprintf '%d',sprintf($filename_pattern,$2),0+$2|eg; # If file contains a date, use it if ($vmsdate) { $date = vmsdate2epoch($vmsdate); print STDERR "$file: '$vmsdate' is not a valid VMS date string\n" if $date == 0; } # If no date yet, use the file's timestamp $date = (stat $file)[9] unless $date; push @wiz, { number => $number, topic => $topic, question => $question, answer => $answer, date => $date, incomplete => $incomplete, empty => $empty, }; } print STDERR "Writing" unless $opts{v}; unless ($single) { $file = "$TGTDIR/index.html"; open IDX, ">$file" or die "open($file): $!"; print IDX fillin_template($template_index_top, title => $title); } sub by_date_and_number { $a->{date} <=> $b->{date} || $a->{number} <=> $b->{number} } my $i = 0; my $inc1 = int(@wiz / 10); my $inc2 = int($inc1 / 4); my $goal1 = $inc1; my $goal2 = $inc2; for my $wiz (sort by_date_and_number @wiz) { unless ($opts{v}) { if (++$i == $goal1) {print STDERR 10*int($goal1/$inc1),'%'; $goal1 += $inc1; $goal2 += $inc2 } elsif ($i == $goal2) {print STDERR '.'; $goal2 += $inc2; } } if ($wiz->{empty}) { print IDX fillin_template($template_index_entry_nolink, number => $wiz->{number}, topic => $wiz->{topic}, date => epoch2vmsdate($wiz->{date}), ) unless $single; } else { $file = sprintf $filename_pattern, $wiz->{number}; print "Writing $file\n" if $opts{v}; print IDX fillin_template($template_index_entry, number => $wiz->{number} . $wiz->{incomplete}, topic => $wiz->{topic}, date => epoch2vmsdate($wiz->{date}), filename => $file, ) unless $single; open FILE, ">$TGTDIR/$file" or die "open($file): $!"; print FILE fillin_template($template_qanda, title => $title, number => $wiz->{number}, topic => $wiz->{topic}, date => epoch2vmsdate($wiz->{date}), question => $wiz->{question}, answer => $wiz->{answer}, ); close FILE; } } unless ($single) { print IDX fillin_template($template_index_bottom, currdate => epoch2vmsdate(time())); close IDX; } open FILE, ">$TGTDIR/style.css" or die "open(style.css): $!"; print FILE $stylesheet; close FILE; print STDERR "\n"; __END__