#!/usr/bin/perl -w

# Copyright (c) 1998 by Marco Budde (Budde@tu-harburg.de)
# Copyright (c) 2001, 2002, 2003 by Colin Watson (cjwatson@debian.org)
# GNU General Public License

################################################################
# HOWTO-INDEX -> dhelp / dwww / doc-base                       #
#                                                              # 
# usage:  html2docbase <dhelp section> <dwww section>          #
#                      <dhelp file> <dwww file>                #
#                      <unpacked HOWTOs directory> <files>     #
################################################################

# Take the five leading positional arguments in the order given in the
# usage banner; whatever is left in @ARGV afterwards is the list of index
# files to process.
my ($dhelp_section, $dwww_section, $dhelp_file, $dwww_file, $unpacked_dir)
    = splice @ARGV, 0, 5;

# Directory prefix put in front of each document's file name in the
# command= field of the dwww menu entries.
my $root = '/usr/share/doc/HOWTO/en-html';

##############################
#  get abstract of document  #
##############################

# Replace the numeric character references found in the index with literal
# characters; &#8220; and &#8221; (curly double quotes) become a plain '"'.
sub _decode_char_refs
{
    my ($text) = @_;
    $text =~ s/&#822[01];/"/g;
    $text =~ s/&#(\d+);/chr $1/eg;
    return $text;
}

# Escape markup-significant characters (and c-cedilla) as HTML entities.
sub _escape_markup
{
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/\xE7/&ccedil;/g;
    return $text;
}

# Parse one HTML HOWTO index and collect the documents it lists.
#
# Argument: path of the index file.
# Returns:  a hash (as a flat list) keyed by each document's link target
#           with its leading "../" removed; every value is a hash
#           reference with the keys 'title' and 'abstract', both
#           entity-escaped.  A leading "the " is dropped from the title.
# Dies if the index file cannot be opened.
sub get_abstracts ($)
{
    my $filename = shift;
    my %docs;

    open my $in, '<', $filename or die "can't open $filename: $!\n";
    my $index = do { local $/ = undef; scalar <$in> };
    close $in;

    # Transform silly DocBook-generated HTML into something more easily
    # parseable.
    $index =~ s/\n>/>/g;
    $index =~ s/\n/ /g;

    my ($link, $title);
    for my $paragraph ($index =~ m!<P>.*?</P>!g) {
        # A paragraph holding a link and a cited title starts a new entry;
        # remember both until the paragraph with the abstract turns up.
        if ($paragraph =~ m!<A\s*HREF="\.\./(.*?)" .*?          # link
                            <I\s*CLASS="CITETITLE">(.*?)</I>    # title
                            !x) {
            ($link, $title) = ($1, $2);
            next;
        }

        next unless $paragraph =~ m!<I.*?>.*?</I>\.\s*          # skip date
                                    (.*?)                       # abstract
                                    </P>!x;
        my $abstract = $1;

        # An abstract-shaped paragraph seen before the first link has no
        # document to belong to.
        next unless defined $link;

        # Clean up whitespace.
        $abstract =~ s/^\s+//;
        $abstract =~ s/\s+$//;
        $abstract =~ s/\s\s+/ /g;

        # Dispose of some HTMLisms.  The title is processed as a copy so
        # that a further abstract paragraph for the same link cannot
        # escape it a second time.
        my $entry_title = _escape_markup(_decode_char_refs($title));
        $abstract = _decode_char_refs($abstract);
        # Strip every occurrence of these tags, not only the first one;
        # anything left over would be escaped and shown literally.
        $abstract =~ s!<EM>!!g;
        $abstract =~ s!</EM>!!g;
        $abstract =~ s!<I .*?>!!g;
        $abstract =~ s!</I>!!g;
        $abstract =~ s!<A .*?>!!g;
        $abstract =~ s!</A>!!g;
        $abstract = _escape_markup($abstract);

        # Improve sorting.
        $entry_title =~ s/^the //i;
        $docs{$link} = {title => $entry_title, abstract => $abstract};
    }

    return %docs;
}


##################
#  write .dhelp  #
##################

# Append one dhelp <item> record to an already-open filehandle.
#
# Arguments: filehandle, document file name, link name, abstract, and a
# flag which, when true, files the entry under "$dhelp_section/mini"
# instead of $dhelp_section itself.
sub write_dhelp (*$$$$)
{
    my ($out, $file, $label, $summary, $is_mini) = @_;

    my $directory = $dhelp_section;
    if ($is_mini) {
        $directory .= '/mini';
    }

    my @record = (
        '<item>',
        "<directory>$directory",
        "<linkname>$label",
        "<filename>$file",
        '<description>',
        $summary,
        '</description>',
        '</item>',
    );

    # The two trailing empty strings supply the newline that ends the
    # record and the blank line that follows it.
    print {$out} join("\n", @record, '', '');
}

##################################
#  dwww support (via menu file)  #
##################################

# Append one menu-file entry for dwww to an already-open filehandle.
#
# Arguments: filehandle, document file name, document id (used as the
# title), link name (used as the long title), abstract, and a flag which,
# when true, files the entry under "$dwww_section/mini".
sub write_dwww (*$$$$$)
{
    my ($out, $file, $id, $label, $summary, $is_mini) = @_;

    my $menu_section = $dwww_section;
    if ($is_mini) {
        $menu_section .= '/mini';
    }

    my @fields = (
        qq{?package(doc-linux-html):needs="dwww" section="$menu_section"},
        qq{title="$id"},
        qq{longtitle="$label"},
        qq{description="$summary"},
        qq{command="$root/$file"},
    );

    # Every field but the last ends in " \" and the next one starts on a
    # new line indented by a single space.
    print {$out} join(" \\\n ", @fields), "\n";
}

####################
#  write doc-base  #
####################

# Commented out until bug #114692 is fixed.

#sub write_doc_base ($$$$)
#{
#    my ($filename, $docid, $linkname, $abstract) = @_;
#    open DOCBASE, "> $outdir/$docid"
#	or die "can't write to $outdir/$docid: $!";
#    print DOCBASE <<EOF;
#Document: $docid
#Title: $linkname
#Abstract: $abstract
#Section: $section
#
#Format: HTML
#Index: $root/$filename
#Files: $root/$filename
#
#EOF
#    close DOCBASE;
#}


################
#     main     #
################

# Open both output files, then for every index file left in @ARGV write
# one dhelp item and one dwww menu entry for each listed document that
# exists under $unpacked_dir.  Failure to open or close an output file
# is fatal, so a broken run cannot pass unnoticed.

open my $dhelp_fh, '>', $dhelp_file
    or die "can't write to $dhelp_file: $!\n";
open my $dwww_fh, '>', $dwww_file
    or die "can't write to $dwww_file: $!\n";

# The environment does not change while the script runs, so decide once
# whether to echo every entry to standard output.
my $debug = (defined $ENV{DEB_BUILD_OPTIONS}
             and $ENV{DEB_BUILD_OPTIONS} =~ /debug/);

for my $filename (@ARGV)
{
    my %abstracts = get_abstracts($filename);
    for my $docfile (sort grep { -e "$unpacked_dir/$_" } keys %abstracts)
    {
        my $entry = $abstracts{$docfile};

        # Derive the document id from the file name: drop a "mini/"
        # component (remembering that it was there), then the
        # "-HOWTO", "/index" and ".html" decorations.
        my $docid = $docfile;
        my $mini = ($docid =~ s!mini/!!) ? 1 : 0;
        $docid =~ s!(?:-HOWTO)?(?:/index)?\.html!!;
        $docid = "ldp-en-$docid";

        if ($debug)
        {
            print "$docid: $entry->{title}\n";
            print "$entry->{abstract}\n\n";
        }

        write_dhelp($dhelp_fh, $docfile, $entry->{title},
                    $entry->{abstract}, $mini);
        write_dwww($dwww_fh, $docfile, $docid, $entry->{title},
                   $entry->{abstract}, $mini);
#       write_doc_base $docfile, $docid, $entry->{title},
#                      $entry->{abstract};
    }
}

# Buffered write errors only surface at close time.
close $dhelp_fh or die "can't close $dhelp_file: $!\n";
close $dwww_fh or die "can't close $dwww_file: $!\n";
