#! /usr/bin/perl -w
use strict;

# Extract the relevant parts of the LDP OMF file, and transform identifier
# URLs to match the installed locations.

my ($prefix, $index, $omf) = @ARGV;
open INDEX, "< $index" or die "Couldn't open $index: $!";
my %good_url;
map { chomp; $good_url{$_} = 1; } <INDEX>;
close INDEX;

open OMF, "< $omf" or die "Couldn't open $omf: $!";
my @lines = <OMF>;
close OMF;

my @blocks;

for (@lines) {
    if (/^</ and not m#</resource>#) {
	push @blocks, $_;
    } else {
	$blocks[$#blocks] .= $_;
    }
}

my $format  = qq{<format mime="text/html"/>};
# Put everything in this category for now.
my $subject = qq{<subject><category>General|Other</category></subject>};

my $resid = 1;
for (my $id = 0; $id < @blocks; $id++) {
    next unless defined $blocks[$id];	# if we spliced away the last element
    next unless $blocks[$id] =~
	m# ^\s* <identifier> (.*?) </identifier> \s*$ #mx;
    my $url = $1;
    $url =~ s#http://.*?/##;

    my $good = 0;
    if ($url =~ m#^HOWTO/#) {
	if ($good_url{$url}) {
	    $good = 1;
	} elsif ($url =~ m#/$# and $good_url{"${url}index.html"}) {
	    $good = 1;
	} elsif ($good_url{"$url/index.html"}) {
	    $good = 1;
	}
    }

    if ($good) {
	$url =~ s/HOWTO/$prefix/;
	$blocks[$id] =~ s/<resource id=.*?>/<resource id='$resid'>/;
	$resid++;
	$blocks[$id] =~
	    s{^(\s*)<identifier>.*?</identifier>\s*$}
	     {$1<identifier url="$url"/>\n$1$format\n$1$subject}m;
	$blocks[$id] =~ s#<language>(.*?)</language>#<language code="$1"/>#;
    } else {
	splice @blocks, $id, 1;
	redo;
    }
}

print @blocks;
