#!/usr/local/bin/perl -w
#  wgen: Combine plain HTML-files with one template to generate a website
#  Copyright (C) 2000 Jan Ivar Pladsen <pladsen@users.sourceforge.net>
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

use strict;

use IO::File;
use File::Path;
use File::Copy;

my ( %FILE2TITLE, %FILE2EDITOR, %FILE2EDITOR_EMAIL );
my ( %UPDATED, %CRUMBS );
main();

sub print_help
{
    print STDERR <<EOT
Usage: 
   wgen configuration-file

Example: 
   wgen share/wgen.conf

Options:
   --help   This description.
   --conf   Help on the contents of the configuration-file.
   --temp   Help on the contents of the template-file.
   --edit   Help on the contents of the editors-file if any.
EOT
    ;
}
sub print_configuration_help
{
print STDERR <<EOT
  In wgen.conf:
  language              norsk
  base_directory        /home/pladsen/mysite
  editors_file          share/editors
  template_file         share/template.html
  source_directory      upload
  destination_directory htdocs
EOT
}
sub print_template_help
{
    print STDERR <<'EOT'
The template file is an ordinary html file containing certain keywords 
that will be substituted by this program. 

Keywords are to be surrounded by double percent-signs, like this: %%MENU%%. 
Any keyword may occur more than once.

The keywords and the results are:

    TITLE        Text fetched from between <title></title>
    UPDATED      Date of last modification
    BREADCRUMBS  Trail of titles from the pages preceding this. 
    MENU         Text fetched from between <hr> and <hr> if they are 
                 found right after <body>.
    TEXT         Content of page
    EDITOR       Name of editor in charge of the page
    EDITOR_EMAIL Email address to reach editor (and maybe others)
EOT
    ;
}
sub print_editors_help
{
    print STDERR <<'EOT'
The editors file is a file with colon separated values. 

The fields are name of editor, email address of editor, and path. 
Path may include a filename.

Example:

# Main editor
John Doe:john.doe@mysite.net:/

# Editor for /aktiviteter and all subdirectories (if any)
Jane Doe:jane.doe@mysite.net:/finance

# Editor for just this page
Toby Doe:toby.doe@mysite.net:/collections.html
EOT
    ;
}

sub _documentation_of_global_variables
{
#  %FILE2TITLE:
#
#  $FILE2TITLE{ "$path/$file" } = $title; 
#  One entry for each html-file.

#
#  %FILE2EDITOR
#
#  $FILE2EDITOR{ $path } = $name_of_editor;
#  One entry for each editor. The variable $path may include a file or
#  just one or more directories.

#
#  %FILE2EDITOR_EMAIL
#
#  $FILE2EDITOR_EMAIL{ $path } = $email_of_editor;
#  One entry for each editor. The variable $path may include a file or
#  just one or more directories.
#
}

sub main
{
    my ( $configuration_file, $config );
    my ( $template_file, $template );
    my ( $editors_file );

    $configuration_file = process_commandline() || exit;

    ( $template_file, $editors_file, $config ) = 
	process_configuration_file( $configuration_file );

    $template = process_template_file( $template_file );
    process_editors_file( $editors_file );
    
    process_directory( $config, '', $template );
}

sub process_commandline
{
    $_ = $ARGV[ 0 ] || '--help';
    /^--?help/ && do { print_help(); return undef; };
    /^--?conf/ && do { print_configuration_help(); return undef; };
    /^--?temp/ && do { print_template_help(); return undef; };
    /^--?edit/ && do { print_editors_help(); return undef; };
    return( $_ );
}
sub process_configuration_file
{
    my ( $configuration_file ) = @_;
    my ( %known_key, $key, $value, %in, %known_language );
    my ( $base_directory, $source_directory, $destination_directory );
    my ( $language, $template_file, $editors_file, %config );

    die unless defined $configuration_file;
    %known_key = map { $_ => 1 }  qw( language
				      base_directory
				      editors_file
				      template_file
				      source_directory
				      destination_directory
				      );

    if ( open( FH, $configuration_file ) )
    {
	while ( <FH> )
	{
	    chomp;
	    s/#.*//;
	    next if /^\s*$/;
	    ( $key, $value ) = split( /\s+/, $_, 2 );
	    $key =~ s/^\s+//;   $value =~ s/^\s+//;   # Remove leading and
	    $key =~ s/\s+$//;   $value =~ s/\s+$//;   # trailing whitespace
	    $in{ $key } = $value if ( $known_key{ $key } );
	}
	close( FH );
    }
    else
    {
	die "$configuration_file: $! \n";
    }
    
    $base_directory = $in{ base_directory } || `pwd`;
    $base_directory =~ s{/$ }{}x unless ( $base_directory eq '/' );
    
    $template_file = $in{ template_file } || ( print_configuration_help(), exit );
    $template_file = "$base_directory/$template_file" 
	unless ( $template_file =~ m{^/} );

    $editors_file = $in{ editors_file } || ( print_configuration_help(), exit );
    $editors_file = "$base_directory/$editors_file" 
	unless ( $editors_file =~ m{^/} );
    
    $source_directory = $in{ source_directory } || ( print_configuration_help(), exit );
    $source_directory = "$base_directory/$source_directory" 
	unless ( $source_directory =~ m{^/} );
    
    $destination_directory = $in{ destination_directory } || ( print_configuration_help(), exit );
    $destination_directory = "$base_directory/$destination_directory" 
	unless ( $destination_directory =~ m{^/} );

    $language = lc( $in{ language } ) || 'english';
    %known_language = map { $_ => 1 } qw( norsk );
    $language = 'english' unless $known_language{ $language };

    %config = (
	     'src' => $source_directory, 
	     'dst' => $destination_directory,
	     );

    return ( $template_file, $editors_file, \%config );
}

sub process_editors_file
{
    my ( $editors_file ) = @_;
    my ( $editor, $editor_email, $path );

    return unless defined $editors_file;

    if ( open( FH, $editors_file ) )
    {
	while ( <FH> )
	{
	    chomp;
	    s/#.*//;
	    next if /^\s*$/;
	    ( $editor, $editor_email, $path ) = split( /:/ );
	    $path =~ s/\/$// unless ( $path eq '/' ); # Remove trailing /
	    $FILE2EDITOR{ $path } = $editor;
	    $FILE2EDITOR_EMAIL{ $path } = $editor_email;
	}
	close( FH );
    }
}
sub process_template_file
{
    my ( $template_file ) = @_;
    my ( $text, $line, $pre, $key, $post );
    my ( $template );

    if ( open( FH, $template_file ) )
    {
	$text = '';
	while ( <FH> )
	{
	    $line = $_;
	    while ( $line =~ /%%[^%]+%%/ )
	    {
		( $pre, $key, $post ) = 
		    $line =~ /^(.*?)       # Text before keyword
	  		      %%([^%]+)%%  # The keyword inside %% %%
			      (.*)$/x;     # Text after keyword
		push( @{ $template->{ 'text' } }, $text . $pre );
		$text = '';
		push( @{ $template->{ 'key' } }, $key );
		$line = $post;
	    }
	    $text .= $line;
	}
	push( @{ $template->{ 'text' } }, $text );
	close( FH );
    }
    else
    { 
	warn "Can't open $template_file for reading";
    }

    return ( $template );
}

sub process_directory
{
    my ( $config, $path, $template ) = @_;
    my ( @files_to_be_copied, @html_files, @directories );
    my ( $has_index, $menu, $directory );
    my ( $src, $dst );

    $src = $config->{ src };
    $dst = $config->{ dst };

    $has_index = read_directory( $src, $path, 
				 \@files_to_be_copied, \@html_files, 
				 \@directories );
    mkpath( "$dst/$path" );
    copy_files( $config, $path, \@files_to_be_copied );
    if ( $has_index )
    {
	$menu = process_index_file( $config, $path, $template );
	make_menu_entries( $config, $path, $menu, \@directories, \@html_files );
	process_html_files( $config, $path, $template, $menu, \@html_files );
    }

    for $directory ( @directories )
    {
	process_directory( $config, "$path/$directory", $template );
    }
}
sub read_directory
{
    my ( $src, $path, $files_to_be_copied, $html_files, $directories ) = @_;
    my ( @filenames, $file, $index_found );

    $src .= "/$path" if ( $path ne '' );
    opendir( DIR, "$src" ) || ( warn "can't open $src: $!\n", return );
    @filenames = readdir( DIR );
    closedir( DIR );

    $index_found = 0;
    for $file ( @filenames )
    {
	if ( $file eq 'index.html' )
	{
	    $index_found = 1;
	}
	elsif ( $file =~ /html$/ ||
		$file =~ /htm$/ )
	{
	    push( @{ $html_files }, $file );
	}
	elsif ( $file =~ /~$/ )
	{
	    # Ignoring backup files.
	}
	elsif ( -d "$src/$file" )
	{
	    push( @{ $directories }, $file ) 
		unless ( $file eq '.' || $file eq '..' );
	}
	else
	{
	    push( @{ $files_to_be_copied }, $file );
	}
    }
    return( $index_found );
}
sub make_menu_entries
{
    my ( $config, $path, $menu, $directoriesref, $html_filesref ) = @_;
    my ( $line, $dir_file, $dir, $file, $abs_dir, $text );
    my ( $src, $dst );

    $src = $config->{ src };
    $dst = $config->{ dst };

    $menu ||= '';
    for $line ( split( /\n/, $menu ) )
    {
	( $dir_file, $text ) = $line =~ /<a href="(.+?)">(.+?)</;

	next unless ( defined( $dir_file ) and $dir_file ne '' and
		      defined( $text )     and $text     ne '' );

	$text =~ s/&nbsp;/ /g;

	if ( $dir_file =~ m|/| )
	{
	    ( $dir, $file ) = $dir_file =~ m/^(.+)\/([^\/]+)$/;
	    $abs_dir = "${src}$path/$dir";
	    
	    unless( -d $abs_dir )
	    {
		push @{ $directoriesref }, $dir;
		mkpath( $abs_dir );
	    }
	}
	$dir = '' unless defined $dir;
    }
}
sub copy_files
{
    my ( $config, $path, $files_to_be_copied ) = @_;
    my ( $file );
    my ( $src, $dst );

    $src = $config->{ src };
    $dst = $config->{ dst };
    
    $src .= "/$path" unless $path eq '';
    $dst .= "/$path" unless $path eq '';

    for $file ( @{ $files_to_be_copied } )
    {
	copy( "$src/$file", "$dst/$file" ) || die;
    }
}
sub process_index_file
{
    my ( $config, $path, $template ) = @_;
    my ( $file, $Input, $menu, $Fh );
    my ( $src, $dst );

    $src = $config->{ src };
    $dst = $config->{ dst };

    $Input = new WebFilter( $config );
    $src .= "/$path" unless $path eq '';
    $dst .= "/$path" unless $path eq '';

    -f "$src/index.html" || ( warn "index.html not found in $src\n", return );

    $file = 'index.html';
    $Fh = new IO::File ">$dst/$file";
    if ( not defined( $Fh ) ) 
    { 
	warn_about_error_on_open_for_write( "$dst/$file" );
	return();
    }
    my ( $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
	 $atime, $mtime, $ctime, $blksize, $blocks )
	= stat( "$src/$file" );
    chown( $uid, $gid, "$dst/$file" );

    $Input->parse_file( $path, $file );
#    $Input->{ 'menu' } = "<!-- start of menu -->\n<!-- end of menu -->\n"
#	unless ( defined( $Input->{ 'menu' } ) 
#		 and
#		 $Input->{ 'menu' } ne '' );

    # '...' etter katalogar i menyen
    $menu = $Input->{ 'menu' };
    $menu =~ s/(index.html\">[^<]+<\/a>)\s*<br>/$1&nbsp;...<br>/g;
    $menu =~ s/(index.html\">[^<]+<\/a>)\s*<p>/$1&nbsp;...<p>/g;
    $menu =~ s/(index.html\">[^<]+<\/a>)\s*\n/$1&nbsp;.../g;
    $Input->{ 'menu' } = $menu;

    make_breadcrumbs(  $path, $file, $Input->{ 'title' } );
    $Input->{ 'breadcrumbs' } = get_breadcrumbs( $path, $file ) 
	|| 'Problemer med BREADCRUMBS';
    ( $Input->{ 'editor' },
      $Input->{ 'editor_email' } ) = get_editor_and_email( $path, $file );

    $UPDATED{ $Input->{ path } } = $Input->{ updated };
    $CRUMBS{ $Input->{ path } } = $Input->{ breadcrumbs };

    html( $Fh, $Input, $template );
    $Fh->close();

    return ( $menu );
}
sub process_html_files
{
    my ( $config, $path, $template, $menu, $html_files ) = @_;
    my ( $file, $Input, $Fh );
    my ( $src, $dst );

    $src = $config->{ src };
    $dst = $config->{ dst };

    $Input = new WebFilter( $config );
    $src .= "/$path" unless $path eq '';
    $dst .= "/$path" unless $path eq '';

    for $file ( @{ $html_files } )
    {
	$Fh = new IO::File ">$dst/$file";
	if ( not defined( $Fh ) ) 
	{ 
	    warn_about_error_on_open_for_write( "$dst/$file" );
	    return();
	}
	my ( $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
	     $atime, $mtime, $ctime, $blksize, $blocks )
	    = stat( "$src/$file" );
	chown( $uid, $gid, "$dst/$file" );

	$Input->parse_file( $path, $file );
	$Input->{ menu } = $menu;
	
	make_breadcrumbs( $path, $file, $Input->{ 'title' } );
	$Input->{ 'breadcrumbs' } = get_breadcrumbs( $path, $file ) 
	    || 'Problemer med BREADCRUMBS';
	( $Input->{ 'editor' },
	  $Input->{ 'editor_email' } ) = get_editor_and_email( $path, $file );

	html( $Fh, $Input, $template );
	$Fh->close();
    }
}
sub warn_about_error_on_open_for_write
{
    my ( $dst_file ) = @_;

    if ( not -e $dst_file )
    {
	warn "$dst_file does not exist and you do not have the rights to create it.\n";
    }
    elsif ( not -w $dst_file )
    {
#	warn "$dst_file is not writable by effective uid/gid.\n";
    }
    elsif ( not -W $dst_file )
    {
#	warn "$dst_file is not writable by real uid/gid.\n";
    }
    else
    {
	warn( "$dst_file could not be opened for writing and I don't know why!\n" );
    }
}
sub html
{
    my ( $fh, $Input, $template ) = @_;
    my ( $tmp, @part, $levels, $cd );
    my ( $index, $key, $input );

    $tmp = $Input->{ 'path' };
    $tmp =~ s{^/}{};
    @part = split( m{/}, $tmp );
    $levels = scalar( @part );
    $levels = 0 if not defined $levels;
    $cd = '../' x $levels;

    for $index ( 0 .. scalar @{ $template->{ 'text' } } - 1 )
    {
	if ( defined( $template->{ 'key' }[ $index ] ) )
	{
	    $key = lc( $template->{ 'key' }[ $index ] );
	    if ( defined( $Input->{ $key } ) )
	    {
		$input = $Input->{ $key };
	    }
	    else
	    {
		print "key: $key $Input->{ title }\n", join "\n", keys %{ $Input };exit;
		$input = "udefinert: $key";
	    }
	}
	else
	{
	    $input = '';
	}

	$tmp = $template->{ 'text' }[ $index ];

	# Adjusting src in img-tags, using a zero-width negative lookahead assertion.
	# See man perlre
	$tmp =~ s/src="(?!http:)/src="$cd/g;

	$fh->print( $tmp, $input );
    }
}

sub make_breadcrumbs
{
    my ( $path, $file, $title ) = @_;

    $FILE2TITLE{ "$path/$file" } = $title || 'No title';	
}

sub get_breadcrumbs
{
    my ( $path, $file ) = @_;
    my ( @part, $levels, $part, $subpath );
    my ( $crumbs, $tmp );

    $tmp = $path;
    $tmp =~ s{^/}{};
    @part = split( m{/}, $tmp );
    $levels = scalar( @part );
    $levels = 0 if not defined $levels;
    $crumbs = '';

    if ( $levels == 0 )
    {
	if ( $file eq 'index.html' )
	{
	    $crumbs .= $FILE2TITLE{ "/index.html" };
	}
	else
	{
	    print "Manglar smule for: $path/$file |\n" 
		unless defined $FILE2TITLE{ "$path/$file" };
	    
	    $crumbs .= crumb_link( $FILE2TITLE{ "/index.html" }, 0 ) . '  -&gt; ' . 
		$FILE2TITLE{ "$path/$file" };
	}
    }
    elsif ( $levels == 1 )
    {
	$crumbs .= crumb_link( $FILE2TITLE{ "/index.html" }, $levels ) . ' -&gt; ';
	if ( $file eq 'index.html' )
	{
	    $crumbs .= $FILE2TITLE{ "/$part[ 0 ]/index.html" };
	}
	else
	{
	    $crumbs .= crumb_link( $FILE2TITLE{ "/$part[ 0 ]/index.html" }, 0 ) 
		. ' -&gt; ' .
	    $FILE2TITLE{ "/$part[ 0 ]/$file" };
	}
    }
    elsif ( $levels > 1 )
    {
	$crumbs .= crumb_link( $FILE2TITLE{ "/index.html" }, $levels -- );

	pop( @part ) if ( $file eq 'index.html' );
	for $part ( @part )
	{
	    $subpath .= "$part/";
	    $crumbs .= ' -&gt; ' . crumb_link( $FILE2TITLE{ "/${subpath}index.html" }, 
					       $levels -- );
	}

	$crumbs .= ' -&gt; ' . $FILE2TITLE{ "$path/$file" };
    }
    $crumbs .="\n";

    return( $crumbs );
}
sub get_editor_and_email
{
    my ( $path, $file ) = @_;

    my ( @path, $editor, $editor_email );

    $path =~ s/^\///;
    @path = split( /\//, $path );
    push( @path, $file );
    push( @path, 'to_be_popped_first_time' );

    do 
    {
	pop( @path );
	$path = '/' . join( '/', @path );
	$editor = $FILE2EDITOR{ $path } || '';
	$editor_email = $FILE2EDITOR_EMAIL{ $path } || '';
    } until ( ( $editor ne '' ) or 
	      ( scalar( @path ) == 0 ) );
    $editor = 'EDITOR_ERROR' if ( $editor eq '' );
    $editor_email = 'EDITOR_EMAIL_ERROR' if ( $editor_email eq '' );
    
    return ( $editor, $editor_email );
}
sub crumb_link
{
    my ( $text, $levels ) = @_;
    my ( $tmp );

    $tmp = '<a href="' . '../' x $levels . 'index.html">' . $text . '</a>';

    return( $tmp );
}
package WebFilter;

sub new
{
    my ( $proto, $configuration ) = @_;
    my ( $class, $self );
    my ( $src );

    $src = $configuration->{ src };

    $class = ref( $proto ) || $proto;

    $self->{ 'src' } = $src || die "No source";

    bless ( $self, $class );
    return( $self );
}
sub parse_file
{ 
    my ( $self, $path, $file ) = @_;
    my ( $src, $input, $body );
    my ( $title, $menu, $text );

    $self->{ url } = "$path/$file";
    $src = $self->{ src };
    $src .= "/$path" unless ( $path eq '' );

    open( FH, "$src/$file" ) || die;
    $input = join ( '', <FH> );
    close( FH );

    ( $title ) = $input =~ /<title[^>]*>   # title start-tag
	                   (.*?)           # contents of title
			   <\/title>/sxi;  # title end-tag

    ( $body  ) = $input =~ /<body[^>]*>    # body start-tag
	                   (.+?)           # contents of body
		 	   <\/body>/sxi;   # body end-tag

    $menu = '';
    ( $menu, $body ) = extract_menu( $body )
	if ( $file =~ /^\s*index.html\s*$/ );

    $text = $body;

    $self->{ 'path' } = $path;
    $self->{ 'updated' } = last_updated( $src, $file );
    $self->{ 'title' } = $title;
    $self->{ 'menu' } = "\n<!-- wgen: Menu begin. Do not remove! -->\n" .
	                $menu .
			"<!-- wgen: Menu end. Do not remove! -->\n";
    $self->{ 'text' } = "\n<!-- wgen: Text begin. Do not remove! -->\n" .
	                $text . 
			"<!-- wgen: Text end. Do not remove! -->\n";
}
sub extract_menu
{
    my ( $body ) = @_;
    my ( $menu, $new_body );

    if ( $body =~ /^\s*<hr>/is )
    {
	( $menu, $new_body ) = 
	    $body =~ /^\s*            # Leading space
		      <hr>(.*?)<hr>   # Menu between <hr> and <hr>
		      \s*             # Space
		      (.*)$/sxi;       # New body
    }
    
    $menu ||= '';
    $body = $new_body if ( defined $new_body );
    
    return ( $menu, $body );
}
sub last_updated
{
    my ( $path, $file ) = @_;
    my ( $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size );
    my ( $atime, $mtime, $ctime, $blksize, $blocks );
    my ( $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst );
    my ( @mon, @wday );

    ( $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
      $atime, $mtime, $ctime, $blksize, $blocks )
	= stat( "$path/$file" );

    ( $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst ) =
	localtime( $mtime );

    $year += 1900;
    $hour =~ s/^(\d)$/0$1/;
    $min  =~ s/^(\d)$/0$1/;

    @mon = qw( January February March April May June July August September October November December );
    @wday = qw( sunday monday tuesday wednesday thursday friday saturday );
    return "$wday[ $wday ] $mon[ $mon ]&nbsp;$mday.&nbsp;$year at&nbsp;$hour:$min";

    @mon = qw( januar februar mars april mai juni juli august september oktober november desember );
    @wday = qw( søndag mandag tirsdag onsdag torsdag fredag lørdag );
    return "$wday[ $wday ] $mday.$mon[ $mon ]&nbsp;$year klokka&nbsp;$hour:$min";
}