#!/usr/bin/perl -w
#
# Lintian HTML reporting tool -- Create Lintian web reports
#
# Copyright (C) 1998 Christian Schwarz and Richard Braakman
# Copyright (C) 2007 Russ Allbery
#
# This program is free software.  It is distributed under the terms of
# the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

use strict;
use warnings;
use autodie;

use POSIX qw(strftime);
use File::Copy qw(copy);
use Fcntl qw(SEEK_SET);
use List::MoreUtils qw(uniq);
use URI::Escape;
use Text::Template ();

# ------------------------------
# Global variables and configuration

# These have no default and must be set in the configuration file.
# FIXME: $statistics_file should be in all caps as well.
our (
    $LINTIAN_ROOT, $LINTIAN_LAB, $LINTIAN_ARCHIVEDIR,
    $LINTIAN_DIST,$LINTIAN_ARCH, $HTML_TMP_DIR,
    $statistics_file,$LINTIAN_AREA, $HISTORY,
    $HISTORY_DIR, $LINTIAN_SOURCE,$GRAPHS_RANGE_DAYS,
    $GRAPHS, $LINTIAN_MIRROR_NAME, $HARNESS_STATE_DIR,
);

# Read the configuration.
BEGIN { require './config'; } ## no critic (Modules::RequireBarewordIncludes)

# The path to the mirror timestamp.
our $LINTIAN_TIMESTAMP
  = "$LINTIAN_ARCHIVEDIR/project/trace/$LINTIAN_MIRROR_NAME";

# Import Lintian Perl libraries.
use lib "$LINTIAN_ROOT/lib";
use Lintian::Data;
use Lintian::Internal::FrontendUtil qw(split_tag);
use Lintian::Lab;
use Lintian::Profile;
use Lintian::Relation::Version qw(versions_comparator);
use Lintian::Reporting::ResourceManager;
use Lintian::Util qw(read_dpkg_control slurp_entire_file load_state_cache
  find_backlog);

my $profile = Lintian::Profile->new(undef,[$LINTIAN_ROOT]);
my $RESOURCE_MANAGER
  = Lintian::Reporting::ResourceManager->new('html_dir' => $HTML_TMP_DIR,);

Lintian::Data->set_vendor($profile);

# Set the Lintian version, current timestamp, and archive timestamp.
our $LINTIAN_VERSION = `$LINTIAN_ROOT/frontend/lintian --print-version`;
our $timestamp = `date -u --rfc-822`;
our $mirror_timestamp = slurp_entire_file($LINTIAN_TIMESTAMP);
chomp($LINTIAN_VERSION, $timestamp);
$mirror_timestamp =~ s/\n.*//s;

# ------------------------------
# Initialize templates

# The path to our templates.
our $TEMPLATES = "$LINTIAN_ROOT/reporting/templates";

# This only has to be done once, so do it at the start and then reuse the same
# templates throughout.
our %templates;
for my $template (
    qw/head foot clean index maintainer maintainers packages tag
    tags tags-severity tag-not-seen tags-all/
  ) {
    my %options = (TYPE => 'FILE', SOURCE => "$TEMPLATES/$template.tmpl");
    $templates{$template} = Text::Template->new(%options)
      or die "cannot load template $template: $Text::Template::ERROR\n";
}

$templates{'lintian.css'} = Text::Template->new(
    TYPE => 'FILE',
    SOURCE => "$TEMPLATES/lintian.css.tmpl",
    DELIMITERS => ['{{{', '}}}'],
) or die("cannot load template for lintian.css: $Text::Template::ERROR\n");

# ------------------------------
# Main routine

my $LAB = Lintian::Lab->new($LINTIAN_LAB);
my $source_info;
$LAB->open;
$source_info = $LAB->_get_lab_index('source');

# Create output directories.
mkdir($HTML_TMP_DIR, 0777);
mkdir("$HTML_TMP_DIR/full", 0777);
mkdir("$HTML_TMP_DIR/maintainer", 0777);
mkdir("$HTML_TMP_DIR/tags", 0777);
symlink('.', "$HTML_TMP_DIR/reports");
symlink("$LINTIAN_ROOT/doc/lintian.html", "$HTML_TMP_DIR/manual");
symlink("$LINTIAN_ROOT/doc/api.html", "$HTML_TMP_DIR/library-api");

if ($ARGV[0]) {
    copy($ARGV[0], "$HTML_TMP_DIR/lintian.log")
      or die("cannot copy $ARGV[0] to $HTML_TMP_DIR/lintian.log: $!");
    system("gzip --best -c > \"$HTML_TMP_DIR/lintian.log.gz\" < \"$ARGV[0]\"")
      == 0
      or die "cannot create $HTML_TMP_DIR/lintian.log.gz.\n";
}

for my $dir_basename (qw(resources images)) {
    my $dir = "$LINTIAN_ROOT/reporting/$dir_basename";
    next if not -d $dir;
    opendir(my $dirfd, $dir);
    for my $resname (readdir($dirfd)) {
        next if $resname eq '.' or $resname eq '..';
        $RESOURCE_MANAGER->install_resource("$dir/$resname");
    }
    closedir($dirfd);
}
# Create lintian.css from a template, install the output file as a resource
# and discard the original output file.  We do this after installing all
# resources, so the .css file can refer to resources.
output_template(
    'lintian.css',
    $templates{'lintian.css'},
    { 'path_prefix' => '../' });
$RESOURCE_MANAGER->install_resource("$HTML_TMP_DIR/lintian.css");
unlink("$HTML_TMP_DIR/lintian.css");

my $STATE_CACHE = load_state_cache($HARNESS_STATE_DIR);

# %statistics accumulates global statistics.  For tags: errors, warnings,
# experimental, overridden, and info are the keys holding the count of tags of
# that sort.  For packages: binary, udeb, and source are the number of
# packages of each type with Lintian errors or warnings.  For maintainers:
# maintainers is the number of maintainers with Lintian errors or warnings.
#
# %tag_statistics holds a hash of tag-specific statistics.  Each tag name is a
# key, and its value is a hash with the following keys: count and overrides
# (number of times the tag has been detected and overriden, respectively), and
# packages (number of packages with at least one such tag).
my (%statistics, %tag_statistics);

# %by_maint holds a hash of maintainer names to packages and tags.  Each
# maintainer is a key.  The value is a hash of package names to hashes.  Each
# package hash is in turn a hash of versions to an anonymous array of hashes,
# with each hash having keys code, package, type, tag, severity, certainty,
# extra, and xref.  xref gets the partial URL of the maintainer page for that
# source package.
#
# In other words, the lintian output line:
#
#     W: gnubg source: substvar-source-version-is-deprecated gnubg-data
#
# for gnubg 0.15~20061120-1 maintained by Russ Allbery <rra@debian.org> is
# turned into the following structure:
#
# { 'gnubg' => {
#       '0.15~20061120-1' => [
#           { code      => 'W',
#             package   => 'gnubg',
#             version   => '0.15~20061120-1',
#             area      => 'main',
#             type      => 'source',
#             tag       => 'substvar-source-version-is-deprecated',
#             severity  => 'normal',
#             certainty => 'certain',
#             extra     => 'gnubg-data'
#             xref      => 'rra@debian.org.html#gnubg_0.15~20061120-1'
#           } ] } }
#
# and then stored under the key 'Russ Allbery <rra@debian.org>'
#
# %by_uploader holds the same thing except for packages for which the person
# is only an uploader.
#
# %by_tag is a hash of tag names to an anonymous array of tag information
# hashes just like the inside-most data structure above.
my (%by_maint, %by_uploader, %by_tag);

# We take a lintian log file on either standard input or as the first
# argument.  This log file contains all the tags lintian found, plus N: tags
# with informational messages.  Ignore all the N: tags and load everything
# else into the hashes we use for all web page generation.
#
# We keep track of a hash from maintainer page URLs to maintainer values so
# that we don't have two maintainers who map to the same page and overwrite
# each other's pages.  If we find two maintainers who map to the same URL,
# just assume that the second maintainer is the same as the first (but warn
# about it).
#
# The "last_*" are optimizations to avoid computing the same things
# over and over again when a package have multiple tags.
my (%seen, $last_info, $last_maintainer);
my %expanded_code = (
    E => 'errors',
    W => 'warnings',
    I => 'info',
    X => 'experimental',
    O => 'overridden',
    P => 'pedantic',
);

while (<>) {
    my @parts;
    chomp;
    @parts = split_tag($_);
    next unless @parts;
    my ($code, $package, $type, $pver, $parch, $tag, $extra) = @parts;
    $type = 'binary' unless (defined $type);
    next unless ($type eq 'source' || $type eq 'binary' || $type eq 'udeb');
    # Ignore unknown tags - happens if we removed a tag that is still present
    # in the log file.
    my $tag_info = $profile->get_tag($tag, 1);
    next unless $tag_info;

    # Update statistics.
    my $key = $expanded_code{$code};
    $statistics{$key}++;
    unless ($seen{"$package $type"}) {
        $statistics{"$type-packages"}++;
        $seen{"$package $type"} = 1;
    }

    # Determine the source package for this package and warn if there appears
    # to be no source package in the archive.  Determine the maintainer,
    # version, and archive area.  Work around a missing source package by
    # pulling information from a binary package or udeb of the same name if
    # there is any.
    my ($source, $version, $area, $source_version, $maintainer, $uploaders);
    my $pkg_data= $LAB->_get_lab_manifest_data($package, $type, $pver, $parch);
    my $state_data = {};
    if ($type eq 'source') {
        $source = $package;
        if (defined $pkg_data) {
            $source_version = $version = $pkg_data->{version};
            $area = $pkg_data->{area};
            $maintainer = $pkg_data->{maintainer};
            $uploaders = $pkg_data->{uploaders};
        } else {
            $source_version = $version = $pver;
            $pver//='N/A';
            warn "source package $package/$pver not found!\n";
        }
    } else {
        my $src_data;
        if (defined $pkg_data) {
            $version = $pkg_data->{version};
            $area = $pkg_data->{area};
            $source = $pkg_data->{source}//$package;
            $source_version = $pkg_data->{'source-version'}//$version;
            $src_data = $LAB->_get_lab_manifest_data($source, 'source',
                $source_version);
        } else {
            $version = $pver;
        }
        if (defined $src_data) {
            $maintainer = $src_data->{maintainer};
            $uploaders = $src_data->{uploaders};
        } else {
            $pver//='N/A';
            $parch//='N/A';
            if (defined $pkg_data) {
                warn "source for package $package/$pver/$parch"
                  . " ($source/$source_version) not found!\n";
            } else {
                warn "data for package $package/$pver/$parch not found!\n";
            }
            $maintainer = undef;
            $uploaders = undef;
        }
    }
    if ($source and ($version//'') ne '') {
        my $group_id = "$source/$version";
        if (exists($STATE_CACHE->{$group_id})) {
            $state_data = $STATE_CACHE->{$group_id};
        }
    }
    $maintainer ||= '(unknown)';
    $area ||= 'main';
    $source ||= '';
    $version = 'unknown'
      unless (defined($version) and length($version) > 0);
    $source_version = $version
      unless (defined($source_version) and length($source_version) > 0);

    # Sanitize, just out of paranoia.
    $package =~ tr/a-zA-Z0-9.+-/_/c;
    $source =~ tr/a-zA-Z0-9.+-/_/c;
    $version =~ tr/a-zA-Z0-9.+:~-/_/c;
    $source_version =~ tr/a-zA-Z0-9.+:~-/_/c;

    # Conditionally call html_quote if needed.  On average, 11-13% of
    # all tags (emitted on lintan.d.o) have no "extra".  That would be
    # tags like "no-upstream-changelog".
    if (defined($extra)) {
        $extra = html_quote($extra);
    } else {
        $extra = '';
    }

    my $info;

    # Add the tag information to our hashes.  Share the data between the
    # hashes to save space (which means we can't later do destructive tricks
    # with it).
    if (   $last_info
        && $last_info->{type} eq $type
        && $last_info->{package} eq $package
        && $last_info->{version} eq $version) {

        # There are something like 622k tags emitted on lintian.d.o,
        # but only "some" 90k unique package+version(+arch) pairs.
        # Therefore, we can conclude that the average package will
        # have ~6 tags and optimise for that case.  Indeed, this path
        # seems to be taken about 90% of the time (561k/624k).
        # - In fact, we see less than "90k" package+version(+arch)
        #   pairs here, since entries without tags never this far down
        #   in this loop (i.e. they are filtered out by split_tag
        #   above).

        # Copy the last info and then change the bits that can change
        $info = {%{$last_info}};
        # Code depends on whether the given tag was overridden or not
        $info->{code} = $code;
        $info->{extra} = $extra;
        if ($info->{tag} ne $tag) {
            # Technically we ought to html_quote when doing the
            # comparison; but then, the number of tags with characters
            # needing HTML-escaping is (next to) 0.
            $info->{tag} = html_quote($tag);
            $info->{severity}  = $tag_info->severity;
            $info->{certainty} = $tag_info->certainty;
        }
        # saves a map_maintainer call
        $maintainer = $last_maintainer;
    } else {

       # Check if we've seen the URL for this maintainer before and, if so, map
       # them to the same person as the previous one.

        $last_maintainer = $maintainer = map_maintainer($maintainer);

        # Update maintainer statistics.
        $statistics{maintainers}++ unless defined $by_maint{$maintainer};

        $last_info = $info = {
            # split_tags ensures that $code is a single upper case letter
            code         => $code,
            package      => $package,
            version      => $version,
            area         => html_quote($area),
            # There is a check for type being in a fixed whitelist of
            # HTML-safe keywords in the start of the loop.
            type         => $type,
            tag          => html_quote($tag),
            # severity and certainty follows a fixed whitelist of
            # HTML-safe keywords
            severity     => $tag_info->severity,
            certainty    => $tag_info->certainty,
            # extra is unsafe in general, but we already quote it above.
            extra        => $extra,
            xref         => maintainer_url($maintainer)
              . "#${source}_${source_version}",
            'state_data' => $state_data,
        };
        if (!$by_maint{$maintainer}{$source}{$source_version}) {
            my $list_ref = [];
            $by_maint{$maintainer}{$source}{$source_version} = $list_ref;
            # If the package had uploaders listed, also add the
            # information to %by_uploaders (still sharing the data
            # between hashes).
            if ($uploaders) {
                my @uploaders
                  = map { map_maintainer($_) } split(/>\K\s*,\s*/, $uploaders);
                for my $uploader (@uploaders) {
                    next if $uploader eq $maintainer;
                    $by_uploader{$uploader}{$source}{$source_version}
                      = $list_ref;
                }
            }
        }
    }

    push(@{ $by_maint{$maintainer}{$source}{$source_version} }, $info);
    $by_tag{$tag} ||= [];
    push(@{ $by_tag{$tag} }, $info);

}

# Build a hash of all maintainers, not just those with Lintian tags.  We use
# this later to generate stub pages for maintainers whose packages are all
# Lintian-clean.
my %clean;
$source_info->visit_all(
    sub {
        my ($srcdata) = @_;
        my $maintainer = $srcdata->{maintainer};
        my $id = maintainer_url($maintainer);
        $clean{$id} = $maintainer;
    });

# Done with the lab and its metadata
$LAB->close;
undef $LAB;
undef $source_info;

# Now, walk through the tags by source package (sorted by maintainer).  Output
# a summary page of errors and warnings for each maintainer, output a full
# page that includes info, experimental, and overriden tags, and assemble the
# maintainer index and the QA package list as we go.
my (%qa, %maintainers, %sources);
my @maintainers = sort(uniq(keys(%by_maint), keys(%by_uploader)));

for my $maintainer (@maintainers) {
    my $id = maintainer_url($maintainer);
    delete $clean{$id};

    # For each of this maintainer's packages, add statistical information
    # about the number of each type of tag to the QA data and build the
    # packages hash used for the package index.  We only do this for the
    # maintainer packages, not the uploader packages, to avoid
    # double-counting.
    for my $source (keys %{ $by_maint{$maintainer} }) {
        my %count;
        for my $version (
            sort versions_comparator keys %{ $by_maint{$maintainer}{$source} })
        {
            my $tags = $by_maint{$maintainer}{$source}{$version};
            for my $tag (@$tags) {
                $count{$tag->{code}}++;
            }
            if (@$tags) {
                $sources{$source}{$version} = $tags->[0]->{xref};
            }
        }
        $qa{$source} = \%count;
    }

    # Determine if the maintainer's page is clean.  Check all packages for
    # which they're either maintainer or uploader and set $error_clean if
    # they have no errors or warnings.
    #
    # Also take this opportunity to sort the tags so that all similar tags
    # will be grouped, which produces better HTML output.
    my $error_clean = 1;
    for my $source (keys %{ $by_maint{$maintainer} },
        keys %{ $by_uploader{$maintainer} }) {
        my $versions = $by_maint{$maintainer}{$source}
          || $by_uploader{$maintainer}{$source};
        for my $version (keys %$versions) {
            $versions->{$version} = [sort by_tag @{ $versions->{$version} }];
            my $tags = $versions->{$version};
            for my $tag (@$tags) {
                $error_clean = 0 if ($tag->{code} eq 'E');
                $error_clean = 0 if ($tag->{code} eq 'W');
            }
        }
    }

    # Determine the parts of the maintainer and the file name for the
    # maintainer page.
    my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/);
    $name = 'Unknown Maintainer' unless $name;
    $email = 'unknown' unless $email;
    my $regular = "maintainer/$id";
    my $full = "full/$id";

    # Create the regular maintainer page (only errors and warnings) and the
    # full maintainer page (all tags, including overrides and info tags).
    print "Generating page for $id\n";
    my %data = (
        email      => html_quote(uri_escape($email)),
        errors     => 1,
        id         => $id,
        maintainer => html_quote($maintainer),
        name       => html_quote($name),
        packages   => $by_maint{$maintainer},
        uploads    => $by_uploader{$maintainer},
    );
    my $template;
    if ($error_clean) {
        $template = $templates{clean};
    } else {
        $template = $templates{maintainer};
    }
    output_template($regular, $template, \%data);
    $template = $templates{maintainer};
    $data{errors} = 0;
    output_template($full, $template, \%data);

    # Add this maintainer to the hash of maintainer to URL mappings.
    $maintainers{$maintainer} = $id;
}

# Write out the maintainer index.
my %data = (maintainers => \%maintainers,);
output_template('maintainers.html', $templates{maintainers}, \%data);

mkdir("$HTML_TMP_DIR/lookup-tables");
open(my $table, '>', "$HTML_TMP_DIR/lookup-tables/source-packages");

foreach my $source (sort keys %sources) {
    my $first = 1;
    for my $version (sort versions_comparator keys %{ $sources{$source} }) {
        my $xref = $sources{$source}{$version};
        print {$table} "$source full/$xref\n" if $first;
        print {$table} "$source/$version full/$xref\n";
        $first = 0;
    }
}

close($table);

# Write out the QA package list.  This is a space-delimited file that contains
# the package name and then the error count, warning count, info count,
# pedantic count, experimental count, and overridden tag count.
open(my $qa_fd, '>', "$HTML_TMP_DIR/qa-list.txt");
for my $source (sort keys %qa) {
    print {$qa_fd} $source;
    for my $code (qw/E W I P X O/) {
        my $count = $qa{$source}{$code} || 0;
        print {$qa_fd} " $count";
    }
    print {$qa_fd} "\n";
}
close($qa_fd);

# Now, generate stub pages for every maintainer who has only clean packages.
for my $id (keys %clean) {
    my $maintainer = $clean{$id};
    my ($name, $email) = ($maintainer =~ /^(.*) <([^>]+)>/);
    $email = 'unknown' unless $email;
    my %maint_data = (
        id         => $id,
        email      => html_quote(uri_escape($email)),
        maintainer => html_quote($maintainer),
        name       => html_quote($name),
        clean      => 1,
    );
    print "Generating clean page for $id\n";
    output_template("maintainer/$id", $templates{clean}, \%maint_data);
    output_template("full/$id", $templates{clean}, \%maint_data);
}

# Create the pages for each tag.  Each page shows the extended description for
# the tag and all the packages for which that tag was issued.
for my $tag (sort $profile->tags(1)) {
    my $info = $profile->get_tag($tag, 1);
    my $description = $info->description('html', '    ');
    my ($count, $overrides) = (0, 0);
    my %seen_tags;
    my $tmpl = 'tag-not-seen';
    if (exists $by_tag{$tag}) {
        $tmpl = 'tag';
        foreach (@{$by_tag{$tag}}) {
            if ($_->{code} ne 'O') {
                $count++;
                $seen_tags{$_->{xref}}++;
            } else {
                $overrides++;
            }
        }
        $tag_statistics{$tag}{'count'} = $count;
        $tag_statistics{$tag}{'overrides'} = $overrides;
        $tag_statistics{$tag}{'packages'} = scalar keys %seen_tags;
    }

    my %maint_data = (
        description => $description,
        tag         => html_quote($tag),
        code        => $info->code,
        tags        => $by_tag{$tag},
        graphs      => $GRAPHS,
        graphs_days => $GRAPHS_RANGE_DAYS,
        statistics  => {
            count       => $count,
            overrides   => $overrides,
            total       => $count + $overrides,
        },
    );
    output_template("tags/$tag.html", $templates{$tmpl}, \%maint_data);
}

# Create the general tag indices.
%data = (
    tags       => \%by_tag,
    stats      => \%tag_statistics,
    profile    => \$profile,
);
output_template('tags.html', $templates{tags}, \%data);
output_template('tags-severity.html', $templates{'tags-severity'}, \%data);
output_template('tags-all.html', $templates{'tags-all'}, \%data);

# Generate the package lists.  These are huge, so we break them into four
# separate pages.
#
# FIXME: Does anyone actually use these pages?  They're basically unreadable.
my %list;
$list{'0-9, A-F'} = [];
$list{'G-L'}      = [];
$list{'M-R'}      = [];
$list{'S-Z'}      = [];
for my $package (sort keys %sources) {
    my $first = uc substr($package, 0, 1);
    if    ($first le 'F') { push(@{ $list{'0-9, A-F'} }, $package) }
    elsif ($first le 'L') { push(@{ $list{'G-L'} },      $package) }
    elsif ($first le 'R') { push(@{ $list{'M-R'} },      $package) }
    else                  { push(@{ $list{'S-Z'} },      $package) }
}
%data = (sources => \%sources,);
my $i = 1;
for my $section (sort keys %list) {
    $data{section} = $section;
    $data{list} = $list{$section};
    output_template("packages_$i.html", $templates{packages}, \%data);
    $i++;
}

# Finally, we can start creating the index page.  First, read in the old
# statistics file so that we can calculate deltas for all of our statistics.
my $old_statistics;
if (-f $statistics_file) {
    ($old_statistics) = read_dpkg_control($statistics_file);
}
$statistics{'groups-known'} = scalar(keys(%{$STATE_CACHE}));
$statistics{'groups-backlog'}
  = scalar(find_backlog($LINTIAN_VERSION,$STATE_CACHE));

my %delta;
my @attrs = qw(maintainers source-packages binary-packages udeb-packages
  errors warnings info experimental pedantic overridden groups-known
  groups-backlog);
for my $attr (@attrs) {
    my $old = $old_statistics->{$attr} || 0;
    $statistics{$attr} ||= 0;
    $delta{$attr}
      = sprintf('%d (%+d)', $statistics{$attr},$statistics{$attr} - $old);
}

# Update the statistics file.
open(my $stats_fd, '>', $statistics_file);
print {$stats_fd} "last-updated: $timestamp\n";
print {$stats_fd} "mirror-timestamp: $mirror_timestamp\n";
for my $attr (@attrs) {
    print {$stats_fd} "$attr: $statistics{$attr}\n";
}
print {$stats_fd} "lintian-version: $LINTIAN_VERSION\n";
close($stats_fd);

# Create the main page.
%data = (
    architecture => $LINTIAN_ARCH,
    delta        => \%delta,
    dist         => $LINTIAN_DIST,
    mirror       => $mirror_timestamp,
    previous     => $old_statistics->{'last-updated'},
    area         => join(', ', split(/\s*,\s*/, $LINTIAN_AREA)),
    graphs       => $GRAPHS,
    graphs_days  => $GRAPHS_RANGE_DAYS,
);
output_template('index.html', $templates{index}, \%data);

exit 0 if (not $HISTORY);

# Update history.
my %versions = ();
my $graph_dir = "$HTML_TMP_DIR/graphs";
my $commonf = "$graph_dir/common.gpi";
my $unix_time = time();
mkdir("$HISTORY_DIR")
  if (not -d "$HISTORY_DIR");
mkdir("$HISTORY_DIR/tags")
  if (not -d "$HISTORY_DIR/tags");

my $history_file = "$HISTORY_DIR/statistics.dat";
my $stats = '';
for my $attr (@attrs) {
    $stats .= " $statistics{$attr}";
}
open(my $hist_fd, '+>>', $history_file);
print {$hist_fd} "$unix_time $LINTIAN_VERSION$stats\n";

if ($GRAPHS) {
    seek($hist_fd, 0, SEEK_SET);
    while (<$hist_fd>) {
        my @fields = split();
        $versions{$fields[1]} = $fields[0] if not exists $versions{$fields[1]};
    }
}
close($hist_fd);

if ($GRAPHS) {
    mkdir("$graph_dir", 0777);
    mkdir("$graph_dir/tags", 0777);

    my $date_min
      = strftime('%s', localtime($unix_time - 3600 * 24 * $GRAPHS_RANGE_DAYS));
    my $date_max = strftime('%s', localtime($unix_time));

    # Generate loadable Gnuplot file with common variables and labels/arrows
    # for Lintian versions.
    open(my $common, '>', $commonf);
    print {$common} "history_dir='$HISTORY_DIR'\n";
    print {$common} "graph_dir='$graph_dir'\n";
    print {$common} "date_min='$date_min'\n";
    print {$common} "date_max='$date_max'\n";
    my $last_version = 0;
    for my $v (sort { $versions{$a} <=> $versions{$b} } keys %versions) {
        next unless $versions{$v} > $date_min;

        print {$common} "set arrow from '$versions{$v}',graph 0 to ",
          "'$versions{$v}',graph 1 nohead lw 0.4\n";

        # Skip label if previous release is too close; graphs can't display
        # more than ~32 labels.
        my $min_spacing = 3600 * 24 * $GRAPHS_RANGE_DAYS / 32;
        if ($versions{$v} - $last_version > $min_spacing) {
            (my $label = $v) =~ s/\-[\w\d]+$//;
            print {$common} "set label '$label' at '$versions{$v}',graph ",
              "1.04 rotate by 90 font ',8'\n";
        }

        $last_version = $versions{$v};
    }
    close($common);

    print "Plotting global statistics...\n";
    chdir_system($graph_dir,
        ['gnuplot', "$LINTIAN_ROOT/reporting/graphs/statistics.gpi"]) == 0
      or die "gnuplot died with $?\n";
}

my $gnuplot_fd;
if ($GRAPHS) {
    open($gnuplot_fd, '>', "$graph_dir/call.gpi");
}

for my $tag (sort keys %tag_statistics) {
    $history_file = "$HISTORY_DIR/tags/$tag.dat";
    $stats = $tag_statistics{$tag};
    open(my $tag_fd, '>>', $history_file);
    print {$tag_fd} "$unix_time $stats->{'count'} $stats->{'overrides'} "
      ."$stats->{'packages'}\n";
    close($tag_fd);
    if ($GRAPHS) {
        print {$gnuplot_fd} qq{print 'Plotting $tag statistics...'\n};
        print {$gnuplot_fd}
          qq{call '$LINTIAN_ROOT/reporting/graphs/tags.gpi' '$tag'\n};
        print {$gnuplot_fd} qq{reset\n};
    }
}

if ($GRAPHS) {
    close($gnuplot_fd);
    chdir_system($graph_dir, ['gnuplot', 'call.gpi']) == 0
      or die("gnuplot died with $?\n");
    unlink($commonf);
}

exit 0;

# ------------------------------
# Utility functions

# Determine the file name for the maintainer page given a maintainer.  It
# should be <email>.html where <email> is their email address with all
# characters other than a-z A-Z 0-9 - _ . @ = + replaced with _.  Don't change
# this without coordinating with QA.
sub maintainer_url {
    my ($maintainer) = @_;
    if ($maintainer =~ m/<([^>]+)>/) {
        my $id = $1;
        $id =~ tr/a-zA-Z0-9_.@=+-/_/c;
        return "$id.html";
    } else {
        return 'unsorted.html';
    }
}

# Deduplicate maintainers.  Maintains a cache of the maintainers we've seen
# with a given e-mail address, issues a warning if two maintainers have the
# same e-mail address, and returns the maintainer string that we should use
# (which is whatever maintainer we saw first with that e-mail).

# PerlCritic apparently does not recognise this way of "encapsulating"
# variables.  Though, these could probably be replaced by "state"
# variables.
## no critic (ControlStructures::ProhibitUnreachableCode)
{
    my (%urlmap, %warned);

    sub map_maintainer {
        my ($maintainer) = @_;
        my $url = maintainer_url($maintainer);
        if ($urlmap{$url} && $urlmap{$url} ne $maintainer) {
            warn "$maintainer has the same page as $urlmap{$url}\n"
              unless ($warned{$maintainer}
                || lc($maintainer) eq lc($urlmap{$url})
                || $maintainer =~ /\@lists\.(alioth\.)?debian\.org>/);
            $warned{$maintainer}++;
            $maintainer = $urlmap{$url};
        } else {
            $urlmap{$url} = $maintainer;
        }
        return $maintainer;
    }
}

# Quote special characters for HTML output.
sub html_quote {
    my ($text) = @_;
    $text ||= '';
    # Use index to do a quick check before we bother requesting a
    # subst.  On average, this is cheaper than blindly s///'ing, since
    # we rarely subst (all) of the characters below.
    if (index($text, '&') > -1) {
        $text =~ s/&/\&amp;/g;
    }
    if (index($text, '<') > -1) {
        $text =~ s/</\&lt;/g;
    }
    if (index($text, '>') > -1) {
        $text =~ s/>/\&gt;/g;
    }
    return $text;
}

# Given a file name, a template, and a data hash, fill out the template with
# that data hash and output the results to the file.
sub output_template {
    my ($file, $template, $data) = @_;
    my $path_prefix = $data->{path_prefix};
    if (not defined($path_prefix)) {
        $path_prefix = '';
        if (index($file, '/') > -1) {
            $path_prefix = '../' x ($file =~ tr|/||);
        }
    }
    $data->{version} ||= $LINTIAN_VERSION;
    $data->{timestamp} ||= $timestamp;
    $data->{by_version} ||= \&versions_comparator;
    $data->{path_prefix} ||= $path_prefix;
    $data->{resource_path} ||= sub {
        return $path_prefix . $RESOURCE_MANAGER->resource_URL($_[0]);
    };
    $data->{head} ||= sub {
        $templates{head}->fill_in(
            HASH => {
                page_title => $_[0],
                %{$data},
            }) or die "Filling out head of $file: $Text::Template::ERROR\n";
    };
    $data->{foot} ||= sub {
        $templates{foot}->fill_in(
            HASH => {
                LINTIAN_SOURCE => $LINTIAN_SOURCE,
                %{$data},
            }) or die "Filling out footer of $file: $Text::Template::ERROR\n";
    };
    open(my $fd, '>', "$HTML_TMP_DIR/$file");
    $template->fill_in(OUTPUT => $fd, HASH => $data)
      or die "filling out $file failed: $Text::Template::ERROR\n";
    close($fd);
    return;
}

# Sort function for sorting lists of tags.  Sort by package, version, area,
# type, tag, and then any extra data.  This will produce the best HTML output.
#
# Note that source tags must come before all other tags, hench the "unfair"
# priority for those.  This is because the first tags listed are assumed to
# be source package tags.
sub by_tag {
    if ($a->{type} ne $b->{type}) {
        return -1 if $a->{type} eq 'source';
        return  1 if $b->{type} eq 'source';
    }
    return
         $a->{package} cmp $b->{package}
      || $a->{version} cmp $b->{version}
      || $a->{area}    cmp $b->{area}
      || $a->{type}    cmp $b->{type}
      || $a->{tag}     cmp $b->{tag}
      || $a->{extra}   cmp $b->{extra};
}

sub chdir_system {
    my ($dir, $cmd) = @_;
    my $pid = fork();
    if ($pid) {
        waitpid($pid, 0) == $pid or die "waitpid failed: $!\n";
        return $?;
    }

    chdir($dir);
    exec @$cmd
      or die "exec failed: $!";
}

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
