#!/usr/bin/perl -w

# duck - the Debian Url Checker
# Copyright (C) 2014 Simon Kainz <simon@familiekainz.at>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# he Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# On Debian GNU/Linux systems, the complete text of the GNU General
# Public License can be found in `/usr/share/common-licenses/GPL-2'.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

use strict;

use lib '/usr/share/duck/lib';

use DUCK;
use Getopt::Std;
use YAML::XS qw(Load);
use Parse::DebControl qw(parse_file);
use Regexp::Common qw /URI Email::Address/;
use Email::Address;

sub proc($;$;$;$;$);
sub guess_type($);

our $VERSION="0.5";
our $copyright_year="2014";

$Getopt::Std::STANDARD_HELP_VERSION=1;

my $cf_parser = new Parse::DebControl;

#my $debug=0;
my $exitcode=0;

my @yaml_urls;

my @extract=("Homepage","Repository","Repository-Browse","Screenshots","Bug-Submit","Bug-Database","Changelog","Donation","FAQ","Gallery","Other-References","Webservice","Reference","URL","Eprint");

my @extract_copyright=("Format","Source");

my @upstream_filenames=("debian/upstream","debian/upstream-metadata.yaml","debian/upstream/metadata");

			 
my $extract_hash;
my $extract_copyright_hash;

my $upstream_filename;


my %opt;
getopts('qvf:u:c:FUCn', \%opt);

if ( $opt{v} && $opt{q} ) 
{
    print STDERR " Please specify either -q or -v\n";
    exit(1);
}

foreach my $a (@extract)
{
    $extract_hash->{$a}=1;
}

foreach my $a (@extract_copyright)
{
    $extract_copyright_hash->{$a}=1;
}

my $DUCK= DUCK->new();
my $funcref= $DUCK->cb();

my @entries;

if (!$opt{C}) {

#processing copyright file

open my $fh,"<",($opt{c} or "debian/copyright");

my @copy_raw=<$fh>;

close($fh);
chomp @copy_raw;

my $linenum=0;
foreach my $copyright_line (@copy_raw)
{
    $linenum++;
    $copyright_line =~ s/^[*\s#\-|\/\.]*//;
    $copyright_line =~ s/[\s#\-|\)*]*$//;
    
    next unless length($copyright_line);
    if ($copyright_line =~ /($RE{URI}{HTTP}{-keep})/)
    {
	push (@entries, ["debian/copyright:".$linenum,"URL",$1,$copyright_line ]);
    }
    if ($copyright_line =~ /@/)
    {
	my $copyright_line_mangled =$copyright_line;
	$copyright_line_mangled =~ s/[\*\#|<>\(\)\/]/ /g;
	$copyright_line_mangled =~ s/\s\s*/ /g;
	next unless length($copyright_line_mangled);
	my @emails = ($copyright_line_mangled =~ /$RE{Email}{Address}{-keep}/go );
	if (@emails)
	{
	    my @parsed = map $_->address,Email::Address->parse(@emails);
	    
	    foreach (@parsed)
	    {
		push (@entries, ["debian/copyright:".$linenum,"Email",$_,$copyright_line_mangled,$copyright_line ]);
	    }
	    
	}
	
	
    }
    
    
}

}
#Processing debian/control file

if (!$opt{F})
{
    my $opts= {stripComments => 'true'};
    my $cf=($opt{f} or "debian/control");
    my @data_file = $cf_parser->parse_file($cf, $opts);
    my @cfdata=$data_file[0][0];
 
# create list of urls from debian/control
    
    foreach my $cfline1 (@data_file)
    {
	foreach my $cfline2 (@$cfline1)
	{
	    foreach my $k (keys %$cfline2)
	    {
		push (@entries, ["debian/control",$k,$cfline2->{$k} ]);
	    }
	}
    }
}

#Processing upstream metadata file
if (!$opt{U})
{
# extend list of urls by urls from upstream metadata
    foreach (@upstream_filenames)
    {
	@yaml_urls=();
	
	if ( -f $_)
	{
	    $upstream_filename=$_;
	    open my $fh,"<",$_;
	    
	    my @raw=<$fh>;
	    my $raw_string=join("",@raw);
	    
	    close($fh);
	my $hashref;
	    
	    eval { Load($raw_string);}; if (!$@)
	    {
		$hashref=Load($raw_string);
		
		foreach my $k (keys $hashref)
		{
		    if ($extract_hash->{$k})
		    { 
			proc("",\@yaml_urls,$k,$hashref->{$k});
		    }
		}
		
	    }
	    
	}
	

	
	foreach my $yaml_url(@yaml_urls)
	{
	    # try to be smart: git:// and svn:// based urls must not be handled 
	    # by curl.
	    
	    my $keyname=guess_type(@$yaml_url[1]); 
	    
    if (!$keyname) {$keyname="URL";}
	    @$yaml_url[1] =~ s/^\s*//;
	    push (@entries, [$upstream_filename.": ".@$yaml_url[2],$keyname,@$yaml_url[1] ]);
	}
    }
    
}

	
# iterate over all urls, run checks.


foreach my $entry (@entries)
{
    my $type=@$entry[0];
    my $k=@$entry[1];
    my $url=@$entry[2];
    my $origline=@$entry[3];

    chomp $origline unless !$origline;
  
    if ($funcref->{$k})
    {

  if ($opt{n})
    {
         print STDOUT $type.": ".$k.": ".$url.": ";
	 print STDOUT " DRY RUN\n";
	 next;
    }


	my $res=&{$funcref->{$k}}($url);
	
	if (!defined $res)
	{
	    if (!$opt{q})
	    {
		print STDERR " Skipping field ".$k." (Reason: Missing helper!)\n";
	    }
	}
	else
	{
	if ($res->{retval}>0)
	{
	    if (!$opt{q})
	    {
		print STDERR  $type.": ".$k.": ".$url.": ";
		
		if ($origline)
		{
		    print STDERR $origline.": ";
		}
		
		print STDERR " ERROR\n";
		print STDERR $res->{response};
		print STDERR "\n\n";
	    }
	   $exitcode=1;
	}
	else
	{
	 if ($opt{v})
	 {
	     print STDOUT $type.": ".$k.": ".$url.": ";
	     if ($origline)
	     {
		 print STDOUT $origline.": ";
	     }
	     print STDOUT " OK\n\n";
	 }
	 
	}
	}
    }



}

exit($exitcode);

##############################################################################
# Helper functions

sub guess_type($)
{

    my ($url)=@_;
    return "Vcs-Git" if ($url =~/^\s*git:\/\//);
    return "Vcs-Svn" if ($url =~/^\s*svn:\/\//);
    return "URL" if ($url =~/$RE{URI}{HTTP}/);
    return "URL" if ($url =~/$RE{URI}{FTP}/);
    return undef;


}


sub HELP_MESSAGE()
{
print STDOUT <<EOF;

Usage: duck [options] 
  -h\t--help\tdisplay this usage information and exit
  -f file\tspecify path to control file
  -F\t\tskip processing of debian/control file
  -u file\tspecify path to upstream metadata file
  -U\t\tskip processing of upstream metadata file
  -c file\tspecify path to copyright file
  -C\t\tskip processing of copyright file
  -q\t\tquiet mode, suppress all output
  -v\t\tverbose mode
  -n\t\tdry run, don't run any checks, just show what would be checked
EOF
    exit(0);
}

sub VERSION_MESSAGE()
{
    print "duck $VERSION\n";

print <<EOF;
This code is copyright $copyright_year by Simon Kainz <simon\@familiekainz.at>
all rights reserved.
This program comes with ABSOLUTELY NO WARRANTY.
You are free to redistribute this code under the terms of the
GNU General Public License, version 2 or later.

EOF
}


sub proc($;$;$;$;$)
{
    my ($sp,$ref,$key,$r,$p)=@_;
    my $t=ref($r);
    
    if ($t eq "HASH")
    {
	my %a=%{$r};
	foreach my $e (keys %a)
	{
	    return proc($sp,$ref,$e,$a{$e},$key);
	}
	
    }
    
    
    if ($t eq "ARRAY")
    {
	
	my @a=@{$r};
	foreach my $e (@a)
	{
	    return proc($sp,$ref,$key,$e,$key);
	}
    }
    
    if ($t eq "")
    {
	if ($extract_hash->{$key})
	{
	    my @data=($sp,$r,$key);
	    push(@{$ref},\@data);
	}
    }
}
