#!/usr/bin/perl
# SPDX-FileCopyrightText: 2019 Robert Krawitz <rlk@alum.mit.edu>
# SPDX-License-Identifier: GPL-2.0-or-later

# Maintain KPhotoAlbum index.xml files

use strict;
use warnings;

use XML::LibXML;
use DBI;
use DBD::SQLite::Constants qw/:file_open/;
use Getopt::Long;
use Carp("cluck");
use POSIX;

my (%categories);
# Map between category name and ID for compressed files
my (%category_map);
my (@category_names);
# This is stored as {category}{member}{groupname}, as members can only be
# members of one group.
my (%member_groups);
# $group_closure{$category}{value}{values...}
my (%group_closure);

# Global image (for filtering)
my (%current_image);

my (%exif_data);

my ($opt_index_file);
my ($opt_filter_file);
my ($opt_list_type);
my ($opt_use_exif);
my ($opt_print_count);
my ($opt_dryrun);
my (@opt_exif_vars);
our ($kpa_filter);

# Needs to be kept up to date with XMLDB/Database.cpp
my (@standard_vars) = (
    "file",
    "label",
    "description",
    "startDate",
    "endDate",
    "angle",
    "md5sum",
    "width",
    "height",
    "rating",
    "stackId",
    "stackOrder",
    "videoLength");

my ($rootdir);

my (@exif_vars) = (
    "Exif_Photo_FocalLength",
    "Exif_Photo_ExposureTime",
    "Exif_Photo_ApertureValue",
    "Exif_Photo_FNumber",
    "Exif_Photo_Flash",
    "Exif_Photo_Contrast",
    "Exif_Photo_Sharpness",
    "Exif_Photo_Saturation",
    "Exif_Image_Orientation",
    "Exif_Photo_MeteringMode",
    "Exif_Photo_ISOSpeedRatings",
    "Exif_Photo_ExposureProgram",
    "Exif_Image_Make",
    "Exif_Image_Model",
    "Exif_GPSInfo_GPSVersionID",
    "Exif_GPSInfo_GPSAltitude",
    "Exif_GPSInfo_GPSAltitudeRef",
    "Exif_GPSInfo_GPSMeasureMode",
    "Exif_GPSInfo_GPSDOP",
    "Exif_GPSInfo_GPSImgDirection",
    "Exif_GPSInfo_GPSLatitude",
    "Exif_GPSInfo_GPSLatitudeRef",
    "Exif_GPSInfo_GPSLongitude",
    "Exif_GPSInfo_GPSLongitudeRef",
    "Exif_GPSInfo_GPSTimeStamp",
    "Exif_Photo_LensModel"
    );

my (@all_vars) = (
    @standard_vars,
    "mediaType");

sub max_width(\@) {
    my ($strings) = @_;
    my ($answer) = 0;
    map { $answer = length if (length > $answer); } @$strings;
    return $answer;
}

sub cols(\@) {
    my ($strings) = @_;
    my ($width) = max_width(@$strings) + 2;
    my ($answer) = floor((80 - 12)/$width);
    $answer = 1 if ($answer == 0);
    return $answer;
}

sub rows(\@) {
    my ($strings) = @_;
    my ($cols) = cols(@$strings);
    return ceil(scalar @$strings / $cols);
}

sub generate_vars(\@) {
    my ($strings) = @_;
    my (@strings) = sort sort map {my $a = $_; $a =~ s/ //g; "\$$a"} @$strings;
    my ($count) = scalar @strings;
    my ($width) = max_width(@strings);
    my ($cols) = cols(@strings);
    my ($pcols) = $cols;
    my ($rows) = rows(@strings);
    my ($full_rows) = $count % $rows;
    $full_rows = $rows if ($full_rows == 0);
    my ($answer) = "";
    foreach my $i (0..$rows - 1) {
	$pcols-- if ($i == $full_rows);
	my ($fmt) = "            " . join(" ", map {sprintf("%%-%ds", $width)} (1..$pcols));
	my (@pvars) = map { $strings[$_]} grep { $_ % $rows == $i; } (0..$count - 1);
	$answer .= sprintf("$fmt\n", @pvars);
    }
    return $answer;
}

sub usage() {
    my ($known_vars) = generate_vars(@all_vars);
    my ($exif_vars) = generate_vars(@exif_vars);
    my ($usage) = << "FINIS";
Usage: $0 [options...] [filter]

	Filter a KPhotoAlbum database file for either images files or
	category values.

	<filter> is a filter, written as a Perl expressions.  Variable
	names are generated from the attributes.  The supported
	variables are:

$known_vars
	The following EXIF attributes may be specified in the same way:

$exif_vars
	In addition, the following functions are provided to match on
	categoried information:

	    hasKeyword([\$value])
	    hasPerson([\$value])
	    hasPlace([\$value])
	    hasToken([\$value])
	    hasAttribute(\$category, [\$value])
	    matchesKeyword(\$pattern)
	    matchesPerson(\$pattern)
	    matchesPlace(\$pattern)
	    matchesToken(\$pattern)
	    matchesAttribute(\$category, \$pattern)

	The "matches" functions accept Perl regular expressions (see
	perlre(1)).  The "has" functions take the value as an optional
	argument; if not provided, they search for images that have any
	value of the specified category.

	The following options are available:

	-f|--filter-file file	Location of file defining the filter.
				The filter-file is a Perl fragment; at
			     	a minimum, it must define `\$kpa_filter'.
	-d|--db filename	Location of the index file.  Defaults to
				your normal KPhotoAlbum database file.
				If you use EXIF filtering, the EXIF data
				is extracted from exif-info.db.
	-l|--list attribute     Rather than listing matching files,
			        this lists all values of the specified
			        attribute.  If you want to use
			        a category such as keywords for this
			        specify ``cat:Keywords'' as appropriate.
	-c|--count		In combination with --list, prints the
			        number of matching images for each
			        value (histogram).
	--exif			Extract EXIF data in addition to index
				file information.  Enabled if the filter
				expression appears to require it.  Normally
			        not needed.  You can specify --no-exif if
				you know you don't need EXIF data; this
				improves performance.
	--exif-vars=var,...	List of EXIF variables to extract.
				See above for available variables.
				Restricting the extracted variables may
			        improve performance.
	--dry-run		Print a dry run of the filtering loop,
				for debugging.
	-h|--help		Print this message.
FINIS
    print STDERR $usage;
    exit(1);
}

################################################################
################################################################
# Load files ###################################################
################################################################
################################################################


################################################################
# Utilities
################################################################

sub isNode($$) {
    my ($node, $name) = @_;
    return ($node->nodeType() == 1 && lc $node->nodeName() eq $name);
}

sub computeClosure($\@) {
    my ($category, $members) = @_;
    my (%answer);
    foreach my $value (@$members) {
	map { $answer{$_} = 1;} keys %{$group_closure{$category}{$value}};
    }
    return sort keys %answer;
}

################################################################
# Categories
################################################################

sub loadCategories($$) {
    my ($node, $compressed) = @_;
    foreach my $child($node->childNodes()) {
	next if !isNode($child, "category");
	my ($category) = $child->getAttribute("name");
	$categories{$category} = {};
	$category_map{$category} = [];
	$member_groups{$category} = {};
	$group_closure{$category} = {};
	my (@members);
	foreach my $grandchild ($child->childNodes()) {
	    next if !isNode($grandchild, "value");
	    my ($value) = $grandchild->getAttribute("value");
	    # This works for both compact and original file format
	    $categories{$category}{$value} = 1;
	    $category_map{$category}[$grandchild->getAttribute("id")] = $value;
	    $group_closure{$category}{$value} = {};
	    $group_closure{$category}{$value}{$value} = 1;
	}
    }
    @category_names = sort keys %category_map;
}

################################################################
# Images
################################################################

# Image options and values for uncompressed files.

sub loadUncompressedOptions(\%) {
    my ($image) = @_;
    my (%options);
    my ($node) = $$image{"__node"};
    foreach my $child ($node->childNodes()) {
	next if !isNode($child, "options");
	my (@members);
	foreach my $grandchild ($child->childNodes()) {
	    next if !isNode($grandchild, "option");
	    my ($category) = $grandchild->getAttribute("name");
	    foreach my $greatgrandchild ($grandchild->childNodes()) {
		next if !isNode($greatgrandchild, "value");
		my ($val) = $greatgrandchild->getAttribute("value");
		push @members, $val;
	    }
	    map { $options{$category}{$_} = 1; } computeClosure($category, @members);
	}
    }
    return \%options;
}

# Compressed XML files are simpler to parse; there's simply an attribute
# for each category

sub loadCompressedOptions(\%) {
    my ($image) = @_;
    my (%options);
    foreach my $category (@category_names) {
	my ($members) = $$image{$category};
	my (@members);
	if (defined $members && $members ne '') {
	    my ($map) = $category_map{$category};
	    @members = map {$$map[$_]} split(/,/, $members);
	    $options{$category} = {};
	    map { $options{$category}{$_} = 1; } computeClosure($category, @members);
	}
    }
    return \%options;
}

sub loadOptions() {
    if (! defined $current_image{"options"}) {
	if ($current_image{"__compressed"}) {
	    $current_image{"options"} = loadCompressedOptions(%current_image);
	} else {
	    $current_image{"options"} = return loadUncompressedOptions(%current_image);
	}
    }
}

sub hasAttribute($;$) {
    my ($category, $value) = @_;
    loadOptions();
    if (! defined $category_map{$category}) {
	die "hasAttribute: no such category $category\n";
    }
    if (defined $value) {
	return defined $current_image{"options"}{$category}{$value};
    } else {
	return defined $current_image{"options"}{$category};
    }
}

sub hasKeyword(;$) {
    my ($value) = @_;
    return hasAttribute("Keywords", $value);
}

sub hasPerson(;$) {
    my ($value) = @_;
    return hasAttribute("People", $value);
}

sub hasPlace(;$) {
    my ($value) = @_;
    return hasAttribute("Places", $value);
}

sub hasToken(;$) {
    my ($value) = @_;
    return hasAttribute("Token", $value);
}

sub matchesAttribute($$) {
    my ($category, $value) = @_;
    loadOptions();
    return grep(/$value/, keys %{$current_image{"options"}{$category}}) > 0;
}

sub matchesKeyword($) {
    my ($value) = @_;
    return matchesAttribute("Keywords", $value);
}

sub matchesPerson($) {
    my ($value) = @_;
    return matchesAttribute("People", $value);
}

sub matchesPlace($) {
    my ($value) = @_;
    return matchesAttribute("Places", $value);
}

sub matchesToken($) {
    my ($value) = @_;
    return matchesAttribute("Token", $value);
}

sub makeVarcode($) {
    my ($identifier) = @_;
    my ($varname) = $identifier;
    $varname =~ s/ //g;
    return "        my (\$$varname) = \$current_image{\"$identifier\"};";
}

sub loadImages($$) {
    my ($node, $compressed) = @_;
    my ($varcode) = join("\n", map {makeVarcode($_)} @standard_vars);
    if (! defined $kpa_filter) {
	$kpa_filter = '1';
    }
    my ($code) = << 'EOF';
{
    no warnings "uninitialized";
    no warnings "numeric";
    my %items_found;
    my $matched_count = 0;
    foreach my $child ($node->childNodes()) {
	next if !isNode($child, "image");
	%current_image=();
	$current_image{"__node"} = $child;
	$current_image{"__compressed"} = $compressed;
	map { $current_image{$_->nodeName} = $_->value } $child->attributes();
EOF
    $code .= "$varcode\n";
    $code .= << 'EOF';
	# Restore any attributes defaulted in version 8
	$current_image{"angle"} = 0 if (! defined $current_image{"angle"});
	$current_image{"endDate"} = $current_image{"startDate"} if (! defined $current_image{"endDate"});
	if (! defined $current_image{"label"}) {
	    my ($label) = $file;
	    $label =~ s,^.*/(.*)\.[^.]*$,$1,;
	    $current_image{"label"} = $label;
	}
EOF
    $code .= << 'EOF';
	my ($mediaType) = defined $videoLength ? "Video" : "Image";
EOF
    if ($opt_use_exif) {
	my ($exifdecl) = join("\n        ", map {"my (\$$_);"} @exif_vars);
	my ($exifcode) = join("\n	    ", map {"\$$exif_vars[$_] = \$\$row[$_];"} (0..$#exif_vars));
	$code .= << "EOF";
	$exifdecl;
	if (my \$row = \$exif_data{"\$rootdir\$file"}) {
	    $exifcode
	}
EOF
    }
    $code .= << "EOF";
        if ($kpa_filter) {
	    \$matched_count++;
EOF
    if ($opt_list_type) {
	if ($opt_list_type =~ /[Cc]at(egory)?:(.*)/) {
	    $code .= << "EOF"
	    loadOptions();
	    map { \$items_found{\$_}++; } keys \%{\$current_image{"options"}{"$2"}};
EOF
	} else {
	    $code .= "            \$items_found{\$$opt_list_type}++;\n";
	}
	$code .= << 'EOF';
	}
    }
EOF
	if ($opt_print_count) {
	    $code .= << 'EOF'
    print join("\n", map {sprintf("%7d %s", $items_found{$_}, $_);} sort keys %items_found), "\n";
    printf("%7d Total\n", $matched_count);
EOF
	} else {
	    $code .= << 'EOF'
    print join("\n", sort keys %items_found), "\n";
EOF
	}
    } else {
	$code .= << 'EOF';
            print "$file\n";
        }
    }
EOF
    }
    $code .= "}\n";
    if ($opt_dryrun) {
	print STDERR $code;
	exit;
    }
    eval $code;
    if ($@) {
	my $known_vars = join("\n", sort map {my $a = $_; $a =~ s/ //g; "    $a"} @all_vars);
	die "Filter $kpa_filter failed:\n\n$@\n";

    }
}

################################################################
# Member groups
################################################################

sub loadMemberGroups($$) {
    my ($node, $compressed) = @_;
    foreach my $child ($node->childNodes()) {
	next if !isNode($child, "member");
	my ($category) = $child->getAttribute("category");
	my ($groupname) = $child->getAttribute("group-name");
	if ($compressed) {
	    my ($members) = $child->getAttribute("members");
	    if ($members) {
		my ($map) = $category_map{$category};
		my (@members) = grep { ! $_ == 0 } split(/,/, $members);
		map {
		    if (defined $$map[$_]) {
			$member_groups{$category}{$$map[$_]} = $groupname;
		    } else {
			warn "Unknown keyword ID $_ in group $groupname\n";
		    }
		} @members;
	    }
	} else {
	    my ($member) = $child->getAttribute("member");
	    $member_groups{$category}{$member} = $groupname;
	}
    }
    foreach my $category (sort keys %member_groups) {
	foreach my $member (keys %{$member_groups{$category}}) {
	    my ($parent) = $member_groups{$category}{$member};
	    # Break up any circular member groups
	    my (%seen_parents);
	    my ($parentid) = 1;
	    do {
		if (defined $seen_parents{$parent}) {
		    my (%reverse_parents) = reverse %seen_parents;
		    warn "Circular member group found, members: " . join(" <= ", map { $reverse_parents{$_}} sort keys %reverse_parents) . "\n";
		    last;
		}
		$group_closure{$category}{$member}{$parent} = 1;
		$seen_parents{$parent} = $parentid++;
	    } while (defined ($parent = $member_groups{$category}{$parent}));
	}
    }
}

################################################################
# Top level file loader
################################################################

sub load_file($) {
    my ($file) = @_;
    my ($images);
    if ($opt_use_exif && @opt_exif_vars) {
	my (%exif_keys);
	my (%known_exif_keys);
	map {$known_exif_keys{$_} = 1;} @exif_vars;
	foreach my $exif (@opt_exif_vars) {
	    map { $exif_keys{$_} = 1; } grep {defined $known_exif_keys{$_}}split(/[ ,]+/, $exif);
	}
	delete $exif_keys{"filename"};
	@exif_vars = ("filename", keys %exif_keys);
    }

    if ($opt_dryrun) {
	loadImages($images, 1);
	exit;
    }

    my $doc = XML::LibXML->load_xml(location => $file);
    if (! $doc) {
	die "Can't open $file as a KPhotoAlbum database.\n";
    }

    my $kpa = ${$doc->findnodes('KPhotoAlbum')}[0];
    if (! $kpa) {
	die "$file is not a KPhotoAlbum database.\n";
    } elsif ($kpa->getAttribute("version") != 7 &&
	$kpa->getAttribute("version") != 8) {
	die "kpa-list-images only works with version 7 and 8 files.\n";
    }

    $rootdir = $file;
    $rootdir =~ s,[^/]*$,,;
    if ($opt_use_exif) {
	my ($querystr) = 'SELECT ' . join(', ', "filename", @exif_vars) . " FROM exif";

	my $exif_db .= "${rootdir}exif-info.db";

	if (! -f $exif_db) {
	    die "Expected EXIF database at $exif_db, but can't find it.\n"
	}
	my $EXIF_DB = DBI->connect("dbi:SQLite:$exif_db", undef, undef, {
	    sqlite_open_flags => SQLITE_OPEN_READONLY,
				   });
	if (! defined $EXIF_DB) {
	    die "Can't open EXIF database $exif_db.\n"
	}
	my $exif_query = $EXIF_DB->prepare($querystr);
	$exif_query->execute();
	# This is measured to be considerably (about 15% total,
	# considerably more for EXIF database alone) than individual
	# lookups on a prepared query.
	while (my @row = $exif_query->fetchrow_array) {
	    my ($filename) = shift @row;
	    $exif_data{$filename} = \@row;
	}
    }
    my ($compressed) = int $kpa->getAttribute("compressed");

    foreach my $topcn ($kpa->childNodes()) {
	if (isNode($topcn, "categories")) {
	    loadCategories($topcn, $compressed);
	    if (! $opt_print_count && $opt_list_type && $opt_list_type =~ /^cat(egory)?:(.*)/ && !defined $kpa_filter) {
		$opt_list_type = $2;
		if (! defined $category_map{$opt_list_type}) {
		    die "Can't list category $opt_list_type: no such category\n";
		}
		my $stuff = join("\n", sort grep(defined $_ && $_ ne '', @{$category_map{$opt_list_type}}));
		if ($stuff) {
		    print "$stuff\n";
		}
		exit;
	    }
	} elsif (isNode($topcn, "images")) {
	    $images = $topcn;
	} elsif (isNode($topcn, "member-groups")) {
	    loadMemberGroups($topcn, $compressed);
	} elsif (isNode($topcn, "blocklist")) {
	} elsif ($topcn->nodeType() == 1) {
	    warn "Found unknown node " . $topcn->nodeName() . "\n";
	}
    }
    # Load images last so that we can stream them through.
    loadImages($images, $compressed);
}

sub get_standard_kpa_index_file() {
    my ($kpa_config) = $ENV{"HOME"} . "/.config/kphotoalbumrc";
    open KPACONFIG, "<", "$kpa_config" or return "";
    my ($imageDBFile) = "";
    while (<KPACONFIG>) {
	if (/^imageDBFile=(.*)$/) {
	    $imageDBFile = $1;
	    last;
	}
    }
    close KPACONFIG;
    return $imageDBFile;
}

my ($do_help);

my (%options) = ("f=s"           => \$opt_filter_file,
		 "filter-file=s" => \$opt_filter_file,
		 "d=s"           => \$opt_index_file,
		 "db=s"          => \$opt_index_file,
		 "l=s"           => \$opt_list_type,
		 "list=s"        => \$opt_list_type,
		 "list-values=s" => \$opt_list_type,
		 "exif!"         => \$opt_use_exif,
		 "exif-vars=s"   => \@opt_exif_vars,
		 "c"             => \$opt_print_count,
		 "count!"        => \$opt_print_count,
		 "dryrun!"       => \$opt_dryrun,
		 "dry-run!"      => \$opt_dryrun,
		 "h"             => \$do_help,
		 "help"          => \$do_help,
    );

Getopt::Long::Configure("bundling", "require_order");
if (!Getopt::Long::GetOptions(%options) || $do_help) {
    usage();
}

if ($opt_filter_file) {
    if (!($opt_filter_file =~ m,/,)) {
	$opt_filter_file = "./$opt_filter_file";
    }
    my $retval = do $opt_filter_file;
    if ($@) {
	die "Cannot process filter file $opt_filter_file: $@\n";
    } elsif (! defined $retval) {
	die "Cannot read filter file $opt_filter_file: $!\n";
    } elsif (! defined $kpa_filter) {
	die "Filter file $opt_filter_file does not define \$kpa_filter.\n"
    }
} elsif ($#ARGV >= 0) {
    $kpa_filter = $ARGV[0];
}
if (! defined $opt_use_exif) {
    if ((defined $kpa_filter && $kpa_filter =~ /Exif_/) ||
	(defined $opt_list_type && $opt_list_type =~ /Exif_/)) {
	$opt_use_exif = 1;
    } else {
	$opt_use_exif = 0;
    }
}
my ($index_file);
if ($opt_index_file) {
    $index_file = $opt_index_file;
} else {
    $index_file = get_standard_kpa_index_file();
}
load_file($index_file);
