#!/bin/bash

# version 1.1

set -e

# Usage: <script> SOURCEPOOL DESTPOOL
#   SOURCEPOOL - existing directory that is searched for *.deb files
#   DESTPOOL   - existing directory that receives the *.deb.md5 files; the
#                lock file lives in it and Packages.gz is written to its parent
sourcepool="$1"
destpool="$2"
#sourcepool="/srv/archive.ubuntu.com/ubuntu/pool"
#destpool="/srv/pkgsum.ubuntu.com/www/ubuntu/pool"

if [ ! "$sourcepool" ] || [ ! -d "$sourcepool" ] || [ ! "$destpool" ] || [ ! -d "$destpool" ]; then
  echo "WOOHOOO! gimme arguments or i won't work. kthxbye!"
  exit 1
fi

# Resolve both pools to absolute paths. The script changes its working
# directory several times while still using these variables, so relative
# arguments would silently start pointing somewhere else.
sourcepool=$(cd -- "$sourcepool" >/dev/null && pwd)
destpool=$(cd -- "$destpool" >/dev/null && pwd)
lock="$destpool/.lock"

# Take the lock atomically: with noclobber set, the redirection fails when the
# file already exists, so two concurrent runs cannot both get past this point.
if ! ( set -o noclobber; : > "$lock" ) 2>/dev/null; then
  if [ -e "$lock" ]; then
    echo "Lock detected. Exit."
    exit 0
  fi
  echo "Unable to create lock file $lock"
  exit 1
fi

# Release the lock on every exit path of this shell (including aborts caused
# by 'set -e'), so a failed run does not block all following runs.
trap 'rm -f "$lock"' EXIT

## validate md5 (based on the client variant in libs/)

# Check the self-checksum of a generated .md5 file.
# The last line of the file must equal the md5sum of all preceding lines.
# Arguments: $1 - path of the .md5 file to check
# Outputs:   an error message on stdout when the check fails
# Returns:   0 when the checksum matches, 1 otherwise
validate_md5file() {
  local md5file="$1"
  local recorded computed

  # A failing tail (e.g. unreadable file) simply yields an empty checksum,
  # which can never match and is reported as a validation failure below.
  recorded=$(tail -n 1 "$md5file") || recorded=""
  computed=$(head -n -1 "$md5file" | md5sum | awk '{print $1}')

  if [ "$recorded" != "$computed" ]; then
    echo "Cannot validate md5 file $md5file"
    return 1
  fi
}

# Print an error message, remove md5calc's temporary dir/file and abort.
# Arguments: $1 - message, $2 - temporary directory, $3 - temporary file
_md5calc_abort() {
  echo "$1"
  rm -rf "$2" "$3"
  exit 1
}

# Generate "$destpool/<deb>.md5" for one package.
# The result lists, for every regular file of the unpacked package, the lines
# "md5:", <md5sum>, <path>, and for every symlink the lines "sym:", <path>,
# <link target>; its last line is the md5sum of everything above it.
# Globals:   destpool (read)
# Arguments: $1 - path of the .deb relative to the current directory; the same
#                 relative path is used below $destpool for the result
# Outputs:   progress and error messages on stdout
# Returns:   0 when the .md5 file is already valid or has been (re)generated;
#            exits the current (sub)shell with status 1 when a step fails
md5calc() {
  local deb="$1"
  local tempdir tempfile md5 line pkg path

  # check if the md5 exists and validate it. If the md5 is not valid, we regenerate it.
  # we might hit this situation if for some reason the script has been aborted while
  # performing some I/O operations.
  if [ -e "$destpool/$deb.md5" ]; then
    if validate_md5file "$destpool/$deb.md5"; then
      return
    fi
  fi

  echo "Processing: $deb"

  # create temporary dir and file
  tempdir=$(mktemp -d) || tempdir=""
  tempfile=$(mktemp) || tempfile=""

  if [ ! -d "$tempdir" ] || [ ! -f "$tempfile" ]; then
    _md5calc_abort "Unable to create temp dir/file" "$tempdir" "$tempfile"
  fi

  # unpack the deb
  if ! fakeroot dpkg -x "$deb" "$tempdir"; then
    _md5calc_abort "Unable to unpack data" "$tempdir" "$tempfile"
  fi

  if ! dpkg -e "$deb" "$tempdir/DEBIAN"; then
    _md5calc_abort "Unable to unpack metadata" "$tempdir" "$tempfile"
  fi

  # control is the only file that does not land in /var/lib/dpkg/info/$pkgname.
  # don't process it at all server side, instead of propagating the special
  # case at client level.
  rm -f "$tempdir/DEBIAN/control"

  cd "$tempdir" || _md5calc_abort "Unable to enter temp dir" "$tempdir" "$tempfile"

  # make sure we can enter all dirs!
  find * -type d -exec chmod 755 {} \;

  # NOTE(review): 'read' without -r and 'echo -e' both interpret backslashes,
  # so names containing backslashes are not written verbatim. This is kept
  # as-is on purpose because it defines the format of the generated file.

  # calculate the deb contents md5 for file
  find * -type f -exec md5sum {} \; | { while read md5 line; do echo -e "md5:\n$md5\n$line"; done } > "$tempfile"
  # store info about symlinks
  find * -type l | { while read line ; do echo -e "sym:\n$line" && readlink "$line"; done } >> "$tempfile"
  cd - 1>/dev/null 2>&1

  # change to real path of DEBIAN files. The pattern is anchored to the start
  # of the line so that only the extracted control files (listed as DEBIAN/...)
  # are rewritten, not payload paths that merely contain "DEBIAN/".
  pkg="$(basename "$deb" | cut -d "_" -f 1)"
  sed -i -e "s#^DEBIAN/#var/lib/dpkg/info/${pkg}.#" "$tempfile" \
    || _md5calc_abort "Unable to rewrite DEBIAN paths" "$tempdir" "$tempfile"

  # calculate the md5 of the final file and add it at the end.
  md5=$(md5sum "$tempfile" | awk '{print $1}')
  echo "$md5" >> "$tempfile"

  # get the position in the pool, create the destination dir and move the md5 file.
  path=$(dirname "$deb")
  mkdir -p "$destpool/$path" \
    || _md5calc_abort "Unable to store md5 file" "$tempdir" "$tempfile"
  mv "$tempfile" "$destpool/$deb.md5" \
    || _md5calc_abort "Unable to store md5 file" "$tempdir" "$tempfile"
  chmod 644 "$destpool/$deb.md5"

  # cleanup leftovers
  rm -rf "$tempdir" "$tempfile"
}

# generate the md5sums.
# generate the md5sums.
# 'IFS= read -r' hands every path to md5calc exactly as find printed it
# (no backslash processing, no trimming of surrounding whitespace).
cd "$sourcepool"
find * -name "*.deb" | { while IFS= read -r line; do md5calc "$line"; done }
cd - 1>/dev/null 2>&1

# clean up old md5 files that don't have a deb in the archive anymore.
cd "$destpool"
find * -name "*.deb.md5" | \
  { while IFS= read -r line; do
      origname=${line%.md5}
      if [ ! -e "$sourcepool/$origname" ]; then
        echo "Cleaning: $line" && rm -- "$line"
      fi
    done }

# generate the Packages list (one .deb path per line, relative to the source
# pool). This is extremely useful client side: it avoids having to trust even
# more data. We might want to do more here but it can be done later on.
cd "$sourcepool"
find * -name "*.deb" | gzip -9 - > "$destpool/../Packages.gz.new"

# write the list under a temporary name first and rename it into place, then
# publish its checksum next to it.
cd "$destpool/../"
mv Packages.gz.new Packages.gz
md5sum Packages.gz > Packages.gz.md5

rm -f "$lock"

exit 0
