#! /usr/bin/perl -w

use strict;
use Symbol;
use Digest::MD5;

# Private handles used for all of this script's regular output.  Only the
# globs are allocated here; the main block attaches them to the process's
# standard output and standard error before anything is printed.
my $STDOUT = Symbol::gensym;    # to avoid 'bareword' warnings
my $STDERR = Symbol::gensym;

# usage($status)
#
# Print the help text and terminate the program.
#
#   $status - exit code for the process; optional, a missing or false
#             value is treated as 0.
#
# With a zero status the text goes to $STDOUT (the user asked for help);
# with a nonzero status it goes to $STDERR (help accompanies an error).
# This routine never returns: it always ends by calling exit($status).
sub usage {
  my ($status) = @_;
  $status ||= 0;

  # Interpolating heredoc: $0 expands to the name the script was run as.
  my $msg = <<EoF;
Usage:
  $0 -i sumfile.txt plainfile [...] > outsum.txt

The sumfile should contain lines of the format produced by the md5sum
command, i.e. a checksum (32 hex characters), followed by two spaces,
followed by the filename.  Our output is in the same format.

Arguments are processed in order, left-to-right, so that later
items supersede earlier items.

Option "-i ... " may be specified as many times as desired, from zero
on up.  We take each checksum given in the file at face value
(unless/until it is superseded by a later argument).  We do not
recompute the sum; indeed we don't care whether the given file even
exists.

Plainfiles may be specified as many times as desired.  We compute
the md5sum of the contents of the file.

Whenever (via -i ... or via a plainfile) we learn a sum for a
particular filename, the newly-learned sum replaces the old sum
in our database.

  BEWARE: When deciding to supersede an old sum with a new sum, the
  filename matching is brutally literal.  In particular "foo.bar" will
  not match "./foo.bar", so you need to be consistent in your naming,
  or ugly, confusing output will result.  We try to produce warnings
  about the most common examples of this problem, but mostly you are
  on your own.
EoF

  my $ouch = $status ? $STDERR : $STDOUT;
  print $ouch $msg;
  exit $status;
}

# Spellings seen so far, keyed by the name with one leading "./" removed.
# Each value is either the first spelling seen for that key, or the
# sentinel ' - ' once a conflict warning has been issued for it.
my %conflict = ();

# check_con($arg)
#
# Record the filename spelling $arg and warn on STDERR the first time the
# same file shows up under a different spelling (e.g. "foo" vs "./foo").
# At most one warning is printed per key.
sub check_con {
  my ($arg) = @_;

  # Key is the name minus a single leading "./".
  (my $key = $arg) =~ s{^[.]/}{};

  # First sighting: just remember how it was spelled.
  unless (exists $conflict{$key}) {
    $conflict{$key} = $arg;
    return;
  }

  my $prior = $conflict{$key};

  # Nothing to do if we already warned, or the spelling is the same.
  return if $prior eq ' - ' || $prior eq $arg;

  print STDERR "Warning: '$arg' conflicts with '$prior'\n";
  $conflict{$key} = ' - ';                  # only warn once
  return;
}

# Main program.
#
# Walks @ARGV left to right, building %db (filename => hex checksum):
#   -h / --h...   print the help text and exit 0;
#   -i sumfile    load "checksum filename" lines from sumfile as given;
#   -other        report the unknown option, print help, exit 1;
#   plainfile     compute the MD5 of the file's contents.
# A later entry for the same (literal) filename replaces an earlier one.
# Finally the database is written to standard output, sorted by filename.
main: {
  # Attach our private handles to the real standard streams.
  open ($STDOUT, '>&', \*STDOUT)         # avoid 'bareword' warnings
    or die "Cannot duplicate STDOUT: $!\n";
  open ($STDERR, '>&', \*STDERR)
    or die "Cannot duplicate STDERR: $!\n";

  my $md5 = Digest::MD5->new;
  my %db = ();

  while (@ARGV) {
    my $arg = shift @ARGV;
    if ($arg =~ m'^--?h') {
      usage(0);                             # does not return
    } elsif ($arg =~ m'^-i') {
      my $sumfile = shift @ARGV;
      (defined $sumfile) || die "Option -i requires an argument.\n";
      open (my $in, '<', $sumfile)
        or die "Cannot read old database '$sumfile': $!\n";

      while (my $line = <$in>) {
        chomp $line;
        next unless $line =~ m'\S';         # silently skip blank lines

        # First whitespace-delimited token is the checksum; everything
        # after the separating whitespace is the filename.
        my ($sum, $fn) = split(' ', $line, 2);
        if (!defined $fn || $fn eq '') {
          # A line without a filename cannot be stored; storing it would
          # put a bogus empty-named entry into the output.
          print $STDERR
            "Warning: ignoring malformed line $. of '$sumfile'\n";
          next;
        }
        check_con($fn);
        $db{$fn} = $sum;
      }
      close $in;
    } elsif ($arg =~ m'^-') {
      print $STDERR "Unrecognized option '$arg'\n";
      usage(1);
    } else {
      open (my $in, '<', $arg)
        or die "Cannot read message file '$arg': $!\n";
      binmode($in);         # digest the raw bytes, with no I/O layers
      $md5->addfile($in);
      check_con($arg);
      # hexdigest also resets the digest object, so the same object can
      # be reused for the next plainfile.
      $db{$arg} = $md5->hexdigest;
      close $in;
    }
  }

# output the results
  for my $file (sort keys %db) {
    ## Note there are _two_ spaces in the following:
    print $STDOUT "$db{$file}  $file\n";
    ## Note        ..........^^.......
  }

  # Output is buffered, so a write failure (e.g. a full disk) may only
  # become visible when the handle is closed.
  close ($STDOUT) or die "Error writing output: $!\n";
}
