package base26;

sub usage {
  print <<\EoF;
Convert base26 numerals to/from plain numbers
Examples:
 "A"   <==> 1
 "Z"   <==> 26
 "AA"  <==> 27
 "XFD" <==> 16384

Note that base26 numerals are widely used as the column-name
aka column-letter in spreadsheets such as gnumeric and excel.

Technical note:  Base26 has the weird property that the
digits represent 1 through 26 (rather than the customary
0 through 25).

To ensure openness, quality, and maintainability,
self-test methods are provided. Test with:
:; perl -Mbase26 -e 'base26::usage()'
:; perl -Mbase26 -e 'test26(1)'         # main test suite
:; perl -Mbase26 -e "check26(0xffffFFFFffffFFFF, 'GKGWBYLWRXTLPO')"
:; perl -Mbase26 -e "check26(0xffffFFFFffffFFFF+1, 'GKGWBYLWRXTLPP')"
      # bigint+1, should overflow
EoF
}

use strict;
use Exporter;
use vars qw(@ISA @EXPORT);
use POSIX;
@ISA = ('Exporter');
@EXPORT = ('decode26', 'encode26', 'test26', 'check26');

###non-portable
### my $bigint =  0xffffFFFFffffFFFF;
### my $halfint = 0x7fffFFFFffffFFFF;

my $bigint =  0xffffFFFF;
my $halfint = 0x7fffFFFF;

# perl can sometimes represent 64-bit unsigned ints,
# but we have to be real careful lest it coerce them to floats
my $ascii_A = ord('A');

# converts 1 to "A"
# converts 26 to "Z"
# converts 27 to "AA"
# converts 16384 to "XFD"
sub encode26 {
  my $arg = shift;
  die "encode26: out of range: '$arg'\n" if $arg <= 0;
  return encode26raw($arg);
}

my $mask = (1<<32) - 1;

# long division is a thing.
# handles 64-bit unsigned integers in perl
sub uint_div {
  use integer;
  my ($top, $bot) = @_;
  my $lefttop = ($top >> 32) & $mask;
  my $leftq = $lefttop / $bot;
  my $leftrem = $lefttop - $leftq * $bot;
  my $righttop = $top & $mask;
  $righttop += $leftrem<<32;
  my $rightq = $righttop / $bot;
  return ($leftq << 32) + $rightq;
}

# Note that the place value of each place is a power of 26, which is
# normal, whereas the digit in each place ranges from 1 to 26, which
# is weird. You to multiply a number by 26, you cannot just shift it
# to the left and append a 0, because there is no representation for
# 0. Instead you have to subtract 1, then shift left, then append Z.
sub encode26raw {
  my $arg = shift;
  $arg--;
  my $rest;
  my $lowbits;
  {
    use integer;
    $rest = uint_div($arg, 26);
    ### printf "++++ arg: %20d  rest: %d\n:", $arg, $rest;
    $lowbits = $arg - $rest * 26;
  }
  my $lowchar = chr($lowbits + $ascii_A);
  if ($rest) {
    return encode26raw($rest) . $lowchar;
  }
  return $lowchar;
}

# converts "A" to 1
# converts "Z" to 26
# converts "AA" to 27
# converts "XFD" to 16384
sub decode26 {
  my ($code) = @_;
  $code =~ tr/a-z/A-Z/;
  my @bytes = unpack("C*", $code);
  my $sum = 0;
  for my $byte (@bytes) {
    my $off = $byte - $ascii_A;
    die "decode26: character '$byte' out of range\n"
      if $off < 0 || $off > 25;
# Delta needed for range checking.
# Multiply by 25 here, because it will be *added* to the sum, later.
    my $delta = 25*$sum + $off + 1;
    my $oldsum = $sum;
    $sum += $delta;
    if ($sum - $delta != $oldsum) {
      use integer;
# do it this way, since quite possibly this die
# (and the whole message) will be caught and dismissed.
      my $msg;
      $msg .=  "decode26: overflow from '$code'\n";
      $msg .= sprintf  "...bigint: %20u\n", $bigint;
      $msg .= sprintf  "...old:    %20u\n", $oldsum;
      $msg .= sprintf  "...delta:  %20u\n", $delta;
      $msg .= sprintf  "...sum:    %20u\n", $sum;
      $msg .= sprintf  "...s-d:    %20u\n", $sum - $delta;
      die $msg;
    }
  }
  return $sum;
}

######################################################################
# As they said in third grade, Check Your Work.

sub check26_sub {
  my ($num, $code) = @_;
  my $encode = encode26($num);
  my $CODE = $code;
  $CODE =~ tr/a-z/A-Z/;
  if ($encode ne $CODE) {
    die "encode26($num) returns '$encode' instead of '$CODE'\n";
  }
  my $decode = decode26($code);
  if ($decode ne $num) {
    die "decode26($code) returns '$decode' instead of '$num'\n";
  }
}

sub check26 {
  my ($num, $code, $msg) = @_;
  if ($msg) {
    eval {
      check26_sub($num, $code);
    };
    if ($@) {
      # should fail, and did
#####      print STDERR "caught the following:\n$@\n";
      return;
    }
    print  STDERR "check26($num, $code) should have failed but didn't",
         " ($msg)...\n";
    printf STDERR "...%u -> %s\n", $num, encode26($num);
    printf STDERR "...%s -> %u\n", $code, decode26($code);
    printf STDERR "...bigint: %u, log(): %f\n",
                 $bigint, POSIX::log2($bigint);
    die "overflow\n";
  }
  check26_sub($num, $code);
}

sub dumpit {
  my $arg = shift;
  printf "%22u encodes as %s\n", $arg, encode26($arg);
}

# dies on any error
# If verbose flag, prints "All OK";
# otherwise silent on success.
sub test26 {
   my $verbose = shift;
   my $A = 1;
   my $B = 2;
   my $C = 3;
   my $D = 4;
   my $Z = 26;
   my $base = 26;
   check26($A, 'A');
   check26($Z, 'Z');
   check26($Z+1, 'AA');                        ## carry
   check26($Z*$base+$Z, 'ZZ');
   check26($Z*$base+$Z+1, 'AAA');              ## carry
   check26(($A*$base+$A)*$base+$A, 'AAA');
   check26(($B*$base+$C)*$base+$D, 'BCD');
   check26(($Z*$base+$Z)*$base+$Z, 'zZz');
   check26(($Z*$base+$Z)*$base+$Z+1, 'AAAA');  ## carry
   check26(7258, 'jsd');
   check26($base ** 5, 'yyyyZ');         # place-value shift rule
   check26(2147483647, 'FXSHRXW');             ## 31 bits

   check26($halfint-1, 'CRPXNLSKVLJFHF');
   check26($halfint+0, 'CRPXNLSKVLJFHG');
   check26($halfint+1, 'CRPXNLSKVLJFHH');
   check26($halfint+2, 'CRPXNLSKVLJFHI');

   check26($bigint-1, 'GKGWBYLWRXTLPN');
   check26($bigint+0, 'GKGWBYLWRXTLPO');
   check26($bigint+1, 'GKGWBYLWRXTLPP', 'overflow');

   print "All OK\n" if $verbose;
   return 0;
}

sub useless {
   dumpit($halfint+0);
   dumpit($halfint+1);
   dumpit($halfint+2);
###non-portable
##   dumpit(0x7fffFFFFffffFFFF);
##   dumpit(0x8000000000000000);

   dumpit($bigint-2);
   dumpit($bigint-1);
   dumpit($bigint-0);
   dumpit($bigint+1);   ## overflow
   dumpit($bigint+2);   ## overflow
   dumpit($bigint+11000);   ## overflow
}

1;