package base26; sub usage { print <<\EoF; Convert base26 numerals to/from plain numbers Examples: "A" <==> 1 "Z" <==> 26 "AA" <==> 27 "XFD" <==> 16384 Note that base26 numerals are widely used as the column-name aka column-letter in spreadsheets such as gnumeric and excel. Technical note: Base26 has the weird property that the digits represent 1 through 26 (rather than the customary 0 through 25). To ensure openness, quality, and maintainability, self-test methods are provided. Test with: :; perl -Mbase26 -e 'base26::usage()' :; perl -Mbase26 -e 'test26(1)' # main test suite :; perl -Mbase26 -e "check26(0xffffFFFFffffFFFF, 'GKGWBYLWRXTLPO')" :; perl -Mbase26 -e "check26(0xffffFFFFffffFFFF+1, 'GKGWBYLWRXTLPP')" # bigint+1, should overflow EoF } use strict; use Exporter; use vars qw(@ISA @EXPORT); use POSIX; @ISA = ('Exporter'); @EXPORT = ('decode26', 'encode26', 'test26', 'check26'); ###non-portable ### my $bigint = 0xffffFFFFffffFFFF; ### my $halfint = 0x7fffFFFFffffFFFF; my $bigint = 0xffffFFFF; my $halfint = 0x7fffFFFF; # perl can sometimes represent 64-bit unsigned ints, # but we have to be real careful lest it coerce them to floats my $ascii_A = ord('A'); # converts 1 to "A" # converts 26 to "Z" # converts 27 to "AA" # converts 16384 to "XFD" sub encode26 { my $arg = shift; die "encode26: out of range: '$arg'\n" if $arg <= 0; return encode26raw($arg); } my $mask = (1<<32) - 1; # long division is a thing. # handles 64-bit unsigned integers in perl sub uint_div { use integer; my ($top, $bot) = @_; my $lefttop = ($top >> 32) & $mask; my $leftq = $lefttop / $bot; my $leftrem = $lefttop - $leftq * $bot; my $righttop = $top & $mask; $righttop += $leftrem<<32; my $rightq = $righttop / $bot; return ($leftq << 32) + $rightq; } # Note that the place value of each place is a power of 26, which is # normal, whereas the digit in each place ranges from 1 to 26, which # is weird. You to multiply a number by 26, you cannot just shift it # to the left and append a 0, because there is no representation for # 0. Instead you have to subtract 1, then shift left, then append Z. sub encode26raw { my $arg = shift; $arg--; my $rest; my $lowbits; { use integer; $rest = uint_div($arg, 26); ### printf "++++ arg: %20d rest: %d\n:", $arg, $rest; $lowbits = $arg - $rest * 26; } my $lowchar = chr($lowbits + $ascii_A); if ($rest) { return encode26raw($rest) . $lowchar; } return $lowchar; } # converts "A" to 1 # converts "Z" to 26 # converts "AA" to 27 # converts "XFD" to 16384 sub decode26 { my ($code) = @_; $code =~ tr/a-z/A-Z/; my @bytes = unpack("C*", $code); my $sum = 0; for my $byte (@bytes) { my $off = $byte - $ascii_A; die "decode26: character '$byte' out of range\n" if $off < 0 || $off > 25; # Delta needed for range checking. # Multiply by 25 here, because it will be *added* to the sum, later. my $delta = 25*$sum + $off + 1; my $oldsum = $sum; $sum += $delta; if ($sum - $delta != $oldsum) { use integer; # do it this way, since quite possibly this die # (and the whole message) will be caught and dismissed. my $msg; $msg .= "decode26: overflow from '$code'\n"; $msg .= sprintf "...bigint: %20u\n", $bigint; $msg .= sprintf "...old: %20u\n", $oldsum; $msg .= sprintf "...delta: %20u\n", $delta; $msg .= sprintf "...sum: %20u\n", $sum; $msg .= sprintf "...s-d: %20u\n", $sum - $delta; die $msg; } } return $sum; } ###################################################################### # As they said in third grade, Check Your Work. sub check26_sub { my ($num, $code) = @_; my $encode = encode26($num); my $CODE = $code; $CODE =~ tr/a-z/A-Z/; if ($encode ne $CODE) { die "encode26($num) returns '$encode' instead of '$CODE'\n"; } my $decode = decode26($code); if ($decode ne $num) { die "decode26($code) returns '$decode' instead of '$num'\n"; } } sub check26 { my ($num, $code, $msg) = @_; if ($msg) { eval { check26_sub($num, $code); }; if ($@) { # should fail, and did ##### print STDERR "caught the following:\n$@\n"; return; } print STDERR "check26($num, $code) should have failed but didn't", " ($msg)...\n"; printf STDERR "...%u -> %s\n", $num, encode26($num); printf STDERR "...%s -> %u\n", $code, decode26($code); printf STDERR "...bigint: %u, log(): %f\n", $bigint, POSIX::log2($bigint); die "overflow\n"; } check26_sub($num, $code); } sub dumpit { my $arg = shift; printf "%22u encodes as %s\n", $arg, encode26($arg); } # dies on any error # If verbose flag, prints "All OK"; # otherwise silent on success. sub test26 { my $verbose = shift; my $A = 1; my $B = 2; my $C = 3; my $D = 4; my $Z = 26; my $base = 26; check26($A, 'A'); check26($Z, 'Z'); check26($Z+1, 'AA'); ## carry check26($Z*$base+$Z, 'ZZ'); check26($Z*$base+$Z+1, 'AAA'); ## carry check26(($A*$base+$A)*$base+$A, 'AAA'); check26(($B*$base+$C)*$base+$D, 'BCD'); check26(($Z*$base+$Z)*$base+$Z, 'zZz'); check26(($Z*$base+$Z)*$base+$Z+1, 'AAAA'); ## carry check26(7258, 'jsd'); check26($base ** 5, 'yyyyZ'); # place-value shift rule check26(2147483647, 'FXSHRXW'); ## 31 bits check26($halfint-1, 'CRPXNLSKVLJFHF'); check26($halfint+0, 'CRPXNLSKVLJFHG'); check26($halfint+1, 'CRPXNLSKVLJFHH'); check26($halfint+2, 'CRPXNLSKVLJFHI'); check26($bigint-1, 'GKGWBYLWRXTLPN'); check26($bigint+0, 'GKGWBYLWRXTLPO'); check26($bigint+1, 'GKGWBYLWRXTLPP', 'overflow'); print "All OK\n" if $verbose; return 0; } sub useless { dumpit($halfint+0); dumpit($halfint+1); dumpit($halfint+2); ###non-portable ## dumpit(0x7fffFFFFffffFFFF); ## dumpit(0x8000000000000000); dumpit($bigint-2); dumpit($bigint-1); dumpit($bigint-0); dumpit($bigint+1); ## overflow dumpit($bigint+2); ## overflow dumpit($bigint+11000); ## overflow } 1;