#! /usr/bin/perl -CS

# od-unicode: dump standard input as Unicode characters, eight per row.
#
# Each row is printed as two lines: the characters themselves (with the
# characters listed in the __DATA__ table swapped for their "control
# picture" symbols), and underneath each one its code point in hex.
#
# The -CS switch puts STDIN/STDOUT/STDERR in UTF-8 mode, so read() below
# counts characters rather than bytes.

use warnings;
use strict;
use utf8;
use Text::CSV_XS 'csv';
use Symbol;

# Print a short usage example to standard output.
sub usage {
    print <<'EoF';
Example usage:
:; echo 'ContrΡ–Ζ„Υ½teπ„ž' | od-unicode
====    C    o    n    t    r    Ρ–    Ζ„    Υ½
       43   6f   6e   74   72 0456 0184 057d
====    t    e      π„ž    ␊
       74   65 01d11e   0a
EoF
    return;
}

# Code points that appear in the usage example above:
#   0456   : Cyrillic Small Letter Byelorussian-Ukrainian I
#   0184   : Latin Capital Letter Tone Six
#   057D   : Armenian Small Letter Seh
#   01D11E : MUSICAL SYMBOL G CLEF

# Build the replacement table from the CSV rows after __DATA__.
#
# Column 1 of each row is a decimal code point and column 6 is the symbol
# to display in its place.  Rows whose code column is empty are skipped.
#
# Returns a hash reference mapping decimal code point => display symbol.
sub read_data {
    # binary => 1 accepts the non-ASCII symbols in the table regardless of
    # how the handle is decoded; auto_diag => 1 reports parse errors
    # instead of silently truncating the table.
    my $csv = Text::CSV_XS->new({ binary => 1, auto_diag => 1 });

    # __DATA__ is within the scope of "use utf8", which per perldata puts
    # the DATA handle in UTF-8 mode.
    my $rows = $csv->getline_all(\*DATA);

    my %replacement_for;
    for my $row ($rows->@*) {
        my $code = $row->[1];
        next if !defined $code || $code eq '';
        $replacement_for{$code} = $row->[6];
    }
    return \%replacement_for;
}

main: {
    # The only recognized argument is -h; anything else is fatal.
    while (@ARGV) {
        my $arg = shift @ARGV;
        if ($arg eq '-h') {
            usage();
            exit;
        }
        die "Unrecognized arg '$arg'\n";
    }

    my $replace = read_data();

    CHUNK: for (;;) {
        my $chunk;
        my $got = read(STDIN, $chunk, 8);

        # read() yields undef on error and 0 at end of file; keep the two
        # apart so that an I/O error is not mistaken for a clean EOF.
        die "Error reading standard input: $!\n" if !defined $got;
        last CHUNK if !$got;

        my $glyph_row = '';
        my $hex_row   = '';
        for my $char (split //, $chunk) {
            my $ord   = ord $char;
            my $glyph = $replace->{$ord} // $char;

            # Two hex digits up to U+00FF, four up to U+FFFF, six beyond.
            my $digits = $ord <= 0xFF   ? 2
                       : $ord <= 0xFFFF ? 4
                       :                  6;
            my $hex = sprintf '%0*x', $digits, $ord;

            # Both rows use the same cell width, so every code point sits
            # directly under its character even when the hex form is
            # wider than the default four columns.
            my $width = length $hex;
            $width = 4 if $width < 4;

            $glyph_row .= sprintf '%*s ', $width, $glyph;
            $hex_row   .= sprintf '%*s ', $width, $hex;
        }

        # The hex row is indented by the width of the "==== " marker.
        print "==== ", $glyph_row, "\n";
        print "     ", $hex_row,   "\n";
    }
}

__DATA__
0,0,0,NUL,,"U+ 2400",␀,"Symbol For Null"
1,1,1,SOH,,"U+ 2401",␁,"Symbol For Start Of Heading"
2,2,2,STX,,"U+ 2402",␂,"Symbol For Start Of Text"
3,3,3,ETX,,"U+ 2403",␃,"Symbol For End Of Text"
4,4,4,EOT,,"U+ 2404",␄,"Symbol For End Of Transmission"
5,5,5,ENQ,,"U+ 2405",␅,"Symbol For Enquiry"
6,6,6,ACK,,"U+ 2406",␆,"Symbol For Acknowledge"
7,7,7,BEL,,"U+ 2407",␇,"Symbol For Bell"
10,8,8,BS,,"U+ 2408",␈,"Symbol For Backspace"
11,9,9,HT,,"U+ 2409",␉,"Symbol For Horizontal Tabulation"
12,10,0A,LF,,"U+ 240A",␊,"Symbol For Line Feed"
13,11,0B,VT,,"U+ 240B",␋,"Symbol For Vertical Tabulation"
14,12,0C,FF,,"U+ 240C",␌,"Symbol For Form Feed"
15,13,0D,CR,,"U+ 240D",␍,"Symbol For Carriage Return"
16,14,0E,SO,,"U+ 240E",␎,"Symbol For Shift Out"
17,15,0F,SI,,"U+ 240F",␏,"Symbol For Shift In"
20,16,10,DLE,,"U+ 2410",␐,"Symbol For Data Link Escape"
21,17,11,DC1,,"U+ 2411",␑,"Symbol For Device Control One"
22,18,12,DC2,,"U+ 2412",␒,"Symbol For Device Control Two"
23,19,13,DC3,,"U+ 2413",␓,"Symbol For Device Control Three"
24,20,14,DC4,,"U+ 2414",␔,"Symbol For Device Control Four"
25,21,15,NAK,,"U+ 2415",␕,"Symbol For Negative Acknowledge"
26,22,16,SYN,,"U+ 2416",␖,"Symbol For Synchronous Idle"
27,23,17,ETB,,"U+ 2417",␗,"Symbol For End Of Transmission Block"
30,24,18,CAN,,"U+ 2418",␘,"Symbol For Cancel"
31,25,19,EM,,"U+ 2419",␙,"Symbol For End Of Medium"
32,26,1A,SUB,,"U+ 241A",␚,"Symbol For Substitute"
33,27,1B,ESC,,"U+ 241B",␛,"Symbol For Escape"
34,28,1C,FS,,"U+ 241C",␜,"Symbol For File Separator"
35,29,1D,GS,,"U+ 241D",␝,"Symbol For Group Separator"
36,30,1E,RS,,"U+ 241E",␞,"Symbol For Record Separator"
37,31,1F,US,,"U+ 241F",␟,"Symbol For Unit Separator"
,,,,,"U+ 2420",␠,"Symbol For Space"
,,,,,"U+ 2421",␑,"Symbol For Delete"
,,,,,"U+ 2422",␒,"Blank Symbol"
240,160,A0,,,"U+ 2423",␣,"Open Box"
,,,,,"U+ 2424",␀,"Symbol For Newline"
,,,,,"U+ 2425",β₯,"Symbol For Delete Form Two"
,,,,,"U+ 2426",␦,"Symbol For Substitute Form Two"