# -*- mode:perl; fill-column:80 -*- # Compiles a subset of Genera Tag into Grill Tag. # # The programs supported as input are those with modulus 2 and where productions # have length at most 2. use strict; use warnings; use 5.016; use utf8; use feature qw/unicode_strings fc/; use open ':std', ':encoding(UTF-8)'; # First line of input: initial state my $init_state = '#'; $init_state = <> while $init_state =~ /^\s*#/; my $symbol_re = qr/\p{Cased_Letter}/; $init_state =~ /^\s*(?:$symbol_re\s*)*\z/ or die "Could not parse initial state from the first line of input"; $init_state =~ s/\s//g; # The rest of the input contains productions and width definitions. A # production looks like "0a:bc", where the 0 is the modulus, the a is the symbol # produced from, and the bc the string being produced. A width definition looks # like "a@1", where a is the symbol whose width is being defined and 1 the # width. # # Symbols are represented as cased letters (for compatibility with AORS). my %widths; my %productions; while (<>) { /^\s*\#/ and next; if (s/^\s*([01])($symbol_re):((?:$symbol_re|\$)*)(?=\s|\z)//) { defined $productions{$2}{$1} and die "Duplicate definition for $1$2"; $productions{$2}{$1} = $3; redo; } if (s/^\s*($symbol_re)@(\d+)(?=\s|\z)//) { defined $widths{$1} and die "Duplicate width for $1"; $widths{$1} = $2; redo; } unless ($_ =~ /^\s*\z/) { /^\s*(\S*+)/; die "Could not parse input: unrecognised chunk \"$1\"" } } # The symbol/symbol-index map. # There is one additional entry, '-' mapping to 0, used as filler when a # production has length less than 2. my %symbol_index = ('-' => 0); my @index_symbol = ('-'); $widths{'-'} = 0; $productions{'-'}{0} = '--'; $productions{'-'}{1} = '--'; # Number the symbols in "alphabetical order" (unclear due to the use of # Unicode, but letters that are the same apart from case are sorted # together, with a lowercase ASCII letter before it uppercase version). for my $symbol (sort { fc($a) cmp fc($b) || $b cmp $a} keys %productions) { $symbol eq '-' and next; $symbol_index{$symbol} = scalar @index_symbol; push @index_symbol, $symbol; } # Calculate the symbol size and modulus: # a = 28 × the number of symbols, # symbol size = a × 7, # modulus m = symbol size × 2; my $a = 28 * scalar @index_symbol; my $m = $a * 7 * 2; print STDERR "Symbol size = 7a, where a = $a\n"; print STDERR "Modulus = 14a = $m\n"; print STDERR "Symbols by index are [@index_symbol]\n"; # The number of 1s in each production. my @production_ones; # Initialize all the productions to empty. push @production_ones, 0 for 0..($m-1); # Loop over the two positions. for my $pos (0, 1) { my $pa = $pos * $a * 7; # For each symbol, add its productions. while (my ($y, $symbol) = each @index_symbol) { # Work out what the first and second produced symbols are. defined $productions{$symbol}{$pos} or die "No definition for $pos$symbol"; my $prod = $productions{$symbol}{$pos}; my $halt = 0; if ($prod eq '$') { $halt = 1; $prod = '--'; } my @production = split //, $prod; my $prod0 = $production[0] // '-'; my $prod1 = $production[1] // '-'; scalar @production <= 2 or die "Definition for $pos$symbol is too long"; defined(my $y0 = $symbol_index{$prod0}) or die "Undefined identifier $prod0, seen in definition for $pos$symbol"; defined(my $y1 = $symbol_index{$prod1}) or die "Undefined identifier $prod1, seen in definition for $pos$symbol"; my $b0 = 14*$y0+7; my $b1 = 14*$y1+7; # Work out the width and width adjustment for each generated symbol. defined $widths{$prod0} or die "No width for $symbol"; my $width0 = $widths{$prod0}; defined $widths{$prod1} or die "No width for $symbol"; my $width1 = $widths{$prod1}; # Special case: a production '--' generates two width-1 # '-'s (rather than two width-0 '-'s) because it's equivalent # and leads to shorter code. $prod0 eq '-' and $prod1 eq '-' and ($width0, $width1) = (1, 1); my $wa0 = (1 - $width0) * $a * 7; my $wa1 = (1 - $width1) * $a * 7; # Now the actual productions. $production_ones[28*$y+ 17+$pa] = $a-3; $production_ones[28*$y+ 19+$pa] = 7; $production_ones[28*$y+ 21+$pa] = $a-4; $production_ones[28*$y+ $a+13+$pa] = $a-3; $production_ones[28*$y+ $a+15+$pa] = 7; $production_ones[28*$y+ $a+17+$pa] = $a-4; $production_ones[14*$y+2*$a+ 3+$pa] = $b0 + ($a*3/2) * $halt; $production_ones[14*$y+2*$a+ 5+$pa] = 3; $production_ones[14*$y+2*$a+ 7+$pa] = 3*$a - $b0 - 3 + $wa0; $production_ones[14*$y+2*$a+ 9+$pa] = 0; $production_ones[14*$y+2*$a+11+$pa] = $b1; $production_ones[14*$y+2*$a+13+$pa] = 3; $production_ones[14*$y+2*$a+15+$pa] = 3*$a - $b1 - 3 + $wa1; } } # Print the productions. print "["; my $first = 1; for my $ones (@production_ones) { $first or print ","; print $ones; undef $first; } print "]\n["; # Print the initial state. $first = 1; for my $symbol (split //, $init_state) { # A symbol is encoded as: # 14*y+7 zeroes (i.e. length 1 grills)); # a grill of length 2a-5 (with a-3 ones); # a grill of length 15 (with 7 ones); # a grill of length 2a-7 (with a-4 ones); # 3*a-14*y-10 or 10*a-14*y-10 zeroes (i.e. length 1 grills). defined $symbol_index{$symbol} or die "Symbol $symbol in initial string has no definition"; my $y = $symbol_index{$symbol}; my $width = $widths{$symbol}; my $wa = (1 - $width) * $a * 7; print "," unless $first; undef $first; print "0"; print ",0" x (14 * $y + 6); print ",0"; print ",1,0" x ($a - 3); print ",0"; print ",1,0" x 7; print ",0"; print ",1,0" x ($a - 4); print ",0" x (3*$a - 14*$y - 10 + $wa); } print "]\n";