# -*- mode:perl; fill-column:80 -*- # Interpreter for Genera Tag. use strict; use warnings; use 5.016; use utf8; use feature qw/unicode_strings fc/; use open ':std', ':encoding(UTF-8)'; # First line of input: initial state my $init_state = '#'; $init_state = <> while $init_state =~ /^\s*#/; my $symbol_re = qr/\p{Cased_Letter}/; $init_state =~ /^\s*(?:$symbol_re\s*)*\z/ or die "Could not parse initial state from the first line of input"; $init_state =~ s/\s//g; # The rest of the input contains productions and width definitions. A # production looks like "0a:bc", where the 0 is the modulus, the a is the symbol # produced from, and the bc the string being produced. A width definition looks # like "a@1", where a is the symbol whose width is being defined and 1 the # width. Definitions are separated by whitespace. # # Symbols are represented as cased letters (for compatibility with AORS). my %widths; my %productions; while (<>) { /^\s*\#/ and next; if (s/^\s*(\d+)($symbol_re):((?:$symbol_re|\$)*)(?=\s|\z)//) { defined $productions{$2}{$1} and die "Duplicate definition for $1$2"; $productions{$2}{$1} = $3; redo; } if (s/^\s*($symbol_re)@(\d+)(?=\s|\z)//) { defined $widths{$1} and die "Duplicate width for $1"; $widths{$1} = $2; redo; } unless ($_ =~ /^\s*\z/) { /^\s*(\S*+)/; die "Could not parse input: unrecognised chunk \"$1\"" } } # Assign widths to symbols that don't have them. # # Also warn about explicitly given widths that don't match the symbol's case. for my $symbol (keys %productions) { my $lowercase = $symbol =~ /\p{Lowercase_Letter}/; $lowercase and $widths{$symbol} and warn "Symbol $symbol has nonzero width " . $widths{$symbol} . " and thus should not have a lowercase name"; !$lowercase and exists $widths{$symbol} and $widths{$symbol} == 0 and warn "Symbol $symbol has zero width" . " and thus should have a lowercase name"; exists $widths{$symbol} or $widths{$symbol} = $lowercase ? 0 : 1; $widths{$symbol} += 0; } # Calculate the modulus (by looking at the productions). my $modulus = 0; for my $production_set (values %productions) { $modulus <= $_ and $modulus = $_ + 1 for keys %$production_set; } print STDERR "Modulus is $modulus\n"; # Ensure that all productions exist. while (my ($symbol, $production_set) = each %productions) { defined $production_set->{$_} or die "No definition for $_$symbol" for 0..$modulus-1; } my $state = $init_state; my $position = 0; while ($state !~ /\$/) { print "($position) $state"; my $newstate = ""; for my $symbol (split //, $state) { my $production = $productions{$symbol}{$position}; defined $production or die "Unrecognised symbol $symbol " . "(needed for production $position$symbol)"; $newstate .= $production; $position += $widths{$symbol}; $position %= $modulus; } $state = $newstate; print " ($position)\n"; } print "($position) $state (halt)\n"; $state =~ /\$.*\$/ and die "Two halt symbols \$ appeared within a single generation";