| 264 | | if (open(LEX, "$dir/$file")) { |
| | 279 | if ($STORABLE and -e "$dir/$file.store") { |
| | 280 | my $lexer = retrieve("$dir/$file.store"); |
| | 281 | my $pat = $lexer->{PATS}; |
| | 282 | my $fates; |
| | 283 | my $i = 0; |
| | 284 | for (@$pat) { |
| | 285 | my $fstr; |
| | 286 | if ( m/\(\?#FATE(\d+) +(.*?)\)/) { |
| | 287 | warn "MISMATCH $i $1" unless $i == $1; |
| | 288 | $fstr = $2; |
| | 289 | } |
| | 290 | else { |
| | 291 | die "Whoops, no fate in storage"; |
| | 292 | } |
| | 293 | my $fate; |
| | 294 | if ($fate = $FATECACHE{$fstr}) { |
| | 295 | $fates->[$i] = $fate; |
| | 296 | } |
| | 297 | else { |
| | 298 | $FATECACHE{$fstr} = $fate = $fates->[$i] = [0,0,0]; |
| | 299 | while ($fstr =~ s/(\S+)\s+(\S+)\s*//) { |
| | 300 | $fate->[0] = $1; |
| | 301 | $fate->[1] = $2; |
| | 302 | if ($fate->[2] = $FATECACHE{$fstr}) { |
| | 303 | last; |
| | 304 | } |
| | 305 | $fate = $fate->[2] //= [0,0,0] if $fstr ne ''; |
| | 306 | } |
| | 307 | } |
| | 308 | $i++; |
| | 309 | } |
| | 310 | $lexer->{FATES} = $fates; |
| | 311 | return $lexer; |
| | 312 | } |
| | 313 | elsif (open(LEX, "$dir/$file")) { |
| 353 | | my $fate = $fates->[$i] = [0,0,0,$fstr]; |
| 354 | | while ($fstr =~ s/(\S+)\s+(\S+)\s*//) { |
| 355 | | $fate->[0] = $1; |
| 356 | | $fate->[1] = $2; |
| 357 | | $fate = $fate->[2] = [0,0,0,$fstr] if $fstr ne ''; |
| | 401 | my $fate; |
| | 402 | if ($fate = $FATECACHE{$fstr}) { |
| | 403 | $fates->[$i] = $fate; |
| | 404 | } |
| | 405 | else { |
| | 406 | $FATECACHE{$fstr} = $fate = $fates->[$i] = [0,0,0]; |
| | 407 | while ($fstr =~ s/(\S+)\s+(\S+)\s*//) { |
| | 408 | $fate->[0] = $1; |
| | 409 | $fate->[1] = $2; |
| | 410 | if ($fate->[2] = $FATECACHE{$fstr}) { |
| | 411 | last; |
| | 412 | } |
| | 413 | $fate = $fate->[2] //= [0,0,0] if $fstr ne ''; |
| | 414 | } |
| 366 | | $lexer = { "NAME" => $name, "FILE" => "$dir/$file", "PATS" => [@pat], "FATES" => $fates, "DBA" => $dba}; |
| | 423 | my $T; |
| | 424 | if ($TRIE) { |
| | 425 | $T = {}; |
| | 426 | PAT: |
| | 427 | for my $fnum (0..@pat-1) { |
| | 428 | my ($chars) = $pat[$fnum]; |
| | 429 | $chars =~ s/\(\?#::\)//g; |
| | 430 | my @chars; |
| | 431 | my $final = ''; |
| | 432 | while ($chars ne '') { |
| | 433 | last if $chars =~ m/^\t/; |
| | 434 | if ($chars =~ s/^\\(\W)([*+?{]?)//) { |
| | 435 | if ($2) { |
| | 436 | $final = "\\$1$2$chars"; |
| | 437 | last; |
| | 438 | } |
| | 439 | push(@chars, ord($1)); |
| | 440 | next; |
| | 441 | } |
| | 442 | if ($chars =~ s/^(\\\w.*)//) { |
| | 443 | $final = $1; |
| | 444 | last; |
| | 445 | } |
| | 446 | if ($chars =~ /^(\[\S+)/) { |
| | 447 | $final = $1; |
| | 448 | last; |
| | 449 | } |
| | 450 | if ($chars =~ /^(\.\S+)/) { |
| | 451 | $final = $1; |
| | 452 | last; |
| | 453 | } |
| | 454 | if ($chars =~ s/^(.)([*+?{]?)//) { |
| | 455 | if ($2) { |
| | 456 | $final = "$1$2$chars"; |
| | 457 | last; |
| | 458 | } |
| | 459 | push(@chars, unpack('U',$1)); |
| | 460 | next; |
| | 461 | } |
| | 462 | } |
| | 463 | my $state = $T; |
| | 464 | for my $ch (@chars) { |
| | 465 | my $char = chr($ch); |
| | 466 | if (my $next = $state->{$char}) { |
| | 467 | $state = $next; |
| | 468 | } |
| | 469 | else { |
| | 470 | $state = $state->{$char} = {}; |
| | 471 | } |
| | 472 | } |
| | 473 | push @{$state->{'~~'}}, $final, $fnum; |
| | 474 | } |
| | 475 | } |
| | 476 | |
| | 477 | |
| | 478 | $lexer = { "NAME" => $name, "FILE" => "$dir/$file", "PATS" => [@pat], "FATES" => $fates, "T" => $T, "DBA" => $dba}; |
| 374 | | |
| 375 | | open(my $cache, '>', "$dir/$file") // die "Can't print: $!"; |
| 376 | | binmode($cache, ":utf8"); |
| | 486 | if ($STORABLE) { |
| | 487 | delete $lexer->{FATES}; |
| | 488 | store($lexer, "$dir/$file.store"); |
| | 489 | $lexer->{FATES} = $fates; |
| | 490 | if ($file eq 'termish') { |
| | 491 | system "cp $dir/termish.store $dir/EXPR.store"; |
| | 492 | } |
| | 493 | return $lexer; |
| | 494 | } |
| | 495 | |
| | 496 | open(my $cache, '>:utf8', "$dir/$file") // die "Can't print: $!"; |
| 660 | | # A rudimentary trie walker, unused so far. It assumes a prebuilt trie; |
| 661 | | # it will need some revisions to build it on the fly. Also, it's currently |
| 662 | | # written for clarity rather than speed, and that will also change later. |
| 663 | | # There is an arbitrary cutoff between characters we look up by array |
| 664 | | # vs characters we look up by hash. ->{T}[0] is the array, [1] is the hash. |
| 665 | | # and [2] contains actual results generator closure. At the moment we just |
| 666 | | # drop through if we don't get a result, which is always. |
| | 781 | # A rudimentary trie walker. |
| 672 | | while ($next = $state->[0][$ch] // $state->[1]{chr $ch}) { |
| | 788 | print STDERR "=" x 10,"\n$p TRIE for ${pkg}::$name in ", ref $self, "\n", |
| | 789 | $p," ", pack("U",$ch), "\n" if $DEBUG & DEBUG::autolexer; |
| | 790 | for (;;) { |
| | 791 | if ($state->{'~~'}) { |
| | 792 | if (not $state->{''}++) { |
| | 793 | for (@{$state->{'~~'}}) { |
| | 794 | next if /^\d/; |
| | 795 | next unless $_; |
| | 796 | next if ref $_ eq 'CODE'; |
| | 797 | our %RXCACHE; |
| | 798 | $_ = $RXCACHE{$_} //= eval "sub { \$::ORIG =~ /\\G$_/xsgc }"; |
| | 799 | } |
| | 800 | } |
| | 801 | my @x = @{$state->{'~~'}}; |
| | 802 | while (my ($final,$fnum) = splice(@x,0,2)) { |
| | 803 | my $pend = $p; |
| | 804 | if ($final) { |
| | 805 | print STDERR $p," probing $fnum\n" if $DEBUG & DEBUG::autolexer; |
| | 806 | pos($::ORIG) = $p; |
| | 807 | next unless &$final; |
| | 808 | $pend = pos($::ORIG); |
| | 809 | } |
| | 810 | push(@{$candidates[$pend - $P][$p - $P]}, $fnum); |
| | 811 | print STDERR $pend," FNUM $fnum @",$pend - $P,"\n" if $DEBUG & DEBUG::autolexer; |
| | 812 | } |
| | 813 | } |
| | 814 | last unless $next = $state->{chr $ch}; |
| | 815 | if ($DEBUG & DEBUG::autolexer) { |
| | 816 | print STDERR substr($::ORIG, $P, $p - $P), "\n"; |
| | 817 | print STDERR $p," ", pack("U",$ch), "\n"; |
| | 818 | } |