Legend:
- Unmodified
- Added
- Removed
-
src/perl6/Cursor.pmc
r21852 r21901 1440 1440 my $buf = $self->{_orig}; 1441 1441 pos($$buf) = $P; 1442 if ($$buf =~ $qr) {1442 if ($$buf =~ /$qr/gc) { 1443 1443 my $len = $+[0] - $P; 1444 1444 $self->deb("PATTERN $qr matched @{[substr($$buf,$P,$len)]} at $P $len") if $DEBUG & DEBUG::matchers; -
src/perl6/STD.pm
r21852 r21901 10 10 11 11 # random rule for debugging, please ignore 12 regexfoo {12 token foo { 13 13 'foo' 'bar' 'baz' 14 14 } … … 154 154 constant %loose_and = (:prec<d=>, :assoc<left>, :assign); 155 155 constant %loose_or = (:prec<c=>, :assoc<left>, :assign); 156 constant %feed_infix = (:prec<b=>, :assoc<left>); 156 157 constant %LOOSEST = (:prec<a=!>); 157 158 constant %terminator = (:prec<a=>, :assoc<list>); … … 257 258 our %o = %loose_or; 258 259 } # end class 260 class Feed_infix does PrecOp { 261 our %o = %feed_infix; 262 } # end class 259 263 class Terminator does PrecOp { 260 264 our %o = %terminator; … … 419 423 token vws { 420 424 \v :: 421 { $COMPILING::LINE++ } # XXX wrong several ways 425 { $COMPILING::LINE++ } # XXX wrong several ways, use self.lineof($¢.pos) 422 426 [ '#DEBUG -1' { say "DEBUG"; $STD::DEBUG = $*DEBUG = -1; } ]? 423 427 } … … 441 445 } 442 446 443 token identi sh{447 token identifier { 444 448 <.alpha> \w* 445 449 } 446 450 451 token apostrophe { 452 <[ ' \- ]> 453 } 454 447 455 token ident { 448 <.identi sh> [<[ ' \- ]><identish>]*456 <.identifier> [ <.apostrophe> <.identifier> ]* 449 457 } 450 458 … … 454 462 ^^ '=' <.unsp>? 455 463 [ 456 | 'begin' \h+ <ident> :: .*? \n457 '='<.unsp>? 'end' \h+ $<ident> » \N* {*} #= tagged458 | 'begin' » :: \h* \n .*? \n459 '='<.unsp>? 'end' » \N* {*} #= anon464 | 'begin' \h+ <ident> :: .*? 465 "\n=" <.unsp>? 'end' \h+ $<ident> » \N* {*} #= tagged 466 | 'begin' » :: \h* \n .*? 467 "\n=" <.unsp>? 'end' » \N* {*} #= anon 460 468 | :: 461 469 [ <?before .*? ^^ '=cut' » > <.panic: "Obsolete pod format, please use =begin/=end instead"> ]? … … 540 548 :my $PARSER is context<rw> = self; 541 549 [ 550 | $ 542 551 | <?before <[\)\]\}]> > 543 552 | [<statement><.eat_terminator> ]* … … 570 579 :my $endargs is context = -1; 571 580 <!before <[\)\]\}]> > 581 582 # this could either be a statement that follows a declaration 583 # or a statement that is within the block of a code declaration 572 584 <!!{ bless $¢, ref $PARSER; }> 585 573 586 [ 574 587 | <label> <statement> {*} #= label … … 2995 3008 token infix:sym<.=> ( --> Item_assignment) { 2996 3009 <sym> <.ws> 2997 [ <?before \w+';' | < new sort subst trans >> || <worryobs('.= as append operator', '~=')> ]3010 [ <?before \w+';' | 'new' | 'sort' | 'subst' | 'trans' > || <worryobs('.= as append operator', '~=')> ] 2998 3011 { $<O><nextterm> = 'dottyop' } 2999 3012 } … … 3097 3110 # unrecognized names are assumed to be post-declared listops. 3098 3111 || <args>? 3099 # || <?before \s> <arglist>3100 # {*} #= listop args3101 # ||3102 # [3103 # | '.(' <in: ')', 'semilist', 'argument list'>3104 # {*} #= func args3105 #3106 # | '(' <in: ')', 'semilist', 'argument list'>3107 # {*} #= func args3108 #3109 # | <.unsp> '.'? '(' <in: ')', 'semilist', 'argument list'>3110 # {*} #= func args3111 #3112 #3113 # | :: {*} #= listop noarg3114 # ]3115 #3116 # [3117 # || ':' <?before \s> <arglist> # either switch to listopiness3118 # || {{ $+prevop = $<O> = {}; }} # or allow adverbs3119 # ]3120 3112 ] 3121 3113 } … … 3144 3136 { <sym> } 3145 3137 3138 token infix:sym« <== » ( --> Feed_infix) 3139 { <sym> } 3140 3141 token infix:sym« ==> » ( --> Feed_infix) 3142 { <sym> {*} } #' 3143 3144 token infix:sym« <<== » ( --> Feed_infix) 3145 { <sym> } 3146 3147 token infix:sym« ==>> » ( --> Feed_infix) 3148 { <sym> {*} } #' 3149 3146 3150 ## expression terminator 3147 3151 … … 3169 3173 token terminator:sym<when> ( --> Terminator) 3170 3174 { <?before 'when' » > } 3171 3172 token terminator:sym« <== » ( --> Terminator)3173 { <?before '<==' > }3174 3175 token terminator:sym« ==> » ( --> Terminator)3176 { <?before '==>' > {*} } #'3177 3175 3178 3176 token terminator:sym« --> » ( --> Terminator) … … 3725 3723 token mod_internal:sym<:a( )> { $<sym>=[':a'|':ignoreaccent'] <mod_arg> { $+ignoreaccent = $<mod_arg>.eval } } 3726 3724 3727 token mod_internal:sym<:s> { <sym>'igspace'? » { $+sigspace = 1 } }3728 token mod_internal:sym<:!s> { <sym>'igspace'? » { $+sigspace = 0 } }3729 token mod_internal:sym<:s( )> { <sym>'igspace'? <mod_arg> { $+sigspace = $<mod_arg>.eval } }3730 3731 token mod_internal:sym<:r> { <sym>'atchet'? » { $+ratchet = 1 } }3732 token mod_internal:sym<:!r> { <sym>'atchet'? » { $+ratchet = 0 } }3733 token mod_internal:sym<:r( )> { <sym>'atchet'? » <mod_arg> { $+ratchet = $<mod_arg>.eval } }3725 token mod_internal:sym<:s> { ':s' 'igspace'? » { $+sigspace = 1 } } 3726 token mod_internal:sym<:!s> { ':s' 'igspace'? » { $+sigspace = 0 } } 3727 token mod_internal:sym<:s( )> { ':s' 'igspace'? <mod_arg> { $+sigspace = $<mod_arg>.eval } } 3728 3729 token mod_internal:sym<:r> { ':r' 'atchet'? » { $+ratchet = 1 } } 3730 token mod_internal:sym<:!r> { ':r' 'atchet'? » { $+ratchet = 0 } } 3731 token mod_internal:sym<:r( )> { ':r' 'atchet'? » <mod_arg> { $+ratchet = $<mod_arg>.eval } } 3734 3732 3735 3733 token mod_internal:adv { -
src/perl6/gimme5
r21819 r21901 5 5 use warnings; 6 6 use Text::Balanced qw(extract_bracketed); 7 binmode(STDIN, ":utf8"); 8 binmode(STDOUT, ":utf8"); 9 binmode(STDERR, ":utf8"); 10 use Encode; 11 use utf8; 7 12 8 13 my $failover = 0; … … 201 206 202 207 { 203 local $/; 204 $_ = <>; 208 open(IN, $ARGV[0]) or die "Can't open $ARGV[0]: $!\n"; 209 { 210 local $/; 211 binmode(IN, ':utf8'); 212 $_ = <IN>; 213 } 214 close IN; 205 215 push @impure, m/^method (\w+)/mg; 206 216 @impure{@impure} = (1) x @impure; … … 448 458 my $p = ""; 449 459 450 local $MAYBACKTRACK = 1; # XXX ratchet current broken451 if ($KIND eq ' regex') {452 $MAYBACKTRACK = 1;460 local $MAYBACKTRACK = 1; 461 if ($KIND eq 'token' or $KIND eq 'rule') { 462 $MAYBACKTRACK = 0; 453 463 } 454 464 … … 880 890 our %INSTANTIATED; 881 891 require 'mangle.pl'; 892 use utf8; 882 893 883 894 END … … 889 900 if (%$RETREE) { 890 901 $out .= "BEGIN {\n \$retree = YAML::XS::Load(<<'RETREE_END');\n"; 891 $out .= Dump($RETREE);902 $out .= Encode::decode("utf8", Dump($RETREE)); 892 903 $out .= "RETREE_END\n}\n"; 893 904 } … … 905 916 return if $KIND eq 'rule'; # meta whitespace parsed in atom 906 917 for (;;) { 907 next if s/^\s+//; 908 next if s/^#\(.*?\)//s; 909 next if s/^#\{.*?\}//s; 910 next if s/^#\[.*?\]//s; 911 next if s/^#\<.*?\>//s; 912 next if s/^#.*\n//; 918 # next if s/^(?!=[\0-~])\s+//; 919 next if s/^[\x20\t\n\r]+//; 920 last unless s/^#//; 921 next if s/^\(.*?\)//s; 922 next if s/^\{.*?\}//s; 923 next if s/^\[.*?\]//s; 924 next if s/^\<.*?\>//s; 925 next if s/^.*\n//; 913 926 last; 914 927 } … … 1651 1664 $text =~ s/^-\[/[^/; 1652 1665 if ($$self{i}) { 1653 $self->bind("\$C->_ CCLASS$REV(qr/^(?i)$text\$/)");1666 $self->bind("\$C->_PATTERN$REV(qr/\\G(?i:$text)/)"); 1654 1667 } 1655 1668 else { 1656 $self->bind("\$C->_ CCLASS$REV(qr/^$text\$/)");1669 $self->bind("\$C->_PATTERN$REV(qr/\\G$text/)"); 1657 1670 } 1658 1671 } … … 1673 1686 my $self = shift; 1674 1687 my $text = $$self{text}; 1688 $text = "(?<=$text)" if $REV; 1675 1689 if ($$self{i}) { 1676 $self->bind('$C->_PATTERN ' . $REV . '(qr/(?i)\\Q' . $text . '\\E/")');1690 $self->bind('$C->_PATTERN(qr/\\G(?i:' . $text . ')/")'); 1677 1691 } 1678 1692 else { 1679 $self->bind('$C->_ EXACT' . $REV . '("' . $text . '")');1693 $self->bind('$C->_PATTERN(qr/\\G' . $text . '/)'); 1680 1694 } 1681 1695 } … … 1685 1699 sub walk { 1686 1700 my $self = shift; 1687 my $text = $$self{text};1688 $text =~ s/(['\\])/\\$1/g;1701 my $text = quotemeta($$self{text}); 1702 $text = "(?<=$text)" if $REV; 1689 1703 if ($$self{i}) { 1690 '$C->_PATTERN ' . $REV . '(qr/(?i)\\Q' . $text . '\\E/")';1704 '$C->_PATTERN(qr/\\G(?i:' . $text . ')/)'; 1691 1705 } 1692 1706 else { 1693 "\$C->_EXACT$REV('" . $text . "')"; 1707 "\$C->_PATTERN(qr/\\G$text/)"; 1708 # my $l = length($text); 1709 # "(substr(\$\$buf, \$C->{_pos}, $l) eq '" . $text . "' ? \$C->cursor(\$C->{_pos} + $l) : ())" 1694 1710 } 1695 1711 } … … 1707 1723 } 1708 1724 if ($text eq '.') { 1709 $code = "\$C->_ANY$REV()"; 1725 if ($REV) { 1726 $code = "\$C->_PATTERN(qr/\\G(?<=(?s:.))/)"; 1727 } 1728 else { 1729 $code = "\$C->_PATTERN(qr/\\G(?s:.)/)"; 1730 } 1731 # $code = "\$C->_ANY$REV()"; 1710 1732 } 1711 1733 elsif ($text eq '.*') { 1734 # if ($REV) { 1735 # $code = "\$C->_PATTERN(qr/\\G(?<=.*)/)"; 1736 # } 1737 # else { 1738 # $code = "\$C->_PATTERN(qr/\\G.*/)"; 1739 # } 1712 1740 $code = "\$C->_SCANg$REV()"; 1713 1741 } 1714 1742 elsif ($text eq '.*?') { 1743 # if ($REV) { 1744 # $code = "\$C->_PATTERN(qr/\\G(?<=.*?)/)"; 1745 # } 1746 # else { 1747 # $code = "\$C->_PATTERN(qr/\\G.*?/)"; 1748 # } 1715 1749 $code = "\$C->_SCANf$REV()"; 1716 1750 } 1717 1751 elsif ($text eq '^') { 1718 $code = "\$C->_BOS$REV()"; 1752 $code = "\$C->_PATTERN(qr/\\G\\A/)"; 1753 # $code = "\$C->_BOS$REV()"; 1719 1754 } 1720 1755 elsif ($text eq '^^') { 1721 $code = "\$C->_BOL$REV()"; 1756 $code = "\$C->_PATTERN(qr/\\G(?m:^)/)"; 1757 # $code = "\$C->_BOL$REV()"; 1722 1758 } 1723 1759 elsif ($text eq '$') { 1724 $code = "\$C->_EOS$REV()"; 1760 $code = "\$C->_PATTERN(qr/\\G\\z/)"; 1761 # $code = "\$C->_EOS$REV()"; 1725 1762 } 1726 1763 elsif ($text eq '$$') { 1727 $code = "\$C->_EOL$REV()"; 1764 $code = "\$C->_PATTERN(qr/\\G(?m:\$)/)"; 1765 # $code = "\$C->_EOL$REV()"; 1728 1766 } 1729 1767 elsif ($text eq ':') { … … 1733 1771 $PURE = 0; 1734 1772 $code = "\$C->_COMMITBRANCH$REV()"; 1773 $MAYBACKTRACK = 1; 1735 1774 } 1736 1775 elsif ($text eq ':::') { 1737 1776 $PURE = 0; 1738 1777 $code = "\$C->_COMMITRULE$REV()"; 1778 $MAYBACKTRACK = 1; 1739 1779 } 1740 1780 elsif ($text eq '\\d') { 1741 $code = "\$C->_DIGIT$REV()"; 1781 if ($REV) { 1782 $code = "\$C->_PATTERN(qr/\\G(?<=\\d)/)"; 1783 } 1784 else { 1785 $code = "\$C->_PATTERN(qr/\\G\\d/)"; 1786 } 1787 # $code = "\$C->_DIGIT$REV()"; 1742 1788 } 1743 1789 elsif ($text eq '\\w') { 1744 $code = "\$C->_ALNUM$REV()"; 1790 if ($REV) { 1791 $code = "\$C->_PATTERN(qr/\\G(?<=\\w)/)"; 1792 } 1793 else { 1794 $code = "\$C->_PATTERN(qr/\\G\\w/)"; 1795 } 1796 # $code = "\$C->_ALNUM$REV()"; 1745 1797 } 1746 1798 elsif ($text eq '\\s') { 1747 $code = "\$C->_SPACE$REV()"; 1799 if ($REV) { 1800 $code = "\$C->_PATTERN(qr/\\G(?<=\\s)/)"; 1801 } 1802 else { 1803 $code = "\$C->_PATTERN(qr/\\G\\s/)"; 1804 } 1805 # $code = "\$C->_SPACE$REV()"; 1748 1806 } 1749 1807 elsif ($text eq '\\h') { 1750 $code = "\$C->_HSPACE$REV()"; 1808 if ($REV) { 1809 $code = "\$C->_PATTERN(qr/\\G(?<=[\\x20\\t\\r])/)"; 1810 } 1811 else { 1812 $code = "\$C->_PATTERN(qr/\\G[\\x20\\t\\r]/)"; 1813 } 1814 # $code = "\$C->_HSPACE$REV()"; 1751 1815 } 1752 1816 elsif ($text eq '\\v') { 1753 $code = "\$C->_VSPACE$REV()"; 1817 if ($REV) { 1818 $code = "\$C->_PATTERN(qr/\\G(?<=[\\n])/)"; 1819 } 1820 else { 1821 $code = "\$C->_PATTERN(qr/\\G[\\n]/)"; 1822 } 1823 # $code = "\$C->_VSPACE$REV()"; 1754 1824 } 1755 1825 elsif ($text eq '»') { 1756 $code = "\$C->_RIGHTWB$REV()"; 1826 $code = "\$C->_PATTERN(qr/\\G\\b/)"; 1827 # $code = "\$C->_RIGHTWB$REV()"; 1757 1828 } 1758 1829 elsif ($text eq '«') { 1759 $code = "\$C->_LEFTWB$REV()"; 1830 $code = "\$C->_PATTERN(qr/\\G\\b/)"; 1831 # $code = "\$C->_LEFTWB$REV()"; 1760 1832 } 1761 1833 elsif ($text eq '>>') { … … 1797 1869 $$self{sym} = $SYM; 1798 1870 $$self{endsym} = $ENDSYM if $ENDSYM; 1871 if ($$self{i}) { 1872 return "\$C->_PATTERN(qr/\\G(?i:" . quotemeta($SYM) . ")/)"; 1873 } 1874 else { 1875 return "\$C->_PATTERN(qr/\\G" . quotemeta($SYM) . "/)"; 1876 } 1799 1877 return $re = '$C->_SYM($sym, ' . ($$self{i}//0) . ')'; # could pass endsym too here... 1800 1878 } 1879 elsif ($name eq "alpha") { 1880 return "\$C->_PATTERN(qr/\\G[_[:alpha:]]/)"; 1881 } 1882 elsif ($name eq "_ALNUM") { 1883 return "\$C->_PATTERN(qr/\\G\\w/)"; 1884 } 1885 # elsif ($name eq "ws") { 1886 # return "\$C->_PATTERN(qr/\\G(?{ \$C = \$C->ws; pos(\$_) = \$C->{_pos} })/)"; 1887 # } 1801 1888 elsif ($name eq "nextsame") { 1802 1889 $NEEDORIGARGS++; … … 2017 2104 my $rep = "_REP"; 2018 2105 my $q = $$self{quant}; 2019 if ($q) { 2020 my ($qfer,$how,$rest) = @{$$self{quant}}; 2021 my $h = $how eq '!' ? 'g' : 2022 $how eq '?' ? 'f' : 2023 'r'; 2024 if ($qfer eq '*') { 2025 $PURE = 0; 2026 $quant = "\$C->_STAR$h$REV("; 2027 } 2028 elsif ($qfer eq '+') { 2029 $quant = "\$C->_PLUS$h$REV("; 2030 } 2031 elsif ($qfer eq '?') { 2032 $PURE = 0; 2033 $quant = "\$C->_OPT$h$REV("; 2034 } 2035 elsif ($qfer eq '**') { 2036 if (ref $rest) { 2037 if (ref $rest eq "RE_block") { 2038 $PURE = 0; 2039 $rep = "_REPINDIRECT$REV"; 2040 $rest = $rest->walk(); 2041 } 2042 else { 2043 $rep = "_REPSEP$REV"; 2044 $rest = " sub { my \$C=shift;\n" . ::indent($rest->walk()) . "\n}"; 2045 } 2046 } 2047 else { 2048 $PURE = 0 if $rest =~ /^0/; 2049 $rest = "'$rest'"; 2050 } 2051 $quant = "\$C->$rep$h( $rest, "; 2052 } 2053 $result = $quant . "sub { my \$C=shift;\n" . ::indent($$self{atom}->walk(@_)) . "\n})"; 2054 } 2106 my $atom = $$self{atom}->walk(@_); 2107 if ($q) { 2108 if ($atom =~ s{ ^ \$C->_PATTERN\(qr/\\G(.*?)/\) $ }{(?:$1)}sx) { 2109 my ($qfer,$how,$rest) = @{$$self{quant}}; 2110 my $h = $how eq '!' ? '' : 2111 $how eq '?' ? '?' : 2112 '+'; 2113 if ($qfer eq '**') { 2114 $h = $how eq '!' ? 'g' : 2115 $how eq '?' ? 'f' : 2116 'r'; 2117 if (ref $rest) { 2118 if (ref $rest eq "RE_block") { 2119 $PURE = 0; 2120 $rep = "_REPINDIRECT$REV"; 2121 $rest = $rest->walk(); 2122 } 2123 else { 2124 $rep = "_REPSEP$REV"; 2125 $rest = " sub { my \$C=shift;\n" . ::indent($rest->walk()) . "\n}"; 2126 } 2127 } 2128 else { 2129 $PURE = 0 if $rest =~ /^0/; 2130 $rest = "'$rest'"; 2131 } 2132 $quant = "\$C->$rep$h( $rest, "; 2133 $result = $quant . "sub { my \$C=shift;\n" . ::indent($atom) . "\n})"; 2134 } 2135 else { 2136 $PURE = 0; 2137 $result = "\$C->_PATTERN\(qr/\\G($atom$qfer$h)/\)"; 2138 } 2139 } 2140 else { 2141 my ($qfer,$how,$rest) = @{$$self{quant}}; 2142 my $h = $how eq '!' ? 'g' : 2143 $how eq '?' ? 'f' : 2144 'r'; 2145 if ($qfer eq '*') { 2146 $PURE = 0; 2147 $quant = "\$C->_STAR$h$REV("; 2148 } 2149 elsif ($qfer eq '+') { 2150 $quant = "\$C->_PLUS$h$REV("; 2151 } 2152 elsif ($qfer eq '?') { 2153 $PURE = 0; 2154 $quant = "\$C->_OPT$h$REV("; 2155 } 2156 elsif ($qfer eq '**') { 2157 if (ref $rest) { 2158 if (ref $rest eq "RE_block") { 2159 $PURE = 0; 2160 $rep = "_REPINDIRECT$REV"; 2161 $rest = $rest->walk(); 2162 } 2163 else { 2164 $rep = "_REPSEP$REV"; 2165 $rest = " sub { my \$C=shift;\n" . ::indent($rest->walk()) . "\n}"; 2166 } 2167 } 2168 else { 2169 $PURE = 0 if $rest =~ /^0/; 2170 $rest = "'$rest'"; 2171 } 2172 $quant = "\$C->$rep$h( $rest, "; 2173 } 2174 $result = $quant . "sub { my \$C=shift;\n" . ::indent($atom) . "\n})"; 2175 } 2176 } 2055 2177 else { 2056 $result = $ $self{atom}->walk(@_);2178 $result = $atom; 2057 2179 } 2058 2180 } … … 2093 2215 } 2094 2216 else { 2095 "map({ my \$C=\$_;\n" . 2096 ::indent($inner) . 2097 "\n} ($outer)[0])"; 2217 my $oi = $outer . $inner; 2218 if ($oi =~ s{ ^ \$C->_PATTERN\(qr/\\G(.*?)/\) \$C->_PATTERN\(qr/\\G(.*?)/\) $ }{\$C->_PATTERN(qr/\\G$1$2/)}sx) { 2219 $oi; 2220 } 2221 else { 2222 my $in = ::indent($inner,2); 2223 substr(<<"END",0,-1); 2224 do { 2225 if (my (\$C) = ($outer)[0]) { 2226 $in; 2227 } 2228 else { 2229 (); 2230 } 2231 } 2232 END 2233 2234 # "map({ my \$C=\$_;\n" . 2235 # ::indent($inner) . 2236 # "\n} ($outer)[0])"; 2237 } 2098 2238 } 2099 2239 } -
src/perl6/mangle.pl
r21819 r21901 1 1 package main; 2 use utf8; 2 3 3 4 sub mangle { -
src/perl6/tryfoo
r21272 r21901 10 10 my $what = 'foo'; 11 11 my $text = "@ARGV"; 12 my $r = Perl->new($text)->$what();12 my $r = STD->new($text)->$what(); 13 13 print Dump($r);
