- Timestamp: 09/05/08 23:22:17 (3 months ago)
- Files: 1 modified
src/perl6/STD.pm (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
src/perl6/STD.pm
r22110 r22162 3747 3747 token backslash:unspace { <?before \s> <.SUPER::ws> } 3748 3748 3749 token backslash:a { :i <sym> } 3749 token backslash:sym<0> { '0' <!before <[0..7]> > } 3750 3751 token backslash:A { <sym> <.obs('\\A as beginning-of-string matcher', '^')> } 3752 token backslash:a { <sym> <.panic: "\\a is allowed only in strings, not regexes"> } 3750 3753 token backslash:b { :i <sym> } 3751 3754 token backslash:c { :i <sym> … … 3762 3765 token backslash:n { :i <sym> } 3763 3766 token backslash:o { :i <sym> [ <octint> | '['<octint>[','<octint>]*']' ] } 3767 token backslash:Q { <sym> <obs('\\Q as quotemeta', 'quotes or literal variable match')> } 3764 3768 token backslash:r { :i <sym> } 3765 3769 token backslash:s { :i <sym> } … … 3768 3772 token backslash:w { :i <sym> } 3769 3773 token backslash:x { :i <sym> [ <hexint> | '[' [<.ws><hexint><.ws> ] ** ',' ']' ] } 3774 token backslash:z { <sym> <obs('\\z as end-of-string matcher', '$')> } 3775 token backslash:Z { <sym> <obs('\\Z as end-of-string matcher', '\\n?$')> } 3770 3776 token backslash:misc { $<litchar>=(\W) } 3771 3777 token backslash:oops { <.panic: "Unrecognized regex backslash sequence"> } … … 3819 3825 } 3820 3826 3821 token assertion:sym<[> { <before '[' > <cclass_elem> +}3822 token assertion:sym<+> { < before '+' > <cclass_elem>+}3823 token assertion:sym<-> { < before '-' > <cclass_elem>+}3827 token assertion:sym<[> { <before '[' > <cclass_elem> ** < + - > } 3828 token assertion:sym<+> { <sym> <cclass_elem> ** < + - > } 3829 token assertion:sym<-> { <sym> <cclass_elem> ** < + - > } 3824 3830 token assertion:sym<.> { <sym> } 3825 3831 token assertion:sym<,> { <sym> } … … 3829 3835 3830 3836 token cclass_elem { 3831 [ '+' | '-' ]? 
3837 <.ws> 3832 3838 [ 3833 3839 | <name> 3834 3840 | <before '['> <quibble($¢.cursor_fresh( ::STD::Q ).tweak(:q))> # XXX parse as q[] for now 3835 3841 ] 3842 <.ws> 3836 3843 } 3837 3844 … … 3840 3847 token mod_internal:sym<:my> { ':' <?before 'my' \s > [:lang($¢.cursor_fresh($+LANG)) <statement> <eat_terminator> ] } 3841 3848 3849 # XXX needs some generalization 3850 3842 3851 token mod_internal:sym<:i> { $<sym>=[':i'|':ignorecase'] » { $+ignorecase = 1 } } 3843 token mod_internal:sym<:!i> { $<sym>=[': i'|':ignorecase'] » { $+ignorecase = 0 } }3844 # XXX will this please work somehow ???3845 token mod_internal:sym<: i( )> { $<sym>=[':i'|':ignorecase'] <mod_arg> { $+ignorecase = $<mod_arg>.eval} }3852 token mod_internal:sym<:!i> { $<sym>=[':!i'|':!ignorecase'] » { $+ignorecase = 0 } } 3853 token mod_internal:sym<:i( )> { $<sym>=[':i'|':ignorecase'] <mod_arg> { $+ignorecase = eval $<mod_arg>.text } } 3854 token mod_internal:sym<:0i> { ':' (\d+) ['i'|'ignorecase'] { $+ignorecase = $0 } } 3846 3855 3847 3856 token mod_internal:sym<:a> { $<sym>=[':a'|':ignoreaccent'] » { $+ignoreaccent = 1 } } 3848 token mod_internal:sym<:!a> { $<sym>=[': a'|':ignoreaccent'] » { $+ignoreaccent = 0 } }3849 # XXX will this please work somehow ???3850 token mod_internal:sym<: a( )> { $<sym>=[':a'|':ignoreaccent'] <mod_arg> { $+ignoreaccent = $<mod_arg>.eval} }3857 token mod_internal:sym<:!a> { $<sym>=[':!a'|':!ignoreaccent'] » { $+ignoreaccent = 0 } } 3858 token mod_internal:sym<:a( )> { $<sym>=[':a'|':ignoreaccent'] <mod_arg> { $+ignoreaccent = eval $<mod_arg>.text } } 3859 token mod_internal:sym<:0a> { ':' (\d+) ['a'|'ignoreaccent'] { $+ignoreaccent = $0 } } 3851 3860 3852 3861 token mod_internal:sym<:s> { ':s' 'igspace'? » { $+sigspace = 1 } } 3853 token mod_internal:sym<:!s> { ':s' 'igspace'? » { $+sigspace = 0 } } 3854 token mod_internal:sym<:s( )> { ':s' 'igspace'? <mod_arg> { $+sigspace = $<mod_arg>.eval } } 3862 token mod_internal:sym<:!s> { ':!s' 'igspace'? 
» { $+sigspace = 0 } } 3863 token mod_internal:sym<:s( )> { ':s' 'igspace'? <mod_arg> { $+sigspace = eval $<mod_arg>.text } } 3864 token mod_internal:sym<:0s> { ':' (\d+) 's' 'igspace'? » { $+sigspace = $0 } } 3855 3865 3856 3866 token mod_internal:sym<:r> { ':r' 'atchet'? » { $+ratchet = 1 } } 3857 token mod_internal:sym<:!r> { ':r' 'atchet'? » { $+ratchet = 0 } } 3858 token mod_internal:sym<:r( )> { ':r' 'atchet'? » <mod_arg> { $+ratchet = $<mod_arg>.eval } } 3867 token mod_internal:sym<:!r> { ':!r' 'atchet'? » { $+ratchet = 0 } } 3868 token mod_internal:sym<:r( )> { ':r' 'atchet'? » <mod_arg> { $+ratchet = eval $<mod_arg>.text } } 3869 token mod_internal:sym<:0r> { ':' (\d+) 'r' 'atchet'? » { $+ratchet = $0 } } 3870 3871 token mod_internal:sym<:Perl5> { [':Perl5' | ':P5'] [ :lang( $¢.cursor_fresh( ::STD::P5Regex ).unbalanced($+GOAL) ) <nibbler> ] } 3859 3872 3860 3873 token mod_internal:adv { … … 3862 3875 } 3863 3876 3864 token mod_internal:oops { ':' <.panic: "Unrecognized regex modifier"> }3877 token mod_internal:oops { ':'\w+ <.panic: "Unrecognized regex modifier"> } 3865 3878 3866 3879 token quantifier:sym<*> { <sym> <quantmod> } … … 3882 3895 <sigspace> <quantified_atom> } 3883 3896 3884 token quantmod { [ '?' | '!' | ':' | '+' ]? } 3897 token quantmod { ':'? [ '?' | '!' | '+' ]? 
} 3898 3899 } # end grammar 3900 3901 grammar P5Regex is STD { 3902 3903 # begin tweaks (DO NOT ERASE) 3904 multi method tweak (:global(:$g)) { self } 3905 multi method tweak (:ignorecase(:$i)) { self } 3906 # end tweaks (DO NOT ERASE) 3907 3908 token category:metachar { <sym> } 3909 proto token metachar { <...> } 3910 3911 token category:backslash { <sym> } 3912 proto token backslash { <...> } 3913 3914 token category:assertion { <sym> } 3915 proto token assertion { <...> } 3916 3917 token category:quantifier { <sym> } 3918 proto token quantifier { <...> } 3919 3920 token category:mod_internal { <sym> } 3921 proto token mod_internal { <...> } 3922 3923 proto token rxinfix { <...> } 3924 3925 # suppress fancy end-of-line checking 3926 token codeblock { 3927 :my $GOAL is context = '}'; 3928 '{' :: [ :lang($¢.cursor_fresh($+LANG)) <statementlist> ] 3929 [ '}' || <.panic: "Unable to parse statement list; couldn't find right brace"> ] 3930 } 3931 3932 rule nibbler { 3933 :my $ignorecase is context<rw> = $+ignorecase // 0; 3934 <EXPR> 3935 } 3936 3937 token termish { 3938 <.ws> # XXX assuming old /x here? 3939 <quantified_atom>+ 3940 } 3941 token infixish { 3942 <!infixstopper> 3943 <!stdstopper> 3944 <rxinfix> 3945 { 3946 $<O> = $<rxinfix><O>; 3947 $<sym> = $<rxinfix><sym>; 3948 } 3949 } 3950 3951 token rxinfix:sym<|> ( --> Junctive_or ) { <sym> } 3952 3953 token quantified_atom { 3954 <!stopper> 3955 <!rxinfix> 3956 <atom> 3957 [ <.ws> <quantifier> 3958 # <?{ $<atom>.max_width }> 3959 # || <.panic: "Can't quantify zero-width atom"> 3960 ]? 3961 <.ws> 3962 } 3963 3964 token atom { 3965 [ 3966 | \w 3967 | <metachar> 3968 | '\\' :: . 
3969 ] 3970 } 3971 3972 # sequence stoppers 3973 token metachar:sym<|> { '|' :: <fail> } 3974 token metachar:sym<)> { ')' :: <fail> } 3975 3976 token metachar:quant { <quantifier> <.panic: "quantifier quantifies nothing"> } 3977 3978 # "normal" metachars 3979 3980 token metachar:sym<[ ]> { 3981 <before '['> <quibble($¢.cursor_fresh( ::STD::Q ).tweak(:q))> # XXX parse as q[] for now 3982 } 3983 3984 token metachar:sym«(? )» { 3985 '(?' {} <assertion> 3986 [ ')' || <.panic: "Perl 5 regex assertion not terminated by parenthesis"> ] 3987 } 3988 3989 token metachar:sym<( )> { 3990 '(' {} [:lang(self.unbalanced(')')) <nibbler>] 3991 [ ')' || <.panic: "Unable to parse Perl 5 regex; couldn't find right parenthesis"> ] 3992 { $/<sym> := <( )> } 3993 } 3994 3995 token metachar:sym<\\> { <sym> <backslash> } 3996 token metachar:sym<.> { <sym> } 3997 token metachar:sym<^> { <sym> } 3998 token metachar:sym<$> { 3999 '$' <?before \W | $> 4000 } 4001 4002 token metachar:var { 4003 <?before <sigil>\w> 4004 <.panic: "Can't interpolate variable in Perl 5 regex"> 4005 } 4006 4007 token backslash:A { <sym> } 4008 token backslash:a { <sym> } 4009 token backslash:b { :i <sym> } 4010 token backslash:c { :i <sym> 4011 <[ ?.._ ]> || <.panic: "Unrecognized \\c character"> 4012 } 4013 token backslash:d { :i <sym> } 4014 token backslash:e { :i <sym> } 4015 token backslash:f { :i <sym> } 4016 token backslash:h { :i <sym> } 4017 token backslash:l { :i <sym> } 4018 token backslash:n { :i <sym> } 4019 token backslash:o { '0' [ <octint> | '{'<octint>[','<octint>]*'}' ]? 
} 4020 token backslash:Q { <sym> } 4021 token backslash:r { :i <sym> } 4022 token backslash:s { :i <sym> } 4023 token backslash:t { :i <sym> } 4024 token backslash:u { :i <sym> } 4025 token backslash:v { :i <sym> } 4026 token backslash:w { :i <sym> } 4027 token backslash:x { :i <sym> [ <hexint> | '{' [<.ws><hexint><.ws> ] ** ',' '}' ] } 4028 token backslash:z { :i <sym> } 4029 token backslash:misc { $<litchar>=(\W) } 4030 token backslash:oops { <.panic: "Unrecognized Perl 5 regex backslash sequence"> } 4031 4032 token assertion:sym<?> { <sym> <codeblock> } 4033 token assertion:sym<{ }> { <codeblock> } 4034 4035 token assertion:sym«<» { <sym> <?before '=' | '!'> <assertion> } 4036 token assertion:sym<=> { <sym> [ <?before ')'> | <rx> ] } 4037 token assertion:sym<!=> { <sym> [ <?before ')'> | <rx> ] } 4038 token assertion:sym«>» { <sym> <rx> } 4039 4040 token rx { 4041 [:lang(self.unbalanced(')')) <nibbler>] 4042 [ ')' || <.panic: "Unable to parse Perl 5 regex; couldn't find right parenthesis"> ] 4043 } 4044 4045 token assertion:identifier { <identifier> [ # is qq right here? 4046 | <?before ')' > 4047 | <.ws> <nibbler> 4048 ] 4049 [ ':' <rx> ]? 4050 } 4051 4052 token assertion:bogus { <.panic: "Unrecognized Perl 5 regex assertion"> } 4053 4054 token quantifier:sym<*> { <sym> <quantmod> } 4055 token quantifier:sym<+> { <sym> <quantmod> } 4056 token quantifier:sym<?> { <sym> <quantmod> } 4057 token quantifier:sym<{ }> { '{' \d+ [','\d*]? '}' <quantmod> } 4058 4059 token quantmod { [ '?' | '+' ]? } 3885 4060 3886 4061 } # end grammar
