root/src/perl6/STD.pm

Revision 23048, 119.7 kB (checked in by lwall, 4 hours ago)

[gimme5] switch back to YAML::Syck till YAML::XS is fixed
[viv] allow input from stdin
[STD] always report worries for now

  • Property svn:mime-type set to text/plain; charset=UTF-8
  • Property svn:eol-style set to native
Line 
1grammar STD:ver<6.0.0.alpha>:auth<http://perl.org>;
2
3# should some of these be parser instance attributes?
4my $LANG is context;
5my $PKGDECL is context = "";
6my $PKG is context = "";
7my @PKGS;
8my $GOAL is context = "(eof)";
9my $PARSER is context<rw>;
10my $ACTIONS is context<rw>;
11my $IN_DECL is context<rw>;
12my %ROUTINES;
13my $ORIG is context;
14my @MEMOS is context;
15my $VOID is context<rw>;
16my @PADS;
17
18# random rule for debugging, please ignore
19token foo {
20   'foo' <.ws> 'bar' <.ws> 'baz'
21}
22
23=begin comment overview
24
25This file is designed to be either preprocessed into a grammar with
26action statements or used as-is without any preprocessing.  The {*}
27notation is a no-op action block, but can be identified uniquely via a
28combination of the preceding token or rule name plus any additional text
29following a #= comment.  We put this into a comment rather than using
30a macro so that bootstrap compilers don't have to worry about macros
31yet, and to keep the main grammar relatively uncluttered by action
32statements.  Note that the preprocessor can certainly generate accesses
33to the match state within the action block, so we need not mention it
34explicitly.
35
36Also, some rules are named by syntactic category plus an additonal symbol
37specified in adverbial form, either in bare :name form or in :sym<name>
38form.  (It does not matter which form you use for identifier symbols,
39except that to specify a symbol "sym" you must use the :sym<sym> form
40of adverb.)  If you use the <sym> rule within the rule, it will parse the
41symbol at that point.  At the final reduction point of a rule, if $sym
42has been set, that is used as the final symbol name for the rule.  This
43need not match the symbol specified as part the rule name; that is just
44for disambiguating the name.  However, if no $sym is set, the original
45symbol will be used by default.
46
47Note that rules automatically get an implicit {*} at their return, so
48for the TOP rule the implicit action name is also simply "TOP".
49
50Another nod toward preprocessing is that blocks that contain nested braces
51are delimited by double braces so that the preprocessor does not need to
52understand Perl 6 code.
53
54This grammar relies on transitive longest-token semantics, though
55initially we made a feeble attempt to order rules so a procedural
56interpretation of alternation could usually produce a correct parse.
57(This is becoming less true over time.)
58
59=end comment overview
60
61method TOP ($STOP = undef) {
62    if defined $STOP {
63        my $GOAL is context = $STOP;
64        self.unitstop($STOP).comp_unit;
65    }
66    else {
67        self.comp_unit;
68    }
69}
70
71
72# XXX shouldn't need this, it should all be defined/imported by the prelude
73
74my @basetypenames = qw[
75    Object Any Junction Whatever
76    Capture Match Signature Proxy Matcher
77    Package Module Class Role Grammar
78    Scalar Array Hash KeyHash KeySet KeyBag
79    Pair List Seq Range Set Bag Mapping
80    Void Undef Failure Exception
81    Code Block Routine Sub Macro
82    Method Submethod Regex
83
84    Str Blob
85    Char Byte Codepoint Grapheme StrPos StrLen Version
86
87    Num Complex
88    num complex
89
90    Int  int   int1  int2  int4 int8  int16  int32  int64
91    Rat  rat   rat1  rat2  rat4 rat8  rat16  rat32  rat64
92    UInt uint uint1 uint2 uint4 uint8 uint16 uint32 uint64
93    Buf  buf   buf1  buf2  buf4 buf8  buf16  buf32  buf64
94
95    Bit Bool
96    bit bool
97
98    Order Increasing Decreasing
99    Ordered Callable Positional Associatve
100    Ordering KeyExtractor Comparator OrderingPair
101
102    IO
103
104    KitchenSink
105];
106push @basetypenames, "True", "False", "Bool::True", "Bool::False";  # in quotes lest gimme5 translate them
107
108method is_type ($name) {
109    for reverse @PADS {
110        return True if $_.{$name};
111    }
112    return False;
113}
114
115method add_type ($name) {
116    my $typename = main::mangle($name);
117    my $qualname = ($+PKG // 'GLOBAL') ~ '::' ~ $typename;
118    @PADS[*-1]{$typename} = 'TYPE';
119    @PADS[*-1]{$qualname} = 'TYPE';
120    @PADS[*-1]{$name} = 'TYPE';
121}
122
123# XXX likewise for routine defs
124
125my @baseroutinenames = qw[
126    WHAT WHICH VAR
127    any all none one
128
129    die exit warn
130    caller want
131    eval evalfile
132    callsame callwith nextsame nextwith lastcall
133    defined undefine item list slice eager hyper
134
135    cat classify
136    quotemeta
137    chr ord
138    p5chop chop p5chomp chomp
139    index rindex substr
140    join split comb pack unpack
141    uc ucfirst lc lcfirst
142    normalize
143    nfc nfd nfkc nfkd
144    samecase sameaccent
145    capitalize
146    chars graphs codes bytes
147
148    say print open close printf sprintf slurp unlink link symlink
149    elems grep map first reduce sort uniq push reverse take splice
150
151    zip each roundrobin caller
152    return leave pop shift unshift reduce
153    keys values hash kv key value pairs pair
154
155    sign abs floor ceiling round truncate
156    exp log log10 sqrt roots
157    rand srand pick
158    cis unpolar
159
160    sin cos tan asin acos atan sec cosec cotan asec acosec
161    acotan sinh cosh tanh asinh acosh atanh sech cosech cotanh
162    asech acosech acotanh atan2
163
164    plan is ok dies_ok lives_ok skip todo pass flunk force_todo use_ok
165    isa_ok cmp_ok diag is_deeply isnt like skip_rest unlike nonce
166    skip_rest eval_dies_ok eval_lives_ok approx is_approx throws_ok version_lt
167
168    gmtime localtime time times
169    gethost getpw chroot getlogin
170    run runinstead
171    fork wait kill sleep
172];
173push @baseroutinenames, "HOW", "fail", "temp", "let";
174
175# please don't add: ref length bless delete exists
176
177my @routinenames;
178my %routinenames;
179
180sub init_pads {
181    @PKGS = ();
182    %ROUTINES = ();
183
184    @PADS = ();
185    @PADS[0] = {};
186    for @basetypenames {
187        @PADS[0]{$_} = 'TYPE';
188        @PADS[0]{'&' ~ $_} = 'CODE';
189    }
190    for @baseroutinenames {
191        @PADS[0]{'&' ~ $_} = 'CODE';
192    }
193}
194
195method is_routine ($name) {
196    my $aname;
197    if substr($name,0,1) eq '&' {
198        $aname = $name;
199    }
200    else {
201        $aname = '&' ~ $name;
202    }
203    for reverse @PADS {
204        return True if $_.{$aname};
205        return True if $_.{$name}; # type as routine?
206    }
207    return False;
208}
209
210method add_routine ($name) {
211    @PADS[*-1]{'&' ~ $name} = 'CODE';
212}
213
214# The internal precedence levels are *not* part of the public interface.
215# The current values are mere implementation; they may change at any time.
216# Users should specify precedence only in relation to existing levels.
217
218constant %term            = (:prec<z=>);
219constant %methodcall      = (:prec<y=>);
220constant %autoincrement   = (:prec<x=>);
221constant %exponentiation  = (:prec<w=>, :assoc<right>, :assign);
222constant %symbolic_unary  = (:prec<v=>);
223constant %multiplicative  = (:prec<u=>, :assoc<left>,  :assign);
224constant %additive        = (:prec<t=>, :assoc<left>,  :assign);
225constant %replication     = (:prec<s=>, :assoc<left>,  :assign);
226constant %concatenation   = (:prec<r=>, :assoc<left>,  :assign);
227constant %junctive_and    = (:prec<q=>, :assoc<list>,  :assign);
228constant %junctive_or     = (:prec<p=>, :assoc<list>,  :assign);
229constant %named_unary     = (:prec<o=>);
230constant %nonchaining     = (:prec<n=>, :assoc<non>);
231constant %chaining        = (:prec<m=>, :assoc<chain>, :bool);
232constant %tight_and       = (:prec<l=>, :assoc<left>,  :assign);
233constant %tight_or        = (:prec<k=>, :assoc<left>,  :assign);
234constant %conditional     = (:prec<j=>, :assoc<right>);
235constant %item_assignment = (:prec<i=>, :assoc<right>);
236constant %loose_unary     = (:prec<h=>);
237constant %comma           = (:prec<g=>, :assoc<list>, :nextterm<nulltermish>);
238constant %list_infix      = (:prec<f=>, :assoc<list>,  :assign);
239constant %list_assignment = (:prec<i=>, :sub<e=>, :assoc<right>);
240constant %list_prefix     = (:prec<e=>);
241constant %loose_and       = (:prec<d=>, :assoc<left>,  :assign);
242constant %loose_or        = (:prec<c=>, :assoc<left>,  :assign);
243constant %sequencer      = (:prec<b=>, :assoc<left>, :nextterm<statement>);
244constant %LOOSEST         = (:prec<a=!>);
245constant %terminator      = (:prec<a=>, :assoc<list>);
246
247# "epsilon" tighter than terminator
248#constant $LOOSEST = %LOOSEST<prec>;
249constant $LOOSEST = "a=!"; # XXX preceding line is busted
250
251
252role PrecOp {
253
254    # This is hopefully called on a match to mix in operator info by type.
255    method coerce (Match $m) {
256        # $m but= ::?CLASS;
257        my $var = self.WHAT ~ '::o';
258        my $d = %::($var);
259        if not $d<transparent> {
260            for keys(%$d) { $m<O>{$_} = $d.{$_} };
261            $m.deb("coercing to " ~ self) if $*DEBUG +& DEBUG::EXPR;
262        }
263        $m<O><kind> = self.WHAT;
264        return $m;
265    }
266
267} # end role
268
269class Hyper does PrecOp {
270 our %o = (:transparent);
271} # end class
272
273class Term does PrecOp {
274    our %o = %term;
275} # end class
276class Methodcall does PrecOp {
277    our %o = %methodcall;
278} # end class
279class Autoincrement does PrecOp {
280    our %o = %autoincrement;
281} # end class
282class Exponentiation does PrecOp {
283    our %o = %exponentiation;
284} # end class
285class Symbolic_unary does PrecOp {
286    our %o = %symbolic_unary;
287} # end class
288class Multiplicative does PrecOp {
289    our %o = %multiplicative;
290} # end class
291class Additive does PrecOp {
292    our %o = %additive;
293} # end class
294class Replication does PrecOp {
295    our %o = %replication;
296} # end class
297class Concatenation does PrecOp {
298    our %o = %concatenation;
299} # end class
300class Junctive_and does PrecOp {
301    our %o = %junctive_and;
302} # end class
303class Junctive_or does PrecOp {
304    our %o = %junctive_or;
305} # end class
306class Named_unary does PrecOp {
307    our %o = %named_unary;
308} # end class
309class Nonchaining does PrecOp {
310    our %o = %nonchaining;
311} # end class
312class Chaining does PrecOp {
313    our %o = %chaining;
314} # end class
315class Tight_and does PrecOp {
316    our %o = %tight_and;
317} # end class
318class Tight_or does PrecOp {
319    our %o = %tight_or;
320} # end class
321class Conditional does PrecOp {
322    our %o = %conditional;
323} # end class
324class Item_assignment does PrecOp {
325    our %o = %item_assignment;
326} # end class
327class Loose_unary does PrecOp {
328    our %o = %loose_unary;
329} # end class
330class Comma does PrecOp {
331    our %o = %comma;
332} # end class
333class List_infix does PrecOp {
334    our %o = %list_infix;
335} # end class
336class List_assignment does PrecOp {
337    our %o = %list_assignment;
338} # end class
339class List_prefix does PrecOp {
340    our %o = %list_prefix;
341} # end class
342class Loose_and does PrecOp {
343    our %o = %loose_and;
344} # end class
345class Loose_or does PrecOp {
346    our %o = %loose_or;
347} # end class
348class Sequencer does PrecOp {
349    our %o = %sequencer;
350} # end class
351class Terminator does PrecOp {
352    our %o = %terminator;
353} # end class
354
355# Categories are designed to be easily extensible in derived grammars
356# by merely adding more rules in the same category.  The rules within
357# a given category start with the category name followed by a differentiating
358# adverbial qualifier to serve (along with the category) as the longer name.
359
360# The endsym context, if specified, says what to implicitly check for in each
361# rule right after the initial <sym>.  Normally this is used to make sure
362# there's appropriate whitespace.  # Note that endsym isn't called if <sym>
363# isn't called.
364
365my $endsym is context = "null";
366my $endargs is context = -1;
367
368proto token category { <...> }
369
370token category:category { <sym> }
371
372token category:sigil { <sym> }
373proto token sigil { <...> }
374
375token category:twigil { <sym> }
376proto token twigil { <...> }
377
378token category:special_variable { <sym> }
379proto token special_variable { <...> }
380
381token category:version { <sym> }
382proto token version { <...> }
383
384token category:module_name { <sym> }
385proto token module_name { <...> }
386
387token category:term { <sym> }
388proto token term { <...> }
389
390token category:quote { <sym> }
391proto token quote () { <...> }
392
393token category:prefix { <sym> }
394proto token prefix is unary is defequiv(%symbolic_unary) { <...> }
395
396token category:infix { <sym> }
397proto token infix is binary is defequiv(%additive) { <...> }
398
399token category:postfix { <sym> }
400proto token postfix is unary is defequiv(%autoincrement) { <...> }
401
402token category:dotty { <sym> }
403proto token dotty (:$endsym is context = 'unspacey') { <...> }
404
405token category:circumfix { <sym> }
406proto token circumfix { <...> }
407
408token category:postcircumfix { <sym> }
409proto token postcircumfix is unary { <...> }  # unary as far as EXPR knows...
410
411token category:quote_mod { <sym> }
412proto token quote_mod { <...> }
413
414token category:trait_verb { <sym> }
415proto token trait_verb (:$endsym is context = 'spacey') { <...> }
416
417token category:trait_auxiliary { <sym> }
418proto token trait_auxiliary (:$endsym is context = 'spacey') { <...> }
419
420token category:type_declarator { <sym> }
421proto token type_declarator () { <...> }
422
423token category:scope_declarator { <sym> }
424proto token scope_declarator () { <...> }
425
426token category:package_declarator { <sym> }
427proto token package_declarator () { <...> }
428
429token category:multi_declarator { <sym> }
430proto token multi_declarator () { <...> }
431
432token category:routine_declarator { <sym> }
433proto token routine_declarator () { <...> }
434
435token category:regex_declarator { <sym> }
436proto token regex_declarator () { <...> }
437
438token category:statement_prefix { <sym> }
439proto rule  statement_prefix () { <...> }
440
441token category:statement_control { <sym> }
442proto rule  statement_control (:$endsym is context = 'spacey') { <...> }
443
444token category:statement_mod_cond { <sym> }
445proto rule  statement_mod_cond () { <...> }
446
447token category:statement_mod_loop { <sym> }
448proto rule  statement_mod_loop () { <...> }
449
450token category:infix_prefix_meta_operator { <sym> }
451proto token infix_prefix_meta_operator is binary { <...> }
452
453token category:infix_postfix_meta_operator { <sym> }
454proto token infix_postfix_meta_operator ($op) is binary { <...> }
455
456token category:infix_circumfix_meta_operator { <sym> }
457proto token infix_circumfix_meta_operator is binary { <...> }
458
459token category:postfix_prefix_meta_operator { <sym> }
460proto token postfix_prefix_meta_operator is unary { <...> }
461
462token category:prefix_postfix_meta_operator { <sym> }
463proto token prefix_postfix_meta_operator is unary { <...> }
464
465token category:prefix_circumfix_meta_operator { <sym> }
466proto token prefix_circumfix_meta_operator is unary { <...> }
467
468token category:terminator { <sym> }
469proto token terminator { <...> }
470
471token unspacey { <.unsp>? }
472token spacey { <?before \s | '#'> }
473
474# Lexical routines
475
476token ws {
477    :my @stub = return self if @+MEMOS[self.pos]<ws> :exists;
478    :my $startpos = self.pos;
479
480    :dba('whitespace')
481    [
482        | \h+ <![#\s\\]> { @+MEMOS[$¢.pos]<ws> = $startpos; }   # common case
483        | <?before \w> <?after \w> :::
484            { @+MEMOS[$startpos]<ws> = undef; }
485            <!>        # must \s+ between words
486    ]
487    ||
488    [
489    | <.unsp>
490    | <.vws> <.heredoc>
491    | <.unv>
492    | $ { $¢.moreinput }
493    ]*
494
495    {{
496        if ($¢.pos == $startpos) {
497            @+MEMOS[$¢.pos]<ws> = undef;
498        }
499        else {
500            @+MEMOS[$¢.pos]<ws> = $startpos;
501            @+MEMOS[$¢.pos]<endstmt> = @+MEMOS[$startpos]<endstmt>
502                if @+MEMOS[$startpos]<endstmt> :exists;
503        }
504    }}
505}
506
507token unsp {
508    \\ <?before [\s|'#'] >
509    :dba('unspace')
510    [
511    | <.vws>                     {*}                             #= vwhite
512    | <.unv>                  {*}                                #= unv
513    | $ { $¢.moreinput }
514    ]*
515}
516
517token vws {
518    :dba('vertical whitespace')
519    \v
520    [ '#DEBUG -1' { say "DEBUG"; $STD::DEBUG = $*DEBUG = -1; } ]?
521}
522
523# We provide two mechanisms here:
524# 1) define $+moreinput, or
525# 2) override moreinput method
526method moreinput () {
527    $+moreinput.() if $+moreinput;
528}
529
530token unv {
531   :dba('horizontal whitespace')
532   [
533   | \h+                 {*}                                    #= hwhite
534   | <?before '='> ^^ <.pod_comment>  {*}                    #= pod
535   | \h* '#' [
536         |  <?opener>
537            [ <!after ^^ . > || <.panic: "Can't use embedded comments in column 1"> ]
538            <.quibble($¢.cursor_fresh( ::STD::Q ))>   {*}                               #= embedded
539         | {} \N*            {*}                                 #= end
540         ]
541    ]
542}
543
544token ident {
545    <.alpha> \w*
546}
547
548token apostrophe {
549    <[ ' \- ]>
550}
551
552token identifier {
553    <.ident> [ <.apostrophe> <.ident> ]*
554}
555
556# XXX We need to parse the pod eventually to support $= variables.
557
558token pod_comment {
559    ^^ '=' <.unsp>?
560    [
561    | 'begin' \h+ <identifier> ::
562        [
563        ||  .*? "\n=" <.unsp>? 'end' \h+ $<identifier> » \N*          {*} #= tagged
564        ||  .*?                                                       {*} #= end
565        ]
566    | 'begin' » :: \h* [ $$ || '#' || <.panic: "Unrecognized token after =begin"> ]
567        [ .*?  "\n=" <.unsp>? 'end' » \N* || <.panic: "=begin without =end"> ]   {*}       #= anon
568    | ::
569        [ <?before .*? ^^ '=cut' » > <.panic: "Obsolete pod format, please use =begin/=end instead"> ]?
570        \N*                                           {*}       #= misc
571    ]
572}
573
574# Top-level rules
575
576# Note: we only check for the stopper.  We don't check for ^ because
577# we might be embedded in something else.
578rule comp_unit {
579    :my $begin_compunit is context = 1;
580    :my $endargs        is context<rw> = -1;
581
582    :my $LANG is context;
583    :my $PKGDECL is context = "";
584    :my $PKG is context = "";
585    :my $GOAL is context = "(eof)";
586    :my $PARSER is context<rw>;
587    :my $IN_DECL is context<rw>;
588
589    { init_pads(); }
590
591    <statementlist>
592    [ <?unitstopper> || <.panic: "Can't understand next input--giving up"> ]
593    # "CHECK" time...
594    {{
595        if @COMPILING::WORRIES {
596            warn "Potential difficulties:\n  " ~ join( "\n  ", @COMPILING::WORRIES) ~ "\n";
597        }
598
599        my %UNKNOWN;
600        for keys(%ROUTINES) {
601            next if $¢.is_routine($_);
602            %UNKNOWN{$_} = %ROUTINES{$_};
603        }
604        if %UNKNOWN {
605            warn "Unknown routines:\n";
606            for sort keys(%UNKNOWN) {
607                warn "\t$_ called at ", %UNKNOWN{$_}, "\n";
608            }
609        }
610    }}
611}
612
613# Note: because of the possibility of placeholders we can't determine arity of
614# the block syntactically, so this must be determined via semantic analysis.
615# Also, pblocks used in an if/unless statement do not treat $_ as a placeholder,
616# while most other blocks treat $_ as equivalent to $^x.  Therefore the first
617# possible place to check arity is not here but in the rule that calls this
618# rule.  (Could also be done in a later pass.)
619
620token pblock {
621    :dba('parameterized block')
622    [ <lambda> <signature> ]? <block>
623}
624
625token lambda { '->' | '<->' }
626
627# Look for an expression followed by a required lambda.
628token xblock {
629    :my $GOAL is context = '{';
630    <EXPR>
631    <.ws>
632    <pblock>
633}
634
635token block {
636    '{' ~ '}' <statementlist>
637
638    [
639    | <?before \h* $$>  # (usual case without comments)
640        { @+MEMOS[$¢.pos]<endstmt> = 2; } {*}                    #= endstmt simple
641    | \h* <.unsp>? <?before <[,:]>> {*}                         #= normal
642    | <.unv>? $$
643        { @+MEMOS[$¢.pos]<endstmt> = 2; } {*}                    #= endstmt complex
644    | <.unsp>? { @+MEMOS[$¢.pos]<endargs> = 1; } {*}             #= endargs
645    ]
646}
647
648token regex_block {
649    :my $lang = ::Regex;
650    :my $GOAL is context = '}';
651
652    [ <quotepair> <.ws>
653        {
654            my $kv = $<quotepair>[*-1];
655            $lang = $lang.tweak($kv.<k>, $kv.<v>)
656                or self.panic("Unrecognized adverb :" ~ $kv.<k> ~ '(' ~ $kv.<v> ~ ')');
657        }
658    ]*
659
660    '{'
661    <nibble( $¢.cursor_fresh($lang).unbalanced('}') )>
662    [ '}' || <.panic: "Unable to parse regex; couldn't find right brace"> ]
663
664    [
665    | <?before \h* $$>  # (usual case without comments)
666        { @+MEMOS[$¢.pos]<endstmt> = 2; } {*}                    #= endstmt simple
667    | \h* <.unsp>? <?before <[,:]>> {*}                         #= normal
668    | <.unv>? $$
669        { @+MEMOS[$¢.pos]<endstmt> = 2; } {*}                    #= endstmt complex
670    | <.unsp>? { @+MEMOS[$¢.pos]<endargs> = 1; }   {*}           #= endargs
671    ]
672}
673
674# statement semantics
675rule statementlist {
676    :my $PARSER is context<rw> = self;
677    :dba('statement list')
678    [
679    | $
680    | <?before <[\)\]\}]> >
681    | [<statement><eat_terminator> ]*
682    ]
683}
684
685# embedded semis, context-dependent semantics
686rule semilist {
687    :dba('semicolon list')
688    [
689    | <?before <[\)\]\}]> >
690    | [<statement><eat_terminator> ]*
691    ]
692}
693
694
695token label {
696    <identifier> ':' <?before \s> <.ws>
697
698    [ <?{ $¢.is_type($<identifier>.text) }>
699      <.panic("You tried to use an existing typename as a label")>
700#      <suppose("You tried to use an existing name $/{'identifier'} as a label")>
701    ]?
702
703    # add label as a pseudo type
704    {{ $¢.add_type($<identifier>.text); }}
705
706}
707
708token statement {
709    :my $endargs is context = -1;
710    <!before <[\)\]\}]> >
711
712    # this could either be a statement that follows a declaration
713    # or a statement that is within the block of a code declaration
714    <!!{ $¢ = $+PARSER.bless($¢); }>
715
716    [
717    | <label> <statement>                        {*}            #= label
718    | <statement_control>                        {*}            #= control
719    | <EXPR> {*}                                                #= expr
720        :dba('statement end')
721        [
722        || <?{ (@+MEMOS[$¢.pos]<endstmt> // 0) == 2 }>   # no mod after end-line curly
723        ||
724            :dba('statement modifier')
725            <.ws>
726            [
727            | <statement_mod_loop> {*}                              #= mod loop
728            | <statement_mod_cond> {*}                              #= mod cond
729                :dba('statement modifier loop')
730                [
731                || <?{ (@+MEMOS[$¢.pos]<endstmt> // 0) == 2 }>
732                || <.ws> <statement_mod_loop>? {*}                  #= mod condloop
733                ]
734            ]?
735        ]
736        {*}                                                     #= modexpr
737    | <?before ';'> {*}                                         #= null
738    ]
739}
740
741
742token eat_terminator {
743    [
744    || ';'
745    || <?{ @+MEMOS[$¢.pos]<endstmt> }> <.ws>
746    || <?terminator>
747    || $
748    || {{ if @+MEMOS[$¢.pos]<ws> { $¢.pos = @+MEMOS[$¢.pos]<ws>; } }}   # undo any line transition
749        <.panic: "Syntax error">
750    ]
751}
752
753token statement_control:use {
754    <sym> :s
755    [
756    | <version>
757    | <module_name><arglist>?
758        {{
759            my $longname = $<module_name><longname>;
760            $¢.add_type($longname.text);
761        }}
762    ]
763}
764
765
766token statement_control:no {
767    <sym> :s
768    <module_name><arglist>?
769}
770
771
772token statement_control:if {
773    <sym> :s
774    <xblock>
775    [$<elsif> = (
776        'elsif'<?spacey> <xblock>       {*}                #= elsif
777    )]*
778    [$<else> = (
779        'else'<?spacey> <pblock>       {*}             #= else
780    )]?
781}
782
783
784token statement_control:unless {
785    <sym> :s
786    <xblock>
787    [ <!before 'else'> || <.panic: "unless does not take \"else\" in Perl 6; please rewrite using \"if\""> ]
788}
789
790
791token statement_control:while {
792    <sym> :s
793    [ <?before '(' ['my'? '$'\w+ '=']? '<' '$'?\w+ '>' ')'>   #'
794        <.panic: "This appears to be Perl 5 code"> ]?
795    <xblock>
796}
797
798
799token statement_control:until {
800    <sym> :s
801    <xblock>
802}
803
804
805token statement_control:repeat {
806    <sym> :s
807    [
808        | ('while'|'until')
809          <xblock>
810        | <block>                      {*}                      #= block wu
811          ('while'|'until') <EXPR>         {*}                      #= expr wu
812    ]
813}
814
815
816token statement_control:loop {
817    <sym> :s
818    $<eee> = (
819        '('
820            <e1=EXPR>? ';'   {*}                            #= e1
821            <e2=EXPR>? ';'   {*}                            #= e2
822            <e3=EXPR>?       {*}                            #= e3
823        ')'                      {*}                            #= eee
824    )?
825    <block>                     {*}                             #= block
826}
827
828
829token statement_control:for {
830    <sym> :s
831    [ <?before 'my'? '$'\w+ '(' >
832        <.panic: "This appears to be Perl 5 code"> ]?
833    <xblock>
834}
835
836token statement_control:given {
837    <sym> :s
838    <xblock>
839}
840token statement_control:when {
841    <sym> :s
842    <xblock>
843}
844rule statement_control:default {<sym> <block> }
845
846rule statement_control:BEGIN   {<sym> <block> }
847rule statement_control:CHECK   {<sym> <block> }
848rule statement_control:INIT    {<sym> <block> }
849rule statement_control:END     {<sym> <block> }
850rule statement_control:START   {<sym> <block> }
851rule statement_control:ENTER   {<sym> <block> }
852rule statement_control:LEAVE   {<sym> <block> }
853rule statement_control:KEEP    {<sym> <block> }
854rule statement_control:UNDO    {<sym> <block> }
855rule statement_control:FIRST   {<sym> <block> }
856rule statement_control:NEXT    {<sym> <block> }
857rule statement_control:LAST    {<sym> <block> }
858rule statement_control:PRE     {<sym> <block> }
859rule statement_control:POST    {<sym> <block> }
860rule statement_control:CATCH   {<sym> <block> }
861rule statement_control:CONTROL {<sym> <block> }
862rule statement_control:TEMP    {<sym> <block> }
863
864rule term:BEGIN   {<sym> <block> }
865rule term:CHECK   {<sym> <block> }
866rule term:INIT    {<sym> <block> }
867rule term:START   {<sym> <block> }
868rule term:ENTER   {<sym> <block> }
869rule term:FIRST   {<sym> <block> }
870
871rule modifier_expr { <EXPR> }
872
873rule statement_mod_cond:if     {<sym> <modifier_expr> {*} }     #= if
874rule statement_mod_cond:unless {<sym> <modifier_expr> {*} }     #= unless
875rule statement_mod_cond:when   {<sym> <modifier_expr> {*} }     #= when
876
877rule statement_mod_loop:while {<sym> <modifier_expr> {*} }      #= while
878rule statement_mod_loop:until {<sym> <modifier_expr> {*} }      #= until
879
880rule statement_mod_loop:for   {<sym> <modifier_expr> {*} }      #= for
881rule statement_mod_loop:given {<sym> <modifier_expr> {*} }      #= given
882
883token module_name:normal {
884    <longname>
885    [ <?{ ($+PKGDECL//'') eq 'role' }> <?before '['> <postcircumfix> ]?
886}
887
888token module_name:deprecated { 'v6-alpha' }
889
890token vnum {
891    \d+ | '*'
892}
893
894token version:sym<v> {
895    'v' <?before \d> :: <vnum> ** '.' '+'?
896}
897
898###################################################
899
900token PRE {
901    :dba('prefix or meta-prefix')
902    [
903    | <prefix>
904        { $<O> = $<prefix><O>; $<sym> = $<prefix><sym> }
905                                                    {*}         #= prefix
906    | <prefix_circumfix_meta_operator>
907        { $<O> = $<prefix_circumfix_meta_operator><O>; $<sym> = $<prefix_circumfix_meta_operator>.text }
908                                                    {*}         #= precircum
909    ]
910    # XXX assuming no precedence change
911   
912    <prefix_postfix_meta_operator>*                 {*}         #= prepost
913    <.ws>
914}
915
916# (for when you want to tell EXPR that infix already parsed the term)
917token nullterm {
918    <?>
919}
920
921token nulltermish {
922    :dba('null term')
923    [
924    | <?stdstopper>
925    | <termish>?
926    ]
927}
928
929token termish {
930    :dba('prefix or noun')
931    [
932    | <PRE>+ <noun>
933    | <noun>
934    ]
935
936    # also queue up any postfixes
937    :dba('postfix')
938    [ <?stdstopper> ||
939        <POST>*
940    ]
941}
942
943token noun {
944    [
945    | <fatarrow>
946    | <variable> { $<SIGIL> = $<variable><sigil> }
947    | <package_declarator>
948    | <scope_declarator>
949    | <?before 'multi'|'proto'|'only'> <multi_declarator>
950    | <routine_declarator>
951    | <regex_declarator>
952    | <type_declarator>
953    | <circumfix>
954    | <dotty>
955    | <value>
956    | <capterm>
957    | <sigterm>
958    | <term>
959    | <statement_prefix>
960    | [ <colonpair> <.ws> ]+
961    ]
962}
963
964
965token fatarrow {
966    <key=identifier> \h* '=>' <.ws> <val=EXPR(item %item_assignment)>
967}
968
969token colonpair {
970    :my $key;
971    :my $value;
972
973    ':'
974    :dba('colon pair')
975    [
976    | '!' <identifier>
977        { $key = $<identifier>.text; $value = 0; }
978        {*}                                                     #= false
979    | $<num> = [\d+] <identifier>
980    | <identifier>
981        { $key = $<identifier>.text; }
982        [
983        || <.unsp>? '.'? <postcircumfix> { $value = $<postcircumfix>; }
984        || { $value = 1; }
985        ]
986        {*}                                                     #= value
987    | :dba('signature') '(' ~ ')' <signature>
988    | <postcircumfix>
989        { $key = ""; $value = $<postcircumfix>; }
990        {*}                                                     #= structural
991    | $<var> = (<sigil> {} <twigil>? <desigilname>)
992        { $key = $<var><desigilname>.text; $value = $<var>; }
993        {*}                                                     #= varname
994    ]
995    { $<k> = $key; $<v> = $value; }
996}
997
998token quotepair {
999    :my $key;
1000    :my $value;
1001
1002    ':'
1003    :dba('colon pair (restricted)')
1004    [
1005    | '!' <identifier>
1006        { $key = $<identifier>.text; $value = 0; }
1007        {*}                                                     #= false
1008    | <identifier>
1009        { $key = $<identifier>.text; }
1010        [
1011        || <.unsp>? '.'? <?before '('> <postcircumfix> { $value = $<postcircumfix>; }
1012        || { $value = 1; }
1013        ]
1014        {*}                                                     #= value
1015    | $<n>=(\d+) $<id>=(<[a..z]>+)
1016        { $key = $<id>.text; $value = $<n>.text; }
1017        {*}                                                     #= nth
1018    ]
1019    { $<k> = $key; $<v> = $value; }
1020}
1021
1022token infixish {
1023    <!stdstopper>
1024    <!infixstopper>
1025    :dba('infix or meta-infix')
1026    [
1027    | <colonpair> {
1028            $<fake> = 1;
1029            $<sym> = ':';
1030            %<O><prec> = %loose_unary<prec>;
1031            %<O><assoc> = 'left';
1032        }
1033    | <infix>
1034           { $<O> = $<infix>.<O>; $<sym> = $<infix>.<sym>; }
1035    | <infix_prefix_meta_operator>
1036        { $<O> = $<infix_prefix_meta_operator><O>;
1037          $<sym> = $<infix_prefix_meta_operator><sym>; }
1038    | <infix_circumfix_meta_operator>
1039        { $<O> = $<infix_circumfix_meta_operator><O>;
1040          $<sym> = $<infix_circumfix_meta_operator><sym>; }
1041    | <infix> <?before '='> <infix_postfix_meta_operator($<infix>)>
1042           { $<O> = $<infix_postfix_meta_operator>.<O>; $<sym> = $<infix_postfix_meta_operator>.<sym>; }
1043    ]
1044}
1045
1046# doing fancy as one rule simplifies LTM
1047token dotty:sym<.*> ( --> Methodcall) {
1048    ('.' [ <[+*?=:]> | '^' '!'? ]) :: <.unspacey> <dottyop>
1049    { $<sym> = $0.item; }
1050}
1051
1052token dotty:sym<.> ( --> Methodcall) {
1053    <sym> <dottyop>
1054}
1055
1056token privop ( --> Methodcall) {
1057    '!' <methodop>
1058}
1059
1060token dottyop {
1061    :dba('dotty method or postfix')
1062    [
1063    | <methodop>
1064    | <postop>     # forcing postop's precedence to methodcall here
1065    ]
1066}
1067
1068# Note, this rule mustn't do anything irreversible because it's used
1069# as a lookahead by the quote interpolator.
1070
1071token POST {
1072    <!stdstopper>
1073
1074    # last whitespace didn't end here
1075    <!{ @+MEMOS[$¢.pos]<ws> }>
1076
1077    [ <.unsp> | '\\' <?before '.'> ]?
1078
1079    [ ['.' <.unsp>?]? <postfix_prefix_meta_operator> <.unsp>? ]*
1080
1081    :dba('postfix')
1082    [
1083    | <dotty>  { $<O> = $<dotty><O> }
1084    | <privop> { $<O> = $<privop><O> }
1085    | <postop> { $<O> = $<postop><O> }
1086    ]
1087}
1088
1089regex prefix_circumfix_meta_operator:reduce (--> List_prefix) {
1090    $<s> = (
1091        '['
1092        [
1093        | <op=infix> ']' ['«'|<?>]
1094        | <op=infix_prefix_meta_operator> ']' ['«'|<?>]
1095        | <op=infix_circumfix_meta_operator> ']' ['«'|<?>]
1096        | \\<op=infix> ']' ['«'|<?>]
1097        | \\<op=infix_prefix_meta_operator> ']' ['«'|<?>]
1098        | \\<op=infix_circumfix_meta_operator> ']' ['«'|<?>]
1099        ]
1100    ) <?before \s | '(' >
1101
1102    { $<O> = $<s><op><O>; $<sym> = $<s>.text; }
1103
1104    [ <!{ $<O><assoc> eq 'non' }>
1105        || <.panic: "Can't reduce a non-associative operator"> ]
1106
1107    [ <!{ $<O><prec> eq %conditional<prec> }>
1108        || <.panic: "Can't reduce a conditional operator"> ]
1109
1110    { $<O><assoc> = 'unary'; }
1111
1112}
1113
1114token prefix_postfix_meta_operator:sym< « >    { <sym> | '<<' }
1115
1116token postfix_prefix_meta_operator:sym< » >    { <sym> | '>>' }
1117
1118token infix_prefix_meta_operator:sym<!> ( --> Chaining) {
1119    <sym> <!before '!'> <infix>
1120
1121    <!!{ $<O> = $<infix><O>; }>
1122    <!!lex1: 'negation'>
1123
1124    [
1125    || <!!{ $<O><assoc> eq 'chain'}>
1126    || <!!{ $<O><assoc> and $<O><bool> }>
1127    || <.panic: "Only boolean infix operators may be negated">
1128    ]
1129
1130    <!{ $<O><hyper> and $¢.panic("Negation of hyper operator not allowed") }>
1131
1132}
1133
1134method lex1 (Str $s) {
1135    self.<O>{$s}++ and self.panic("Nested $s metaoperators not allowed");
1136    self;
1137}
1138
1139token infix_circumfix_meta_operator:sym<X X> ( --> List_infix) {
1140    X <infix> X
1141    <!!{ $<O> = $<infix><O>; }>
1142    <!!lex1: 'cross'>
1143}
1144
1145token infix_circumfix_meta_operator:sym<« »> ( --> Hyper) {
1146    [
1147    | '«' <infix> [ '«' | '»' ]
1148    | '»' <infix> [ '«' | '»' ]
1149    | '<<' <infix> [ '<<' | '>>' ]
1150    | '>>' <infix> [ '<<' | '>>' ]
1151    ]
1152    <!!{ $<O> := $<infix><O>; }>
1153    <!!lex1: 'hyper'>
1154}
1155
1156token infix_postfix_meta_operator:sym<=> ($op --> Item_assignment) {
1157    '='
1158    { $<O> = $op<O>; }
1159    <?lex1: 'assignment'>
1160
1161    [ <?{ ($<O><assoc> // '') eq 'chain' }> <.panic: "Can't make assignment op of boolean operator"> ]?
1162    [ <?{ ($<O><assoc> // '') eq 'non'   }> <.panic: "Can't make assignment op of non-associative operator"> ]?
1163}
1164
1165token postcircumfix:sym<( )> ( --> Methodcall)
1166    { :dba('argument list') '(' ~ ')' <semilist> }
1167
1168token postcircumfix:sym<[ ]> ( --> Methodcall)
1169    { :dba('subscript') '[' ~ ']' <semilist> }
1170
1171token postcircumfix:sym<{ }> ( --> Methodcall)
1172    { :dba('subscript') '{' ~ '}' <semilist> }
1173
1174token postcircumfix:sym«< >» ( --> Methodcall)
1175    { '<' <nibble($¢.cursor_fresh( ::STD::Q ).tweak(:q).tweak(:w).balanced('<','>'))> [ '>' || <.panic: "Unable to parse quote-words subscript; couldn't find right angle quote"> ] }
1176
1177token postcircumfix:sym«<< >>» ( --> Methodcall)
1178    { '<<' <nibble($¢.cursor_fresh( ::STD::Q ).tweak(:qq).tweak(:ww).balanced('<<','>>'))> [ '>>' || <.panic: "Unable to parse quote-words subscript; couldn't find right double-angle quote"> ] }
1179
1180token postcircumfix:sym<« »> ( --> Methodcall)
1181    { '«' <nibble($¢.cursor_fresh( ::STD::Q ).tweak(:qq).tweak(:ww).balanced('«','»'))> [ '»' || <.panic: "Unable to parse quote-words subscript; couldn't find right double-angle quote"> ] }
1182
1183token postop {
1184    | <postfix>         { $<O> := $<postfix><O> }
1185    | <postcircumfix>   { $<O> := $<postcircumfix><O> }
1186}
1187
1188token methodop {
1189    [
1190    | <longname>
1191    | <?before '$' | '@' > <variable>
1192    | <?before <[ ' " ]> > <quote>
1193        { $<quote> ~~ /\W/ or $¢.panic("Useless use of quotes") }
1194    ] <.unsp>?
1195
1196    :dba('method arguments')
1197    [
1198    | '.'? <.unsp>? '(' ~ ')' <semilist>
1199    | ':' <?before \s> <!{ $+inquote }> <arglist>
1200    ]?
1201}
1202
1203token arglist {
1204    :my StrPos $endargs is context<rw> = 0;
1205    :my $GOAL is context = 'endargs';
1206    <.ws>
1207    :dba('argument list')
1208    [
1209    | <?stdstopper>
1210    | <EXPR(item %list_prefix)>
1211    ]
1212}
1213
1214token circumfix:sym<{ }> ( --> Term) {
1215    <?before '{' | <lambda> > <pblock>
1216}
1217
1218token variable_declarator {
1219    :my $IN_DECL is context<rw> = 1;
1220    <variable> { $<SIGIL> = $<variable><sigil> }
1221    { $IN_DECL = 0; }
1222    [   # Is it a shaped array or hash declaration?
1223      #  <?{ $<sigil> eq '@' | '%' }>
1224        <.unsp>?
1225        $<shape> = [
1226        | '(' ~ ')' <signature>
1227        | :dba('shape definition') '[' ~ ']' <semilist>
1228        | :dba('shape definition') '{' ~ '}' <semilist>
1229        | <?before '<'> <postcircumfix>
1230        ]*
1231    ]?
1232    <.ws>
1233
1234    <trait>*
1235
1236    <post_constraint>*
1237}
1238
1239rule scoped {
1240    :dba('scoped declarator')
1241    [
1242    | <declarator> { $<SIGIL> = $<declarator><SIGIL> }
1243    | <regex_declarator>
1244    | <package_declarator>
1245    | <fulltypename>+ <multi_declarator>
1246    | <multi_declarator> { $<SIGIL> = $<multi_declarator><SIGIL> }
1247#    | <?before <[A..Z]> > <name> <.panic("Apparent type name " ~ $<name>.text ~ " is not declared yet")>
1248    ]
1249}
1250
1251
1252token scope_declarator:my       { <sym> <scoped> { $<SIGIL> = $<scoped><SIGIL> } }
1253token scope_declarator:our      { <sym> <scoped> { $<SIGIL> = $<scoped><SIGIL> } }
1254token scope_declarator:state    { <sym> <scoped> { $<SIGIL> = $<scoped><SIGIL> } }
1255token scope_declarator:constant { <sym> <scoped> { $<SIGIL> = $<scoped><SIGIL> } }
1256token scope_declarator:has      { <sym> <scoped> { $<SIGIL> = $<scoped><SIGIL> } }
1257
1258
1259token package_declarator:class {
1260    :my $PKGDECL is context = 'class';
1261    <sym> <package_def>
1262}
1263
1264token package_declarator:grammar {
1265    :my $PKGDECL is context = 'grammar';
1266    <sym> <package_def>
1267}
1268
1269token package_declarator:module {
1270    :my $PKGDECL is context = 'module';
1271    <sym> <package_def>
1272}
1273
1274token package_declarator:package {
1275    :my $PKGDECL is context = 'package';
1276    <sym> <package_def>
1277}
1278
1279token package_declarator:role {
1280    :my $PKGDECL is context = 'role';
1281    <sym> <package_def>
1282}
1283
1284token package_declarator:knowhow {
1285    :my $PKGDECL is context = 'knowhow';
1286    <sym> <package_def>
1287}
1288
1289token package_declarator:require {   # here because of declarational aspects
1290    <sym> <.ws>
1291    [
1292    || <module_name> <EXPR>?
1293    || <EXPR>
1294    ]
1295}
1296
1297token package_declarator:trusts {
1298    <sym> <.ws>
1299    <module_name>
1300}
1301
1302token package_declarator:does {
1303    <sym> <.ws>
1304    <typename>
1305}
1306
1307rule package_def {
1308    :my $longname;
1309    [
1310        <module_name>{
1311            $longname = $<module_name>[0]<longname>;
1312            $¢.add_type($longname.text);
1313        }
1314    ]?
1315    <trait>*
1316    [
1317       <?before '{'>
1318       {{
1319           # figure out the actual full package name (nested in outer package)
1320            my $pkg = $+PKG // "GLOBAL";
1321            push @PKGS, $pkg;
1322            if $longname {
1323                my $shortname = $longname.<name>.text;
1324                $+PKG = $pkg ~ '::' ~ $shortname;
1325            }
1326            else {
1327                $+PKG = $pkg ~ '::_anon_';
1328            }
1329        }}
1330        <block>
1331        {{
1332            $+PKG = pop(@PKGS);
1333        }}
1334        {*}                                                     #= block
1335    || <?{ $+begin_compunit }> {} <?before ';'>
1336        {
1337            $longname orelse $¢.panic("Compilation unit cannot be anonymous");
1338            my $shortname = $longname.<name>.text;
1339            $+PKG = $shortname;
1340            $+begin_compunit = 0;
1341        }
1342        {*}