| 1 | #!/usr/bin/perl -w |
|---|
| 2 | use strict; |
|---|
| 3 | use warnings; |
|---|
| 4 | =begin |
|---|
| 5 | |
|---|
| 6 | This is a deconstruction of src/perl6/STD.pm, |
|---|
| 7 | targeted at parser implementers. |
|---|
| 8 | |
|---|
| 9 | It is intended to |
|---|
| 10 | help clarify STD.pm, |
|---|
| 11 | help clarify what implementers need to do, |
|---|
| 12 | support the metaprogramming of maintainable Perl 6 parsers. |
|---|
| 13 | |
|---|
| 14 | =cut |
|---|
| 15 | |
|---|
| 16 | =begin |
|---|
| 17 | |
|---|
| 18 | Categories (of tokens and rules) |
|---|
| 19 | |
|---|
| 20 | =cut |
|---|
| 21 | |
|---|
# Returns the names of the grammar categories (of tokens and rules), in the
# order they are listed below.
#
# The listing lives in a here-document so that it can carry '#' comments;
# remove_comments() strips those before the text is split on whitespace.
# Splitting directly replaces the former string-eval of a generated qw{}
# expression, which would have silently produced an empty list if the
# generated code ever failed to compile.
sub categories {
    my $listing = remove_comments(<<'END');
category
special_variable
version module_name

terminator

sigil twigil

term

quote quote_mod q_backslash qq_backslash

dotty

infix prefix postfix circumfix postcircumfix

infix_prefix_meta_operator
infix_postfix_meta_operator
infix_circumfix_meta_operator
prefix_postfix_meta_operator
prefix_circumfix_meta_operator
postfix_prefix_meta_operator

trait_verb
trait_auxiliary

type_declarator
scope_declarator
package_declarator
routine_declarator
plurality_declarator

statement_prefix
statement_control
statement_mod_cond
statement_mod_loop

regex_assertion
regex_backslash
regex_declarator
regex_metachar
regex_mod_internal
regex_quantifier
END

    # split ' ' skips leading whitespace and blank lines, like qw{} did.
    my @names = split ' ', $listing;

    # Return a slice rather than the array itself so that scalar context
    # still yields the last name, as the qw{} list used to.
    return @names[ 0 .. $#names ];
}
|---|
| 67 | |
|---|
| 68 | =begin |
|---|
| 69 | |
|---|
| 70 | Categories - which are rules, not tokens |
|---|
| 71 | |
|---|
| 72 | =cut |
|---|
| 73 | |
|---|
# Returns the names of the categories that are rules rather than tokens.
sub categories_which_are_rules_not_tokens {
    return qw(
        statement_prefix
        statement_control
        statement_mod_cond
        statement_mod_loop
    );
}
|---|
| 82 | |
|---|
| 83 | =begin |
|---|
| 84 | |
|---|
| 85 | Categories with symbol constraints |
|---|
| 86 | |
|---|
| 87 | =cut |
|---|
| 88 | |
|---|
# Returns a reference to a newly built hash that maps a category name to the
# name of the constraint placed on that category's symbols.  Categories that
# are not listed have no entry.
sub category_symbol_constraints {
    return {
        quote              => 'nofat',
        dotty              => 'unspacey',
        trait_verb         => 'nofat_space',
        trait_auxiliary    => 'nofat_space',

        type_declarator    => 'nofat',
        scope_declarator   => 'nofat',
        package_declarator => 'nofat',
        routine_declarator => 'nofat',

        statement_prefix   => 'nofat',
        statement_control  => 'nofat_space',
        statement_mod_cond => 'nofat',
        statement_mod_loop => 'nofat',
    };
}
|---|
| 108 | |
|---|
| 109 | =begin |
|---|
| 110 | |
|---|
| 111 | Operator precedence |
|---|
| 112 | |
|---|
| 113 | =cut |
|---|
| 114 | |
|---|
# Returns the operator precedence table as one multi-line string.
#
# Each line starts with the name of a precedence level, followed by a short
# code ("z=" for term down to "a=" for terminator) and, on some lines, extra
# words such as left / right / list / non / chain and assign / bool.
# NOTE(review): the extra words look like associativity and flags, and the
# codes look like sort keys from tightest to loosest -- confirm against the
# consumer of this table, which is not visible here.
sub precedence_table {
    my $table = <<'END';
hyper transparent
term z=
methodcall y=
autoincrement x=
exponentiation w= right assign
symbolic_unary v=
multiplicative u= left assign
additive t= left assign
replication s= left assign
concatenation r= left assign
junctive_and q= list assign
junctive_or p= list assign
named_unary o=
nonchaining n= non
chaining m= chain bool
tight_and l= left assign
tight_or k= left assign
conditional j= right
item_assignment i= right
loose_unary h=
comma g= list
list_infix f= list assign
list_assignment i= right sub:e=
list_prefix e=
loose_and d= left assign
loose_or c= left assign
LOOSEST a=!
terminator a= list
END
    return $table;
}
|---|
| 145 | #do_precedence_table(remove_comments($precedence_table)); |
|---|
| 146 | |
|---|
# Returns a reference to a newly built hash that maps an operator category
# (prefix, infix, postfix) to the name of a level in the precedence table.
sub precedence_aliases {
    return {
        prefix  => 'symbolic_unary',
        infix   => 'additive',
        postfix => 'autoincrement',
    };
}
|---|
| 155 | |
|---|
| 156 | =begin |
|---|
| 157 | |
|---|
| 158 | Very simple tokens |
|---|
| 159 | |
|---|
| 160 | =cut |
|---|
| 161 | |
|---|
# Returns, as one multi-line string, the very simple tokens that carry no
# precedence.  Each non-blank line is a category name followed by the
# symbols belonging to that category; a category may appear on several
# lines.  The text is a non-interpolating here-document, so backslashes and
# sigils in it are literal.
sub very_simple_tokens_without_precedence {
    my $tokens = <<'END';

prefix_postfix_meta_operator «
prefix_postfix_meta_operator <<
postfix_prefix_meta_operator »
postfix_prefix_meta_operator >>

sigil $ @@ @ % & ::
twigil . ! ^ : * + ? =
quote_mod w ww x to s a h f c b

regex_metachar . ^^ ^ $$
qq_backslash \\ a b e f n r t 0
regex_assertion . ,

regex_mod_internal :!i

regex_metachar <( )> << >> « »

END
    return $tokens;
}
|---|
| 182 | |
|---|
# Returns, as one multi-line string, the very simple tokens that do carry a
# precedence.  Each non-blank line is a category name, then the name of a
# precedence level, then the symbols belonging to that pair.
#
# NOTE(review): "~&>" in the multiplicative row looks unusual next to its
# neighbours "~<" and "~>"; possibly "~&" was intended.  Left unchanged
# here -- confirm against STD.pm.
sub very_simple_tokens_with_precedence {
    my $tokens = <<'END';

term term self *
infix methodcall .
postfix methodcall ->
postfix autoincrement ++ -- i
prefix autoincrement ++ --
infix exponentiation **
prefix symbolic_unary ! + - ~ ? = * ** ~^ +^ ?^ ^ |
infix multiplicative * / % +& +< << >> +> ~&> ~< ~>
infix additive + - +| +^ ~| ~^ ?| ?^
infix replication x xx
infix concatenation ~
infix junctive_and &
infix junctive_or | ^
prefix named_unary rand sleep abs
infix nonchaining <=> cmp is but does .. ^.. ..^ ^..^ ff ^ff ff^ ^ff^ fff ^fff fff^ ^fff^
infix chaining == != < <= > >= ~~ !~ =~ eq ne lt le gt ge =:= ===
infix tight_and &&
infix tight_or || //
infix item_assignment := ::=
infix item_assignment .=
prefix loose_unary true not
infix comma , p5=>
infix list_infix X Z minmax
infix loose_and and andthen
infix loose_or or xor orelse

END
    return $tokens;
}
|---|
| 212 | |
|---|
| 213 | =begin |
|---|
| 214 | |
|---|
| 215 | Simple tokens |
|---|
| 216 | |
|---|
| 217 | =cut |
|---|
| 218 | |
|---|
| 219 | |
|---|
| 220 | |
|---|
| 221 | |
|---|
| 222 | |
|---|
| 223 | |
|---|
| 224 | =begin |
|---|
| 225 | |
|---|
| 226 | Typenames kludge. |
|---|
| 227 | |
|---|
| 228 | =cut |
|---|
| 229 | |
|---|
# Returns the hard-coded list of type names (the "typenames kludge"),
# in the order written below.
sub typenames {
    return qw(
        Bit Int Str Num Complex Bool Rat
        Exception Code Block List Seq Range Set Bag Junction Pair
        Mapping Signature Capture Blob Whatever Undef Failure
        StrPos StrLen Version P6opaque
        bit int uint buf num complex bool rat
        Scalar Array Hash KeyHash KeySet KeyBag Buf IO Routine Sub Method
        Submethod Macro Regex Match Package Module Class Role Grammar Any Object
    );
}
|---|
| 241 | |
|---|
| 242 | =begin |
|---|
| 243 | |
|---|
| 244 | A copy of the unicode bracket pairs. |
|---|
| 245 | |
|---|
| 246 | =cut |
|---|
| 247 | |
|---|
# Returns a flat list of four-digit hexadecimal Unicode code points for the
# bracket pairs: elements alternate opening character, closing character
# (0028 0029 is "(" and ")").  Callers wanting pairs must group the list two
# elements at a time.
sub open2close {
    return qw{
        0028 0029 003C 003E 005B 005D 007B 007D 00AB 00BB 0F3A 0F3B
        0F3C 0F3D 169B 169C 2039 203A 2045 2046 207D 207E 208D 208E
        2208 220B 2209 220C 220A 220D 2215 29F5 223C 223D 2243 22CD
        2252 2253 2254 2255 2264 2265 2266 2267 2268 2269 226A 226B
        226E 226F 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279
        227A 227B 227C 227D 227E 227F 2280 2281 2282 2283 2284 2285
        2286 2287 2288 2289 228A 228B 228F 2290 2291 2292 2298 29B8
        22A2 22A3 22A6 2ADE 22A8 2AE4 22A9 2AE3 22AB 2AE5 22B0 22B1
        22B2 22B3 22B4 22B5 22B6 22B7 22C9 22CA 22CB 22CC 22D0 22D1
        22D6 22D7 22D8 22D9 22DA 22DB 22DC 22DD 22DE 22DF 22E0 22E1
        22E2 22E3 22E4 22E5 22E6 22E7 22E8 22E9 22EA 22EB 22EC 22ED
        22F0 22F1 22F2 22FA 22F3 22FB 22F4 22FC 22F6 22FD 22F7 22FE
        2308 2309 230A 230B 2329 232A 23B4 23B5 2768 2769 276A 276B
        276C 276D 276E 276F 2770 2771 2772 2773 2774 2775 27C3 27C4
        27C5 27C6 27D5 27D6 27DD 27DE 27E2 27E3 27E4 27E5 27E6 27E7
        27E8 27E9 27EA 27EB 2983 2984 2985 2986 2987 2988 2989 298A
        298B 298C 298D 298E 298F 2990 2991 2992 2993 2994 2995 2996
        2997 2998 29C0 29C1 29C4 29C5 29CF 29D0 29D1 29D2 29D4 29D5
        29D8 29D9 29DA 29DB 29F8 29F9 29FC 29FD 2A2B 2A2C 2A2D 2A2E
        2A34 2A35 2A3C 2A3D 2A64 2A65 2A79 2A7A 2A7D 2A7E 2A7F 2A80
        2A81 2A82 2A83 2A84 2A8B 2A8C 2A91 2A92 2A93 2A94 2A95 2A96
        2A97 2A98 2A99 2A9A 2A9B 2A9C 2AA1 2AA2 2AA6 2AA7 2AA8 2AA9
        2AAA 2AAB 2AAC 2AAD 2AAF 2AB0 2AB3 2AB4 2ABB 2ABC 2ABD 2ABE
        2ABF 2AC0 2AC1 2AC2 2AC3 2AC4 2AC5 2AC6 2ACD 2ACE 2ACF 2AD0
        2AD1 2AD2 2AD3 2AD4 2AD5 2AD6 2AEC 2AED 2AF7 2AF8 2AF9 2AFA
        2E02 2E03 2E04 2E05 2E09 2E0A 2E0C 2E0D 2E1C 2E1D 3008 3009
        300A 300B 300C 300D 300E 300F 3010 3011 3014 3015 3016 3017
        3018 3019 301A 301B 301D 301E FD3E FD3F FE17 FE18 FE35 FE36
        FE37 FE38 FE39 FE3A FE3B FE3C FE3D FE3E FE3F FE40 FE41 FE42
        FE43 FE44 FE47 FE48 FE59 FE5A FE5B FE5C FE5D FE5E FF08 FF09
        FF1C FF1E FF3B FF3D FF5B FF5D FF5F FF60 FF62 FF63
    };
}
|---|
{
    # O2cP6 renders the bracket-pair table returned by main::open2close() as
    # the text of a Perl 6 "constant %open2close = (...)" declaration.
    #
    # Every step is a class method, so a subclass can override a single
    # piece (for example no_high_bit_codes) and inherit the rest.
    package O2cP6;

    # Quotes one hex code point as the text "\uXXXX", double quotes included.
    sub wrap {
        my ($cls, $codepoint) = @_;
        return '"\u' . $codepoint . '"';
    }

    # Joins an already wrapped opener and closer into "OPEN => CLOSE".
    sub link {
        my ($cls, $open, $close) = @_;
        return $open . ' => ' . $close;
    }

    # Lays the entries out in groups of up to three, joined by ", "; every
    # group, including the last, is followed by a comma and a line break.
    sub comma {
        my ($cls, @entries) = @_;
        my $text = "";
        while (@entries) {
            $text .= join(", ", splice(@entries, 0, 3)) . ",\n ";
        }
        return $text;
    }

    # Wraps the formatted entries in the constant declaration.
    sub context {
        my ($cls, $body) = @_;
        return 'constant %open2close = (' . "\n " . $body . ");\n";
    }

    # When this returns true, code() keeps only the pairs whose opening code
    # point starts with "00".  The base class keeps every pair.
    sub no_high_bit_codes { 0 }

    # Builds and returns the complete declaration text.
    #
    # The table and the pairing helper are defined in package main, so they
    # are called fully qualified: an unqualified call from inside this
    # package would be looked up as O2cP6::open2close and fail at run time.
    # The helper's name is array_to_pairs.
    sub code {
        my ($cls) = @_;
        my @entries;
        for my $pair (main::array_to_pairs(main::open2close())) {
            my ($open, $close) = @$pair;
            next if $cls->no_high_bit_codes and $open !~ /^00/;
            push @entries, $cls->link(map { $cls->wrap($_) } $open, $close);
        }
        return $cls->context($cls->comma(@entries));
    }
}
|---|
| 300 | |
|---|
| 301 | # Helper subs |
|---|
# Returns a copy of the given text with every '#' comment removed, i.e.
# everything from a '#' up to (but not including) the end of that line.
# The line breaks themselves are kept.
sub remove_comments {
    my ($text) = @_;
    $text =~ s/\#.*//g;
    return $text;
}
|---|
# Groups a flat argument list into consecutive pairs and returns a list of
# array references, each holding two elements.  With an odd number of
# arguments the final array reference holds the single leftover element.
sub array_to_pairs {
    my @remaining = @_;
    my @pairs;
    while (@remaining) {
        push @pairs, [ splice(@remaining, 0, 2) ];
    }
    return @pairs;
}
|---|