- Timestamp:
- 05/12/05 04:01:26 (4 years ago)
- svk:copy_cache_prev:
- 4536
- Location:
- src/pge
- Files:
-
- 4 modified
-
PGE.pir (modified) (1 diff)
-
PGE/Exp.pir (modified) (3 diffs)
-
PGE/P6Rule.pir (modified) (7 diffs)
-
README (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
src/pge/PGE.pir
r3027 r3029 18 18 .sub "__onload" @LOAD 19 19 .local pmc load 20 load_bytecode "Data/Escape.imc" 20 21 load = find_global "PGE::TokenHash", "__onload" 21 22 load() -
src/pge/PGE/Exp.pir
r3027 r3029 15 15 PGE::Dot - match any character 16 16 PGE::CCShortcut - character class shortcuts (\d, \D, \w, etc.) 17 PGE::CharClass - character classes (<[abcde]>, <-[abcde]>) 17 18 PGE::WS - <?ws> rule 18 19 PGE::Anchor - matching of ^, ^^, $, $$, \b, \B anchors … … 45 46 $P0 = subclass expclass, "PGE::Exp::Dot" 46 47 $P0 = subclass expclass, "PGE::Exp::CCShortcut" 48 $P0 = subclass expclass, "PGE::Exp::CharClass" 47 49 $P0 = subclass expclass, "PGE::Exp::WS" 48 50 $P0 = subclass expclass, "PGE::Exp::Anchor" … … 640 642 emit(code, " %s_f:", label) 641 643 emit(code, " goto fail") 644 .end 645 646 .namespace [ "PGE::Exp::CharClass" ] 647 648 # Note: The implementation interface for CharClass may change 649 # in the near future, so don't rely on this too heavily just yet. 650 # (pmichaud, 2005-05-11) 651 652 .sub gen method 653 .param pmc code 654 .param string label 655 .param string next 656 .local string token 657 .local int min, max, isgreedy, iscut 658 .local pmc emit 659 .local string charclass, charmatch 660 (min, max, isgreedy, iscut) = self."_getattributes"() 661 emit = find_global "PGE::Exp", "emit" 662 $P0 = find_global "Data::Escape", "String" 663 charclass = self["charclass"] 664 charclass = $P0(charclass, '"') 665 charmatch = self["charmatch"] 666 emit(code, "\n %s:", label) 667 emit(code, " rep = 0") 668 unless isgreedy goto lazy 669 emit(code, " %s_1:", label) 670 emit(code, " if pos >= lastpos goto %s_2", label) 671 emit(code, " if rep >= %d goto %s_2", max, label) 672 emit(code, " $S0 = substr target, pos, 1") 673 emit(code, " $I0 = index \"%s\", $S0", charclass) 674 emit(code, " %s $I0 == -1 goto %s_2", charmatch, label) 675 emit(code, " inc pos") 676 emit(code, " inc rep") 677 emit(code, " goto %s_1", label) 678 emit(code, " %s_2:", label) 679 emit(code, " if rep < %d goto fail", min) 680 unless iscut goto greedy_1 681 emit(code, " goto %s", next) 682 .return () 683 greedy_1: 684 emit(code, " if rep == %d goto %s", min, next) 685 
self.emitsub(code, next, "pos", "rep") 686 emit(code, " dec pos") 687 emit(code, " dec rep") 688 emit(code, " goto %s_2", label) 689 .return () 690 lazy: 691 emit(code, " %s_0:", label) 692 emit(code, " if rep < %d goto %s_1", min, label) 693 unless iscut goto lazy_1 694 emit(code, " goto %s", next) 695 goto lazy_2 696 lazy_1: 697 emit(code, " if rep >= %d goto %s", max, next) 698 emit(code, " if pos > lastpos goto fail") 699 self.emitsub(code, next, "pos", "rep") 700 lazy_2: 701 emit(code, " %s_1:", label) 702 emit(code, " $S0 = substr target, pos, 1") 703 emit(code, " $I0 = index \"%s\", pos") 704 emit(code, " %s $I0 == -1 goto fail", charmatch) 705 emit(code, " inc rep") 706 emit(code, " inc pos") 707 emit(code, " goto %s_0", label) 642 708 .end 643 709 -
src/pge/PGE/P6Rule.pir
r3027 r3029 67 67 p6meta['$8'] = $P0 68 68 p6meta['$9'] = $P0 69 $P0 = find_global "PGE::P6Rule", "p6rule_parse_subrule" # XXX: TODO69 $P0 = find_global "PGE::P6Rule", "p6rule_parse_subrule" 70 70 p6meta['<'] = $P0 71 71 p6meta['>'] = u 72 $P0 = find_global "PGE::P6Rule", "p6rule_parse_c harclass"72 $P0 = find_global "PGE::P6Rule", "p6rule_parse_ccshortcut" 73 73 p6meta['\d'] = $P0 74 74 p6meta['\D'] = $P0 … … 79 79 p6meta['\n'] = $P0 80 80 p6meta['\N'] = $P0 81 $P0 = find_global "PGE::P6Rule", "p6rule_parse_charclass" 82 p6meta['<['] = $P0 83 p6meta['<-['] = $P0 84 p6meta['<+['] = $P0 81 85 .end 82 86 … … 438 442 439 443 440 =item C<p6rule_parse_c harclass(STR pattern, PMC lex)>444 =item C<p6rule_parse_ccshortcut(STR pattern, PMC lex)> 441 445 442 446 Parses a character class of some sort, including the \n, \N, \s, \S, … … 445 449 =cut 446 450 447 .sub p6rule_parse_c harclass451 .sub p6rule_parse_ccshortcut 448 452 .param string pattern 449 453 .param pmc lex … … 455 459 $I0 = length token 456 460 p6rule_parse_skip(pattern, lex, $I0) 461 .return (exp) 462 .end 463 464 =item C<p6rule_parse_charclass(STR pattern, PMC lex, STR token)> 465 466 Parse a character class in a rule expression. 467 468 Note: The interface for PGE::Exp::CharClass may change in the 469 near future, so don't rely on this code too strongly just yet. 470 (pmichaud, 2005-05-11) 471 472 =cut 473 474 .sub p6rule_parse_charclass 475 .param string pattern 476 .param pmc lex 477 .param string token 478 .local int pos, plen 479 .local string charclass 480 .local int range 481 .local pmc exp 482 pos = lex["pos"] 483 plen = lex["plen"] 484 $I0 = length token 485 pos += $I0 486 charclass = '' 487 range = 0 488 scan: 489 if pos >= plen goto no_close_err 490 $S0 = substr pattern, pos, 1 491 if $S0 == ']' goto end_class 492 if $S0 == '-' goto unescaped_hyphen 493 if $S0 == '.' 
goto start_range 494 unless $S0 == '\\' goto add_char 495 backslash: 496 inc pos 497 $S0 = substr pattern, pos, 1 498 $I0 = index "nrtfae0", $S0 499 if $I0 == -1 goto add_char 500 $S0 = substr "\n\r\t\f\a\e\0", $I0, 1 501 add_char: 502 inc pos 503 if range goto add_range 504 concat charclass, $S0 505 goto scan 506 add_range: 507 range = 0 508 $I2 = ord charclass, -1 509 $I0 = ord $S0 510 add_range_1: 511 inc $I2 512 if $I2 > $I0 goto scan 513 $S1 = chr $I2 514 concat charclass, $S1 515 goto add_range_1 516 start_range: 517 if range goto add_range 518 $S1 = substr pattern, pos, 2 519 unless $S1 == ".." goto add_char 520 pos += 2 521 range = 1 522 goto scan 523 end_class: 524 $S0 = substr pattern, pos, 2 525 unless $S0 == "]>" goto unescaped_bracket 526 pos += 2 527 lex["pos"] = pos 528 p6rule_parse_skip(pattern, lex, 0) 529 $P0 = find_global "PGE::Exp", "new" 530 exp = $P0("PGE::Exp::CharClass") 531 exp["charclass"] = charclass 532 $S0 = substr token, 1, 1 533 if $S0 == "-" goto charclass_negate 534 exp["charmatch"] = "if" 535 goto end 536 charclass_negate: 537 exp["charmatch"] = "unless" 538 goto end 539 unescaped_hyphen: 540 p6rule_parse_error(pattern, lex, "Unescaped '-' in charclass (use '..' or '\\-')") 541 goto end 542 no_close_err: 543 p6rule_parse_error(pattern, lex, "No closing ']>' for character class") 544 goto end 545 unescaped_bracket: 546 p6rule_parse_error(pattern, lex, "Unescaped ']' in character class") 547 goto end 548 end: 457 549 .return (exp) 458 550 .end … … 543 635 quant: # qexp is the atom to quant 544 636 quant_quest: 545 if c != '?'goto quant_plus637 if c != "?" 
goto quant_plus 546 638 pos = "p6rule_parse_skip"(pattern, lex, 1) 547 639 qexp["min"] = 0 548 640 goto quant_greedy 549 641 quant_plus: 550 if c != '+' goto quant_star 642 if c != "+" goto quant_star 551 643 pos = "p6rule_parse_skip"(pattern, lex, 1) 552 644 qexp["max"] = PGE_INF … … 554 646 goto quant_greedy 555 647 quant_star: 556 if c != '*' goto quant_greedy 648 if c != "*" goto quant_greedy 557 649 pos = "p6rule_parse_skip"(pattern, lex, 1) 558 650 c = substr pattern, pos, 1 -
src/pge/README
r2879 r3029 43 43 the load_bytecode operation, as in 44 44 45 load_bytecode "runtime/parrot/library/PGE.pbc" 45 load_bytecode "PGE.pbc" 46 46 47 47 This imports the C<PGE::p6rule> subroutine, which can be used to … … 94 94 95 95 PGE doesn't (yet) properly handle nested repetitions of zero-length 96 patterns in groups -- that's coming next. 96 patterns in groups -- that's coming soon. 97 97 98 98 This is just the first-cut framework for building the 99 99 remainder of the engine, so many items (lookaround, 100 conjunctions, closures, character classes, and hypotheticals) 100 conjunctions, closures, and hypotheticals) 101 101 just aren't implemented yet. They're on their way! 102 102
