Changeset 10086 for src/Pugs/Lexer.hs

Show
Ignore:
Timestamp:
04/26/06 16:54:08 (3 years ago)
Author:
audreyt
Message:

* Making Parsec into a Rules engine, part #2.

The CharParser? primitives ("satisfy" and "string")
are moved into Types.hs so they can tweak <ws> state
in the parser.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • src/Pugs/Lexer.hs

    r10068 r10086  
    3232import Pugs.Parser.Types 
    3333import qualified Pugs.Rule.Token as P 
     34import Text.ParserCombinators.Parsec.Language 
     35import qualified Text.ParserCombinators.Parsec.Char as C (satisfy) 
    3436 
    3537perl6Def  :: LanguageDef st 
     
    3941          , P.commentLine    = "#" 
    4042          , P.nestedComments = False 
    41           , P.identStart     = wordAlpha 
    42           , P.identLetter    = wordAny 
     43          , P.identStart     = C.satisfy isWordAlpha 
     44          , P.identLetter    = C.satisfy isWordAny 
    4345          , P.caseSensitive  = False 
    4446          } 
    4547 
    46 wordAlpha   :: GenParser Char st Char 
    47 wordAny     :: GenParser Char st Char 
     48wordAlpha   :: RuleParser Char 
     49wordAny     :: RuleParser Char 
    4850wordAlpha   = satisfy isWordAlpha <?> "alphabetic word character" 
    4951wordAny     = satisfy isWordAny <?> "word character" 
     
    5759perl6Lexer = P.makeTokenParser perl6Def 
    5860 
    59 maybeParens :: CharParser st a -> CharParser st a 
     61maybeParens :: RuleParser a -> RuleParser a 
    6062maybeParens p = choice [ parens p, p ] 
    6163 
    62 parens     :: CharParser st a -> CharParser st a 
     64parens     :: RuleParser a -> RuleParser a 
    6365parens     = P.parens     perl6Lexer 
    64 whiteSpace :: CharParser st () 
     66whiteSpace :: RuleParser () 
    6567whiteSpace = P.whiteSpace perl6Lexer 
    66 mandatoryWhiteSpace :: CharParser st () 
     68mandatoryWhiteSpace :: RuleParser () 
    6769mandatoryWhiteSpace = skipMany1 (oneOf " \t\n")  -- XXX unicode and whatnot 
    68 lexeme     :: CharParser st a -> CharParser st a 
     70lexeme     :: RuleParser a -> RuleParser a 
    6971lexeme     = P.lexeme     perl6Lexer 
    70 identifier :: CharParser st String 
     72identifier :: RuleParser String 
    7173identifier = P.identifier perl6Lexer 
    72 braces     :: CharParser st a -> CharParser st a 
     74braces     :: RuleParser a -> RuleParser a 
    7375braces     = P.braces     perl6Lexer 
    74 brackets   :: CharParser st a -> CharParser st a 
     76brackets   :: RuleParser a -> RuleParser a 
    7577brackets   = P.brackets   perl6Lexer 
    76 angles     :: CharParser st a -> CharParser st a 
     78angles     :: RuleParser a -> RuleParser a 
    7779angles     = P.angles     perl6Lexer 
    78 balanced   :: CharParser st String 
     80balanced   :: RuleParser String 
    7981balanced   = P.balanced 
    8082balancedDelim :: Char -> Char 
    8183balancedDelim = P.balancedDelim 
    82 decimal    :: CharParser st Integer 
     84decimal    :: RuleParser Integer 
    8385decimal    = P.decimal    perl6Lexer 
    8486 
     
    9193-} 
    9294ruleDelimitedIdentifier :: String -- ^ Delimiter (e.g. \'@::@\') 
    93                         -> GenParser Char st [String] 
     95                        -> RuleParser [String] 
    9496ruleDelimitedIdentifier delim = verbatimRule "delimited identifier" $ do 
    9597    -- Allowing the leading delim actually leads to subtle oddness with things 
     
    98100    ruleVerbatimIdentifier `sepBy1` (try $ string delim) 
    99101 
    100 ruleQualifiedIdentifier :: GenParser Char st String 
     102ruleQualifiedIdentifier :: RuleParser String 
    101103ruleQualifiedIdentifier = verbatimRule "qualified identifier" $ do 
    102104    chunks <- ruleDelimitedIdentifier "::" 
    103105    return $ concat (intersperse "::" chunks) 
    104106 
    105 ruleVerbatimIdentifier :: GenParser Char st String 
     107ruleVerbatimIdentifier :: RuleParser String 
    106108ruleVerbatimIdentifier = (<?> "identifier") $ do 
    107109    c  <- P.identStart perl6Def 
     
    113115(as matched by 'ruleEndOfLine'). 
    114116-} 
    115 ruleWhiteSpaceLine :: GenParser Char st () 
     117ruleWhiteSpaceLine :: RuleParser () 
    116118ruleWhiteSpaceLine = do 
    117119    many $ satisfy (\x -> isSpace x && x /= '\n') 
     
    122124line anyway). 
    123125-} 
    124 ruleEndOfLine :: GenParser Char st () 
     126ruleEndOfLine :: RuleParser () 
    125127ruleEndOfLine = choice [ do { char '\n'; return () }, eof ] 
    126128 
    127 symbol :: String -> GenParser Char st String 
     129symbol :: String -> RuleParser String 
    128130symbol s 
    129131    | isWordAny (last s) = try $ do 
     
    192194 
    193195-- | Backslashed non-alphanumerics (except for @\^@) translate into themselves. 
    194 escapeCode      :: GenParser Char st String 
     196escapeCode      :: RuleParser String 
    195197escapeCode      = ch charEsc <|> charNum <|> ch charAscii <|> ch charControl <|> ch anyChar 
    196198                <?> "escape code" 
     
    198200    ch = fmap (:[]) 
    199201 
    200 charControl :: GenParser Char st Char 
     202charControl :: RuleParser Char 
    201203charControl     = do{ char 'c' 
    202204                    ; code <- upper <|> char '@' 
     
    205207 
    206208-- This is currently the only escape that can return multiples. 
    207 charNum :: GenParser Char st String 
     209charNum :: RuleParser String 
    208210charNum = do 
    209211    codes <- choice 
     
    221223               ] 
    222224 
    223 ruleComma :: GenParser Char st () 
     225ruleComma :: RuleParser () 
    224226ruleComma = do 
    225227    lexeme (char ',') 
    226228    return () 
    227229 
    228 number :: Integer -> GenParser tok st Char -> GenParser tok st Integer 
     230number :: Integer -> RuleParser Char -> RuleParser Integer 
    229231number base baseDigit 
    230232    = do{ digits <- many1 baseDigit 
     
    233235        }           
    234236 
    235 charEsc         :: GenParser Char st Char 
     237charEsc         :: RuleParser Char 
    236238charEsc         = choice (map parseEsc escMap) 
    237239                where 
    238240                  parseEsc (c,code)     = do{ char c; return code } 
    239241                   
    240 charAscii       :: GenParser Char st Char 
     242charAscii       :: RuleParser Char 
    241243charAscii       = choice (map parseAscii asciiMap) 
    242244                where 
     
    266268                   '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL'] 
    267269 
    268 rule :: String -> CharParser st a -> GenParser Char st a 
     270rule :: String -> RuleParser a -> RuleParser a 
    269271rule name action = (<?> name) $ lexeme $ action 
    270272 
    271 verbatimRule :: String -> GenParser tok st a -> GenParser tok st a 
     273verbatimRule :: String -> RuleParser a -> RuleParser a 
    272274verbatimRule name action = (<?> name) $ action 
    273275 
    274 literalRule :: String -> GenParser Char st a -> GenParser Char st a 
     276literalRule :: String -> RuleParser a -> RuleParser a 
    275277literalRule name action = (<?> name) $ postSpace $ action 
    276278 
    277 tryRule :: String -> GenParser Char st a -> GenParser Char st a 
     279tryRule :: String -> RuleParser a -> RuleParser a 
    278280tryRule name action = (<?> name) $ lexeme $ action 
    279281 
    280 tryVerbatimRule :: String -> GenParser tok st a -> GenParser tok st a 
     282tryVerbatimRule :: String -> RuleParser a -> RuleParser a 
    281283tryVerbatimRule name action = (<?> name) $ action 
    282284 
     
    298300    $ [SState .. SOur] 
    299301 
    300 postSpace :: GenParser Char st a -> GenParser Char st a 
     302postSpace :: RuleParser a -> RuleParser a 
    301303postSpace rule = try $ do 
    302304    rv <- rule 
     
    305307    return rv 
    306308 
    307 ruleTrait :: GenParser Char st String 
     309ruleTrait :: RuleParser String 
    308310ruleTrait = rule "trait" $ do 
    309311    symbol "is" <|> symbol "does" 
     
    319321    return trait 
    320322 
    321 ruleTraitName :: String -> GenParser Char st String 
     323ruleTraitName :: String -> RuleParser String 
    322324ruleTraitName trait = rule "named trait" $ do 
    323325    symbol "is" 
     
    325327    ruleQualifiedIdentifier 
    326328 
    327 ruleBareTrait :: String -> GenParser Char st String 
     329ruleBareTrait :: String -> RuleParser String 
    328330ruleBareTrait trait = rule "bare trait" $ do 
    329331    choice [ ruleTraitName trait 
     
    335337           ] 
    336338 
    337 ruleType :: GenParser Char st String 
     339ruleType :: RuleParser String 
    338340ruleType = literalRule "context" $ do 
    339341    -- Valid type names: Foo, Bar::Baz, ::Grtz, ::?CLASS, but not :Foo 
     
    347349the next one. 
    348350-} 
    349 tryChoice :: [GenParser tok st a] -- ^ List of candidate parsers 
    350           -> GenParser tok st a 
     351tryChoice :: [RuleParser a] -- ^ List of candidate parsers 
     352          -> RuleParser a 
    351353tryChoice = choice 
    352354 
     
    354356Match '@(@', followed by the given parser, followed by '@)@'. 
    355357-} 
    356 verbatimParens :: GenParser Char st a -> GenParser Char st a 
     358verbatimParens :: RuleParser a -> RuleParser a 
    357359verbatimParens = between (lexeme $ char '(') (char ')') 
    358360 
     
    360362Match '@\[@', followed by the given parser, followed by '@\]@'. 
    361363-} 
    362 verbatimBrackets :: GenParser Char st a -> GenParser Char st a 
     364verbatimBrackets :: RuleParser a -> RuleParser a 
    363365verbatimBrackets = between (lexeme $ char '[') (char ']') 
    364366 
     
    366368Match '@{@', followed by the given parser, followed by '@}@'. 
    367369-} 
    368 verbatimBraces :: GenParser Char st a -> GenParser Char st a 
     370verbatimBraces :: RuleParser a -> RuleParser a 
    369371verbatimBraces = between (lexeme $ char '{') (char '}')