Changeset 10086 for src/Pugs/Lexer.hs
- Timestamp:
- 04/26/06 16:54:08 (3 years ago)
- Files:
-
- 1 modified
-
src/Pugs/Lexer.hs (modified) (22 diffs)
Legend:
- Unmodified
- Added
- Removed
-
src/Pugs/Lexer.hs
r10068 r10086 32 32 import Pugs.Parser.Types 33 33 import qualified Pugs.Rule.Token as P 34 import Text.ParserCombinators.Parsec.Language 35 import qualified Text.ParserCombinators.Parsec.Char as C (satisfy) 34 36 35 37 perl6Def :: LanguageDef st … … 39 41 , P.commentLine = "#" 40 42 , P.nestedComments = False 41 , P.identStart = wordAlpha42 , P.identLetter = wordAny43 , P.identStart = C.satisfy isWordAlpha 44 , P.identLetter = C.satisfy isWordAny 43 45 , P.caseSensitive = False 44 46 } 45 47 46 wordAlpha :: GenParser Char stChar47 wordAny :: GenParser Char stChar48 wordAlpha :: RuleParser Char 49 wordAny :: RuleParser Char 48 50 wordAlpha = satisfy isWordAlpha <?> "alphabetic word character" 49 51 wordAny = satisfy isWordAny <?> "word character" … … 57 59 perl6Lexer = P.makeTokenParser perl6Def 58 60 59 maybeParens :: CharParser st a -> CharParser sta61 maybeParens :: RuleParser a -> RuleParser a 60 62 maybeParens p = choice [ parens p, p ] 61 63 62 parens :: CharParser st a -> CharParser sta64 parens :: RuleParser a -> RuleParser a 63 65 parens = P.parens perl6Lexer 64 whiteSpace :: CharParser st()66 whiteSpace :: RuleParser () 65 67 whiteSpace = P.whiteSpace perl6Lexer 66 mandatoryWhiteSpace :: CharParser st()68 mandatoryWhiteSpace :: RuleParser () 67 69 mandatoryWhiteSpace = skipMany1 (oneOf " \t\n") -- XXX unicode and whatnot 68 lexeme :: CharParser st a -> CharParser sta70 lexeme :: RuleParser a -> RuleParser a 69 71 lexeme = P.lexeme perl6Lexer 70 identifier :: CharParser stString72 identifier :: RuleParser String 71 73 identifier = P.identifier perl6Lexer 72 braces :: CharParser st a -> CharParser sta74 braces :: RuleParser a -> RuleParser a 73 75 braces = P.braces perl6Lexer 74 brackets :: CharParser st a -> CharParser sta76 brackets :: RuleParser a -> RuleParser a 75 77 brackets = P.brackets perl6Lexer 76 angles :: CharParser st a -> CharParser sta78 angles :: RuleParser a -> RuleParser a 77 79 angles = P.angles perl6Lexer 78 balanced :: CharParser stString80 balanced :: RuleParser String 79 81 balanced = P.balanced 80 82 balancedDelim :: Char -> Char 81 83 balancedDelim = P.balancedDelim 82 decimal :: CharParser stInteger84 decimal :: RuleParser Integer 83 85 decimal = P.decimal perl6Lexer 84 86 … … 91 93 -} 92 94 ruleDelimitedIdentifier :: String -- ^ Delimiter (e.g. \'@::@\') 93 -> GenParser Char st[String]95 -> RuleParser [String] 94 96 ruleDelimitedIdentifier delim = verbatimRule "delimited identifier" $ do 95 97 -- Allowing the leading delim actually leads to subtle oddness with things … … 98 100 ruleVerbatimIdentifier `sepBy1` (try $ string delim) 99 101 100 ruleQualifiedIdentifier :: GenParser Char stString102 ruleQualifiedIdentifier :: RuleParser String 101 103 ruleQualifiedIdentifier = verbatimRule "qualified identifier" $ do 102 104 chunks <- ruleDelimitedIdentifier "::" 103 105 return $ concat (intersperse "::" chunks) 104 106 105 ruleVerbatimIdentifier :: GenParser Char stString107 ruleVerbatimIdentifier :: RuleParser String 106 108 ruleVerbatimIdentifier = (<?> "identifier") $ do 107 109 c <- P.identStart perl6Def … … 113 115 (as matched by 'ruleEndOfLine'). 114 116 -} 115 ruleWhiteSpaceLine :: GenParser Char st()117 ruleWhiteSpaceLine :: RuleParser () 116 118 ruleWhiteSpaceLine = do 117 119 many $ satisfy (\x -> isSpace x && x /= '\n') … … 122 124 line anyway). 123 125 -} 124 ruleEndOfLine :: GenParser Char st()126 ruleEndOfLine :: RuleParser () 125 127 ruleEndOfLine = choice [ do { char '\n'; return () }, eof ] 126 128 127 symbol :: String -> GenParser Char stString129 symbol :: String -> RuleParser String 128 130 symbol s 129 131 | isWordAny (last s) = try $ do … … 192 194 193 195 -- | Backslashed non-alphanumerics (except for @\^@) translate into themselves. 194 escapeCode :: GenParser Char stString196 escapeCode :: RuleParser String 195 197 escapeCode = ch charEsc <|> charNum <|> ch charAscii <|> ch charControl <|> ch anyChar 196 198 <?> "escape code" … … 198 200 ch = fmap (:[]) 199 201 200 charControl :: GenParser Char stChar202 charControl :: RuleParser Char 201 203 charControl = do{ char 'c' 202 204 ; code <- upper <|> char '@' … … 205 207 206 208 -- This is currently the only escape that can return multiples. 207 charNum :: GenParser Char stString209 charNum :: RuleParser String 208 210 charNum = do 209 211 codes <- choice … … 221 223 ] 222 224 223 ruleComma :: GenParser Char st()225 ruleComma :: RuleParser () 224 226 ruleComma = do 225 227 lexeme (char ',') 226 228 return () 227 229 228 number :: Integer -> GenParser tok st Char -> GenParser tok stInteger230 number :: Integer -> RuleParser Char -> RuleParser Integer 229 231 number base baseDigit 230 232 = do{ digits <- many1 baseDigit … … 233 235 } 234 236 235 charEsc :: GenParser Char stChar237 charEsc :: RuleParser Char 236 238 charEsc = choice (map parseEsc escMap) 237 239 where 238 240 parseEsc (c,code) = do{ char c; return code } 239 241 240 charAscii :: GenParser Char stChar242 charAscii :: RuleParser Char 241 243 charAscii = choice (map parseAscii asciiMap) 242 244 where … … 266 268 '\SYN','\ETB','\CAN','\SUB','\ESC','\DEL'] 267 269 268 rule :: String -> CharParser st a -> GenParser Char sta270 rule :: String -> RuleParser a -> RuleParser a 269 271 rule name action = (<?> name) $ lexeme $ action 270 272 271 verbatimRule :: String -> GenParser tok st a -> GenParser tok sta273 verbatimRule :: String -> RuleParser a -> RuleParser a 272 274 verbatimRule name action = (<?> name) $ action 273 275 274 literalRule :: String -> GenParser Char st a -> GenParser Char sta276 literalRule :: String -> RuleParser a -> RuleParser a 275 277 literalRule name action = (<?> name) $ postSpace $ action 276 278 277 tryRule :: String -> GenParser Char st a -> GenParser Char sta279 tryRule :: String -> RuleParser a -> RuleParser a 278 280 tryRule name action = (<?> name) $ lexeme $ action 279 281 280 tryVerbatimRule :: String -> GenParser tok st a -> GenParser tok sta282 tryVerbatimRule :: String -> RuleParser a -> RuleParser a 281 283 tryVerbatimRule name action = (<?> name) $ action 282 284 … … 298 300 $ [SState .. SOur] 299 301 300 postSpace :: GenParser Char st a -> GenParser Char sta302 postSpace :: RuleParser a -> RuleParser a 301 303 postSpace rule = try $ do 302 304 rv <- rule … … 305 307 return rv 306 308 307 ruleTrait :: GenParser Char stString309 ruleTrait :: RuleParser String 308 310 ruleTrait = rule "trait" $ do 309 311 symbol "is" <|> symbol "does" … … 319 321 return trait 320 322 321 ruleTraitName :: String -> GenParser Char stString323 ruleTraitName :: String -> RuleParser String 322 324 ruleTraitName trait = rule "named trait" $ do 323 325 symbol "is" … … 325 327 ruleQualifiedIdentifier 326 328 327 ruleBareTrait :: String -> GenParser Char stString329 ruleBareTrait :: String -> RuleParser String 328 330 ruleBareTrait trait = rule "bare trait" $ do 329 331 choice [ ruleTraitName trait … … 335 337 ] 336 338 337 ruleType :: GenParser Char stString339 ruleType :: RuleParser String 338 340 ruleType = literalRule "context" $ do 339 341 -- Valid type names: Foo, Bar::Baz, ::Grtz, ::?CLASS, but not :Foo … … 347 349 the next one. 348 350 -} 349 tryChoice :: [ GenParser tok sta] -- ^ List of candidate parsers350 -> GenParser tok sta351 tryChoice :: [RuleParser a] -- ^ List of candidate parsers 352 -> RuleParser a 351 353 tryChoice = choice 352 354 … … 354 356 Match '@(@', followed by the given parser, followed by '@)@'. 355 357 -} 356 verbatimParens :: GenParser Char st a -> GenParser Char sta358 verbatimParens :: RuleParser a -> RuleParser a 357 359 verbatimParens = between (lexeme $ char '(') (char ')') 358 360 … … 360 362 Match '@\[@', followed by the given parser, followed by '@\]@'. 361 363 -} 362 verbatimBrackets :: GenParser Char st a -> GenParser Char sta364 verbatimBrackets :: RuleParser a -> RuleParser a 363 365 verbatimBrackets = between (lexeme $ char '[') (char ']') 364 366 … … 366 368 Match '@{@', followed by the given parser, followed by '@}@'. 367 369 -} 368 verbatimBraces :: GenParser Char st a -> GenParser Char sta370 verbatimBraces :: RuleParser a -> RuleParser a 369 371 verbatimBraces = between (lexeme $ char '{') (char '}')
