Changeset 21929 for src/Pugs/Lexer.hs

Show
Ignore:
Timestamp:
08/18/08 08:49:33 (3 months ago)
Author:
audreyt
Message:

* Pugs.Lexer: Restore "\c[CHARACTER NAME]" support.

Add support for "\c[CHARNAME1, CHARNAME2]".
Restore support for control character escapes such as "\cL".
(However, "\c1" is now interpreted as chr(1), not "q" as in perl5.)

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • src/Pugs/Lexer.hs

    r21790 r21929
       273    273  -- | Backslashed non-alphanumerics (except for @\^@) translate into themselves.
       274    274  escapeCode      :: RuleParser String
       275         escapeCode      = charNum <|> ch charEsc <|> ch charAscii <|> ch charControl <|> ch anyChar
              275  escapeCode      = charNum <|> ch charEsc <|> ch charAscii <|> charControl <|> ch anyChar
       276    276                  <?> "escape code"
       277    277      where
       278    278      ch = fmap (:[])
       279    279
       280         charControl :: RuleParser Char
              280  charControl :: RuleParser String
       281    281  charControl = do
       282    282      char 'c'
       283             code <- upper <|> oneOf "@["
              283      code <- upper <|> oneOf "@[" <|> digit
       284    284      case code of
       285    285          '[' -> do
       286                     charName <- many (satisfy (/= ']'))
              286              charNames <- many1 (noneOf ",]") `sepBy1` many1 ruleComma
       287    287              char ']'
       288                     case nameToCode charName of
       289                         Just c  -> return (chr c)
       290                         _       -> error $ "Invalid unicode character name: " ++ charName
       291                 _   -> return (toEnum (fromEnum code - fromEnum '@'))
              288              forM charNames $ \charName -> do
              289                  if all isDigit charName
              290                      then return $ chr (read charName)
              291                      else case nameToCode charName of
              292                          Just c  -> return (chr c)
              293                          _       -> fail $ "Invalid unicode character name: " ++ charName
              294          _ | isDigit code -> do
              295              cs <- many digit
              296              return [chr $ read (code:cs)]
              297          _   -> return [toEnum (fromEnum code - fromEnum '@')]
       292    298
       293    299  -- This is currently the only escape that can return multiples.

       302    308          , based 'o'  8 octDigit
       303    309          , based 'x' 16 hexDigit
       304                 , based 'c' 10 digit
              310  --        , based 'c' 10 digit
       305    311          ]
       306    312      return $ map (toEnum . fromInteger) codes