Changeset 3

Show
Ignore:
Timestamp:
02/06/05 18:37:44 (4 years ago)
Author:
autrijus
svk:copy_cache_prev:
1041
Message:

* add inc/ remove unnec lexer

Files:
12 added
1 modified

Legend:

Unmodified
Added
Removed
  • src/Lexer.hs

    r1 r3  
    2626          , P.identStart     = letter <|> oneOf "_:$@%&" 
    2727          , P.identLetter    = alphaNum <|> oneOf "_:" 
    28           , P.reservedNames  = [ "true", "false", "do", "else", "not", 
    29                                "if", "then", "while", "skip" 
    30                                -- , "begin", "proc", "is", "end", "val", "res", "malloc"  
    31                               ] 
     28          , P.reservedNames  = words $ 
     29                "if then else do while skip" 
    3230          , P.reservedOpNames= words $ 
    3331                " . .+ .? .* .+ .() .[] .{} .<<>> .= " ++ 
    3432                " ++ -- **  ! + - ~ ? * ** +^ ~^ ?^ \\ " ++ 
    35                 " * / % x xx +& +<< +>> ~& ~<< ~>> " ++ 
     33                " * / % x xx +& +< +> ~& ~<< ~>> " ++ 
    3634                " + - ~ +| +^ ~| ~^ " ++ 
    3735                " & | ^ " ++ 
     
    128126 
    129127    decimal         = number 10 digit         
    130     hexadecimal     = do{ oneOf "xX"; number 16 hexDigit } 
    131     octal           = do{ oneOf "oO"; number 8 octDigit  } 
    132     binary          = do{ oneOf "bB"; number 2 (oneOf "01")  } 
     128    hexadecimal     = do{ char 'x'; number 16 hexDigit } 
     129    octal           = do{ char 'o'; number 8 octDigit  } 
     130    binary          = do{ char 'b'; number 2 (oneOf "01")  } 
    133131 
    134132    -- number :: Integer -> CharParser st Char -> CharParser st Integer 
     
    154152    return '\'' 
    155153 
    156 -------------------------------------------------------------------------------- 
    157  
    158 newtype Token = Token (SourcePos, Symbol) 
    159  
    160 instance Show Token where 
    161    show (Token (pos, symbol)) = show symbol 
    162  
    163 data Symbol 
    164    = Word String 
    165    | SingleQuoted String  
    166    | DotString String  
    167    | QuestionString String  
    168    | Num Int 
    169    | Equals 
    170    | LeftRoundBracket 
    171    | RightRoundBracket 
    172    | LeftSquareBracket 
    173    | RightSquareBracket 
    174    | Comma 
    175    | Colon 
    176    | SemiColon 
    177    | Newline 
    178    | BackSlash 
    179    | Exclamation 
    180    | Dash 
    181    | GreaterThan 
    182    | Space  
    183    | Bad String 
    184    deriving (Eq) 
    185  
    186 -- slighlty pretty printing of Symbols via Show. The  
    187 -- prettiness is due to the fact that the Parsec parser 
    188 -- uses show to print Symbols as part of error messages 
    189 instance Show Symbol where 
    190    show (Word s)           = "word: " ++ s 
    191    show (SingleQuoted s)   = "single quoted string: " ++ s  
    192    show (DotString s)      = ".-string: " ++ s  
    193    show (QuestionString s) = "?-string: " ++ s  
    194    show (Num n)            = "number: " ++ show n 
    195    show Equals             = "equals sign: '='" 
    196    show LeftRoundBracket   = "bracket: '('" 
    197    show RightRoundBracket  = "bracket: ')'" 
    198    show LeftSquareBracket  = "bracket: '['" 
    199    show RightSquareBracket = "bracket: ']'" 
    200    show Comma              = "comma: ','" 
    201    show Colon              = "colon: ':'" 
    202    show SemiColon          = "semi-colon: ';'" 
    203    show Newline            = "newline: '\\n'" 
    204    show BackSlash          = "backslash: '\'" 
    205    show Exclamation        = "exclamation sign: '!'" 
    206    show Dash               = "dash '-'" 
    207    show GreaterThan        = "greater than sign: '>'" 
    208    show Space              = "space" 
    209    show (Bad str)          = str  
    210  
    211 -- turn a stream of characters into a stream of tokens 
    212 lexer :: String -> String -> [Token] 
    213 lexer filename input  
    214    = lexWork (newPos filename 1 1) input 
    215  
    216 lexWork :: SourcePos -> String -> [Token] 
    217 lexWork pos [] = [] 
    218 lexWork pos (x:xs) 
    219    | x == '='  = simpleToken Equals nextCol  
    220    | x == '('  = simpleToken LeftRoundBracket nextCol  
    221    | x == ')'  = simpleToken RightRoundBracket nextCol  
    222    | x == '['  = simpleToken LeftSquareBracket nextCol  
    223    | x == ']'  = simpleToken RightSquareBracket nextCol  
    224    | x == ','  = simpleToken Comma nextCol  
    225    | x == ':'  = simpleToken Colon nextCol  
    226    | x == ';'  = simpleToken SemiColon nextCol  
    227    | x == '\\' = simpleToken BackSlash nextCol  
    228    | x == '!'  = simpleToken Exclamation nextCol  
    229    | x == '-'  = simpleToken Dash nextCol  
    230    | x == '>'  = simpleToken GreaterThan nextCol  
    231    | x == '\n' = simpleToken Newline nextLine  
    232    -- source location does not need to be accurate within a comment 
    233    | x == '#'  = lexWork pos (dropWhile (/= '\n') xs) 
    234    | x == '.'  = Token (pos, DotString xs) : lexWork pos (dropWhile (/= '\n') xs) 
    235    | x == '?'  = Token (pos, QuestionString xs) : lexWork pos (dropWhile (/= '\n') xs) 
    236    | isWhiteSpace x = simpleToken Space nextCol 
    237    | isDigit x = let (num, rest) = span isDigit (x:xs) 
    238                      nextPos = incSourceColumn pos (length num) 
    239                  in Token (pos, Num $ read num) : lexWork nextPos rest 
    240    | isAlpha x 
    241         = let (restWord, rest) = span isWordChar xs 
    242               word = x:restWord 
    243               nextPos = incSourceColumn pos (length word) 
    244           in Token (pos, Word word) : lexWork nextPos rest 
    245    -- quoted strings need special care - escaped quotes can appear 
    246    -- within the string, and the string might not be terminated 
    247    -- by a quote in the case of a lexical error 
    248    | x == '\''  
    249         = let (thisString, rest) = lexTailQuotedString xs 
    250               nextPos = incSourceColumn pos (length thisString + 2) 
    251           in if null rest 
    252                 then [Token (pos, Bad $ "ill-quoted input: " ++ x:thisString)] 
    253                 else Token (pos, SingleQuoted thisString) : lexWork nextPos (tail rest)  
    254    | otherwise = simpleToken (Bad $ "symbol: " ++ show x) nextCol 
    255    where 
    256    simpleToken :: Symbol -> (SourcePos -> SourcePos) -> [Token] 
    257    simpleToken tok srcPosUpdate  
    258       = Token (pos, tok) : lexWork (srcPosUpdate pos) xs  
    259    isWhiteSpace :: Char -> Bool 
    260    isWhiteSpace c = c `elem` " \t\r\f\v\xA0" 
    261    isWordChar :: Char -> Bool 
    262    isWordChar c = isAlpha c || isDigit c  
    263    nextLine :: SourcePos -> SourcePos 
    264    nextLine pos = incSourceLine (setSourceColumn pos 1) 1 
    265    nextCol :: SourcePos -> SourcePos 
    266    nextCol pos = incSourceColumn pos 1 
    267  
    268 -- lex the rest of a string following the first quote mark 
    269 -- must skip escaped quotes, and escaped backslashes 
    270 -- include the final quote mark in the result 
    271 lexTailQuotedString :: String -> (String, String) 
    272 lexTailQuotedString [] = ([], []) 
    273 lexTailQuotedString str@('\'':xs) = ([], str) 
    274 -- a backslash then a quote is escaped 
    275 lexTailQuotedString ('\\':'\'':xs) 
    276    = let (string, rest) = lexTailQuotedString xs in ('\'':string, rest) 
    277 -- a backslash then a backslash is an escaped backslash 
    278 lexTailQuotedString ('\\':'\\':xs) 
    279    = let (string, rest) = lexTailQuotedString xs in ('\\':'\\':string, rest) 
    280 lexTailQuotedString (x:xs) 
    281    = let (string, rest) = lexTailQuotedString xs in (x:string, rest)