root/src/Pugs/Parser/Program.hs

Revision 17045, 7.8 kB (checked in by audreyt, 17 months ago)

* Pugs.Parser.Program: Remove the unused s_freeVars field.

  • Property svn:mime-type set to text/plain; charset=UTF-8
  • Property svn:eol-style set to native
Line 
1{-# OPTIONS_GHC -fglasgow-exts #-}
2module Pugs.Parser.Program (
3    -- Before you would import Pugs.Parser, now you import Pugs.Parser.Program.
4    parseProgram,
5) where
6import Pugs.Internals
7import Pugs.AST
8import Pugs.Types (isSigilChar)
9
10import Pugs.Parser
11import Pugs.Rule
12import Text.ParserCombinators.Parsec.Error (showErrorMessages, errorMessages)
13import qualified Data.Map as Map
14
15parseProgram :: Env -> FilePath -> String -> Env
16parseProgram env path str = runRule env ruleProgram path progWithEOL
17    where
18    prog = decodeProgram str
19    progWithEOL
20        | [] <- prog        = "\n"
21        | last prog == '\n' = prog
22        | otherwise         = prog ++ "\n"
23
24-- Based on: <http://hackage.haskell.org/trac/haskell-prime/wiki/SourceEncodingDetection>
25data EncodedSource
26    = UTF8 !String
27    | UTF16 !Endian !String
28    | UTF32 !Endian !String
29 -- ... | UserDefined ...
30
31data Endian = LittleEndian | BigEndian
32
33decodeProgram :: String -> String
34decodeProgram str = case detectSourceEncoding str of
35    UTF8 xs                 -> decodeUTF8 (removeCRLF xs)
36    UTF16 LittleEndian xs   -> removeCRLF (decodeUTF16LE xs)
37    UTF16 BigEndian xs      -> removeCRLF (decodeUTF16BE xs)
38    UTF32 LittleEndian xs   -> removeCRLF (decodeUTF32LE xs)
39    UTF32 BigEndian xs      -> removeCRLF (decodeUTF32BE xs)
40    where
41    removeCRLF ('\r':'\n':xs)   = '\n':removeCRLF xs
42    removeCRLF (x:xs)           = x:removeCRLF xs
43    removeCRLF []               = []
44    decodeUTF16BE (a:b:c:d:xs)
45        | a >= '\xD8', a <= '\xDB'  -- High surrogate
46        , c >= '\xDC', c <= '\xDF'  -- Low surrogate
47        = let rest = decodeUTF16BE xs
48              hi   = (ord a - 0xD8) * 0x100 + ord b
49              lo   = (ord c - 0xDC) * 0x100 + ord d
50        in seq rest (chr (0x10000 + hi * 0x400 + lo) : rest)
51    decodeUTF16BE (a:b:xs) = let rest = decodeUTF16BE xs
52        in seq rest (chr (ord a * 0x100 + ord b) : rest)
53    decodeUTF16BE _ = []
54    decodeUTF16LE (a:b:c:d:xs)
55        | b >= '\xD8', b <= '\xDB'  -- High surrogate
56        , d >= '\xDC', d <= '\xDF'  -- Low surrogate
57        = let rest = decodeUTF16LE xs
58              hi   = (ord b - 0xD8) * 0x100 + ord a
59              lo   = (ord d - 0xDC) * 0x100 + ord c
60        in seq rest (chr (0x10000 + hi * 0x400 + lo) : rest)
61    decodeUTF16LE (a:b:xs) = let rest = decodeUTF16LE xs
62        in seq rest (chr (ord b * 0x100 + ord a) : rest)
63    decodeUTF16LE _ = []
64    decodeUTF32BE (a:b:c:d:xs) = let rest = decodeUTF32BE xs
65        in seq rest (chr (ord a * 0x1000000 + ord b * 0x10000 + ord c * 0x100 + ord d) : rest)
66    decodeUTF32BE _ = []
67    decodeUTF32LE (a:b:c:d:xs) = let rest = decodeUTF32LE xs
68        in seq rest (chr (ord d * 0x1000000 + ord c * 0x10000 + ord b * 0x100 + ord a) : rest)
69    decodeUTF32LE _ = []
70
71detectSourceEncoding :: String -> EncodedSource
72detectSourceEncoding bytes = case bytes of
73    []                                  -> UTF8 []
74    ['\x00']                            -> invalidNulls
75    xs@[_]                              -> UTF8 xs
76    ['\xFF', '\xFE']                    -> UTF16 LittleEndian []
77    ('\xFE':'\xFF':xs)                  -> UTF16 BigEndian xs
78    ['\x00', '\x00']                    -> invalidNulls
79    xs@['\x00', _]                      -> UTF16 BigEndian xs
80    xs@[_, '\x00']                      -> UTF16 LittleEndian xs
81    xs@[_, _]                           -> UTF8 xs
82    ['\x00', '\x00', '\x00']            -> invalidNulls
83    xs@[_, _, _]                        -> UTF8 xs
84    ('\xEF':'\xBB':'\xBF':xs)           -> UTF8 xs
85    ('\x00':'\x00':'\xFE':'\xFF':xs)    -> UTF32 BigEndian xs
86    ('\xFF':'\xFE':'\x00':'\x00':xs)    -> UTF32 LittleEndian xs
87    ('\xFF':'\xFE':xs)                  -> UTF16 LittleEndian xs
88    ('\x00':'\x00':'\x00':'\x00':_)     -> invalidNulls
89    xs@('\x00':'\x00':'\x00':_)         -> UTF32 BigEndian xs
90    xs@(_:'\x00':'\x00':'\x00':_)       -> UTF32 LittleEndian xs
91    ('\x00':'\x00':_)                   -> invalidNulls
92    xs@('\x00':_)                       -> UTF16 BigEndian xs
93    xs@(_:'\x00':_)                     -> UTF16 LittleEndian xs
94    xs                                  -> UTF8 xs
95    where
96    invalidNulls = error "(invalid nulls)"
97
98makeState :: Env -> RuleState
99makeState env = MkState
100    { s_env             = env
101    , s_parseProgram    = parseProgram
102    , s_dynParsers      = MkDynParsersEmpty
103    , s_bracketLevel    = StatementBracket
104--  , s_char            = ' '
105--  , s_name            = nullID
106--  , s_pos             = 0
107    , s_wsLine          = 0
108    , s_wsColumn        = 0
109    , s_closureTraits   = [id]
110--  , s_freeVars        = Set.empty
111    , s_knownVars       = Map.map (const topMPad) (padEntries (envLexical env))
112    , s_outerVars       = Map.empty
113    , s_protoPad        = emptyPad
114    }
115
116-- ^ A fake 'top' MPad for s_knownVars above to refer to things outside the eval scope.
117{-# NOINLINE topMPad #-}
118topMPad :: MPad
119topMPad = unsafePerformIO $ do
120    tvar <- newTVarIO emptyPad
121    return $ MkMPad (addressOf tvar) tvar
122
123-- XXX - Pending clarification about those 3 -- are they routine-implicit or block-implicit?
124{-
125protoPad :: Pad
126protoPad = mkPad
127    [ (cast "$_", PELexical
128    , (cast "$/",
129    , (cast "$!",
130    ]
131-}
132
133runRule :: Env -> RuleParser Env -> FilePath -> String -> Env
134runRule env p name str =
135    case ( runParser p (makeState env) name str ) of
136        Left err    -> env { envBody = Val $ VError (VStr msg) [mkPos pos pos] }
137            where
138            msg = concat (intersperse "\n" (map filterUnexpected $ lines (showErr err)))
139            pos = errorPos err
140            cur = case takeSameClassWords (dropUntilPos pos str) of
141                ""  -> "end of input"
142                xs  -> show xs
143            filterUnexpected ('!':_)    = "Unexpected " ++ cur
144            filterUnexpected line       = line
145        Right env'  -> env'
146
147takeSameClassWords :: String -> String
148takeSameClassWords "" = ""
149takeSameClassWords (x:xs)
150    | isSigilChar x = x : takeSameClassWords xs
151    | otherwise = case charClassOf x of
152        SpaceClass  -> x : takeSameClassWords xs
153        cls         -> x : takeWhile ((== cls) . charClassOf) xs
154
155dropUntilPos :: SourcePos -> String -> String
156dropUntilPos pos str
157    | (curline:_) <- drop (ln - 1) (lines str) = drop (col - 1) curline
158    | otherwise = ""
159    where
160    col = sourceColumn pos
161    ln  = sourceLine pos
162
163showErr :: ParseError -> String
164showErr err =
165      showErrorMessages "or" "unknown parse error"
166                        "expecting" "!" "end of input"
167                       (errorMessages err)
168
169-- Lexical units --------------------------------------------------
170
171ruleProgram :: RuleParser Env
172ruleProgram = rule "program" $ do
173    env     <- getRuleEnv
174
175    topPad  <- genParamEntries SubRoutine [defaultArrayParam]
176    modify $ \s -> s{ s_protoPad = topPad }
177
178    block   <- ruleBlockBody `finallyM` eof
179    main    <- retVerbatimBlock SubPrim Nothing False $
180        block{ bi_body = mergeStmts emptyExp $ bi_body block }
181
182    -- We are still in the compile time.
183    modify $ \s -> s{ s_env = (s_env s){ envCompPad = Just (error "no comp pad") } }
184
185    -- Force a reclose-pad evaluation here by way of unsafeEvalExp.
186    main'@(Val (VCode vc)) <- unsafeEvalExp $ Syn "" [unwrap main]
187
188    -- S04: CHECK {...}*      at compile time, ALAP
189    --  $_() for @*CHECK
190    rv <- unsafeEvalExp $ Syn "for"
191        [ _Var "@*CHECK"
192        , Syn "sub"
193            [ Val . VCode $ mkPrim
194                { subBody       = App (_Var "$_") Nothing []
195                , subParams     = [defaultScalarParam]
196                , subInnerPad   = defaultScalarPad
197                }
198            ]
199        ]
200
201    -- If there was a exit() in a CHECK block, we have to exit.
202    possiblyExit rv
203
204    env' <- getRuleEnv
205    return $ env'
206        { envBody       = App (Syn "block" [main']) Nothing (replicate (length $ subParams vc) (_Var "$_")) -- _Var "@*ARGS"]
207        , envPackage    = envPackage env
208        , envCompPad    = Nothing
209        }
Note: See TracBrowser for help on using the browser.