Changeset 2992

Show
Ignore:
Timestamp:
05/12/05 03:47:09 (4 years ago)
Author:
autrijus
svk:copy_cache_prev:
4536
Message:

* rx:P5// now assumes Unicode semantics again.

Location:
src/Pugs
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • src/Pugs/Prim.hs

    r2978 r2992  
    598 598    case val of
    599 599        VRule rx -> do
    600             chunks <- rxSplit rx (encodeUTF8 str) 
    601             return $ VList $ map (VStr . decodeUTF8) chunks 
     600            chunks <- rxSplit rx str 
     601            return . VList $ map VStr chunks 
    602 602        _ -> do
    603 603            delim <- fromVal val
  • src/Pugs/Prim/Match.hs

    r2957 r2992  
    43 43            return mkMatchFail
    44 44
    45 doMatch cs MkRulePCRE{ rxRegex = re } = do 
    46     rv <- liftIO $ PCRE.execute re csUTF8 0 
     45 doMatch csChars MkRulePCRE{ rxRegex = re } = do
     46    rv <- liftIO $ PCRE.execute re csBytes 0 
    47 47    if isNothing rv then return mkMatchFail else do
    48 48    let ((fromBytes, lenBytes):subs) = Array.elems (fromJust rv)
    49         substr from len = genericTake len (genericDrop from cs) -- in bytes 
    50         subsMatch = [ VMatch $ mkMatchOk f (f + t) (substr f t) [] Map.empty | (f, t) <- subs ] 
     49        substr str from len = genericTake len (genericDrop from str) 
     50        subsMatch = [ 
     51            VMatch $ mkMatchOk 
     52                fChars (fChars + lChars) 
     53                (substr csChars fChars lChars) 
     54                [] Map.empty 
     55            | (fBytes, lBytes) <- subs 
     56            , let fChars = chars $ genericTake fBytes csBytes 
     57            , let lChars = chars $ substr csBytes fBytes lBytes 
     58            ] 
     59        fromChars = chars $ genericTake fromBytes csBytes 
     60        lenChars  = chars $ substr csBytes fromBytes lenBytes 
     61        chars = genericLength . decodeUTF8 
    51 62
    52         leftmatch = decodeUTF8 $ genericTake fromBytes csUTF8 
    53         fromChars = genericLength leftmatch 
    54         lenChars  = genericLength $ decodeUTF8 $ (substr fromBytes lenBytes) 
    55  
    56     return $ mkMatchOk fromChars (fromChars + lenChars) (substr fromBytes lenBytes) subsMatch Map.empty 
     63    return $ mkMatchOk fromChars (fromChars + lenChars) (substr csChars fromChars lenChars) subsMatch Map.empty 
    57 64    where
    58     csUTF8 = encodeUTF8 cs 
     65    csBytes = encodeUTF8 csChars 
    59 66
    60 67 matchFromMR mr = VMatch $ mkMatchOk 0 0 (decodeUTF8 all) subsMatch Map.empty