Show
Ignore:
Timestamp:
05/10/05 22:25:13 (4 years ago)
Author:
corion
svk:copy_cache_prev:
4498
Message:

PCRE matches work with Unicode strings again

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • src/Pugs/Prim/Match.hs

    r2944 r2948  
    27 27         return (name, rxRule rule) 
    28 28     pge <- liftIO $ evalPGE pwd (encodeUTF8 cs) (encodeUTF8 re) subrules 
    29        rv  <- tryIO Nothing $ fmap Just (readIO $ decodeUTF8 pge)  
       29     rv  <- tryIO Nothing $ fmap Just (readIO $ decodeUTF8 pge) 
    30 30     let matchToVal PGE_Fail = VMatch mkMatchFail 
    31 31         matchToVal (PGE_Array ms) = VList (map matchToVal ms) 
     
    43 43 
    44 44 doMatch cs MkRulePCRE{ rxRegex = re } = do 
    45        rv <- liftIO $ PCRE.execute re (encodeUTF8 cs) 0 
       45     rv <- liftIO $ PCRE.execute re csUTF8 0 
    46 46     if isNothing rv then return mkMatchFail else do 
    47        let ((from, len):subs) = Array.elems (fromJust rv) 
    48            substr from len = genericTake len (genericDrop from cs) 
       47     let ((fromBytes, lenBytes):subs) = Array.elems (fromJust rv) 
       48         substr from len = genericTake len (genericDrop from cs) -- in bytes 
    49 49         subsMatch = [ VMatch $ mkMatchOk f (f + t) (substr f t) [] Map.empty | (f, t) <- subs ] 
    50        return $ mkMatchOk from (from + len) (substr from len) subsMatch Map.empty 
       50 
       51         leftmatch = decodeUTF8 $ genericTake fromBytes csUTF8 
       52         fromChars = genericLength leftmatch 
       53         lenChars  = genericLength $ decodeUTF8 $ (substr fromBytes lenBytes) 
       54 
       55     return $ mkMatchOk fromChars (fromChars + lenChars) (substr fromBytes lenBytes) subsMatch Map.empty 
       56     where 
       57     csUTF8 = encodeUTF8 cs 
    51 58 
    52 59 matchFromMR mr = VMatch $ mkMatchOk 0 0 (decodeUTF8 all) subsMatch Map.empty 
     
    131 138 rxSplit _  [] = return [] 
    132 139 rxSplit rx str = do 
    133         match <- str `doMatch` rx  
        140     match <- str `doMatch` rx 
    134 141     if not (matchOk match) then return [str] else do 
    135 142     if matchFrom match == matchTo match