Changeset 10467

Show
Ignore:
Timestamp:
06/03/06 08:13:11 (3 years ago)
Author:
audreyt
Message:

* Upgrade to FPS trunk for even faster list fusion.

Location:
third-party/fps/Data
Files:
6 modified

Legend:

Unmodified
Added
Removed
  • third-party/fps/Data/ByteString.hs

    r10434 r10467  
    8888        scanl,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 
    8989        scanl1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 
     90        scanr,                  -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 
     91        scanr1,                 -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 
    9092 
    9193        -- ** Accumulating maps 
    9294        mapAccumL,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 
    93 --      mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 
     95        mapAccumR,              -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 
    9496        mapIndexed,             -- :: (Int -> Word8 -> Word8) -> ByteString -> ByteString 
    9597 
     
    186188        -- | These functions perform memcpy(3) operations 
    187189        copy,                   -- :: ByteString -> ByteString 
    188         copyCString,            -- :: CString -> ByteString 
    189         copyCStringLen,         -- :: CStringLen -> ByteString 
     190        copyCString,            -- :: CString -> IO ByteString 
     191        copyCStringLen,         -- :: CStringLen -> IO ByteString 
    190192 
    191193        -- * I\/O with 'ByteString's 
     
    203205        readFile,               -- :: FilePath -> IO ByteString 
    204206        writeFile,              -- :: FilePath -> ByteString -> IO () 
     207        appendFile,             -- :: FilePath -> ByteString -> IO () 
    205208--      mmapFile,               -- :: FilePath -> IO ByteString 
    206209 
     
    220223        unpackList, -- eek, otherwise it gets thrown away by the simplifier 
    221224#endif 
     225        lengthU, maximumU, minimumU 
    222226  ) where 
    223227 
     
    228232                                ,dropWhile,span,break,elem,filter,maximum 
    229233                                ,minimum,all,concatMap,foldl1,foldr1 
    230                                 ,scanl,scanl1,readFile,writeFile,replicate 
     234                                ,scanl,scanl1,scanr,scanr1 
     235                                ,readFile,writeFile,appendFile,replicate 
    231236                                ,getContents,getLine,putStr,putStrLn 
    232237                                ,zip,zipWith,unzip,notElem) 
     
    243248 
    244249-- Control.Exception.bracket not available in yhc or nhc 
    245 import Control.Exception        (bracket) 
     250import Control.Exception        (bracket, assert) 
    246251import Control.Monad            (when) 
    247252 
     
    371376-- | /O(1)/ The empty 'ByteString' 
    372377empty :: ByteString 
    373 empty = inlinePerformIO $ mallocByteString 1 >>= \fp -> return $ PS fp 0 0 
     378empty = unsafeCreate 0 $ const $ return () 
    374379{-# NOINLINE empty #-} 
    375380 
    376381-- | /O(1)/ Convert a 'Word8' into a 'ByteString' 
    377382singleton :: Word8 -> ByteString 
    378 singleton c = unsafePerformIO $ mallocByteString 2 >>= \fp -> do 
    379     withForeignPtr fp $ \p -> poke p c 
    380     return $ PS fp 0 1 
     383singleton c = unsafeCreate 1 $ \p -> poke p c 
    381384{-# INLINE singleton #-} 
    382385 
     
    408411#if !defined(__GLASGOW_HASKELL__) 
    409412 
    410 pack str = create (P.length str) $ \p -> go p str 
     413pack str = unsafeCreate (P.length str) $ \p -> go p str 
    411414    where 
    412415        go _ []     = return () 
     
    415418#else /* hack away */ 
    416419 
    417 pack str = create (P.length str) $ \(Ptr p) -> stToIO (go p 0# str) 
     420pack str = unsafeCreate (P.length str) $ \(Ptr p) -> stToIO (go p 0# str) 
    418421    where 
    419422        go _ _ []        = return () 
     
    468471 
    469472-- TODO just use normal foldr here. 
     473-- 
     474-- or 
     475-- unpack xs | null xs = [] 
     476--           | otherwise = unsafeHead xs : unpack (unsafeTail xs) 
     477-- 
     478-- ? 
    470479 
    471480#endif 
     
    476485-- conversion function 
    477486packWith :: (a -> Word8) -> [a] -> ByteString 
    478 packWith k str = create (P.length str) $ \p -> go p str 
     487packWith k str = unsafeCreate (P.length str) $ \p -> go p str 
    479488    where 
    480489        STRICT2(go) 
     
    501510-- | /O(1)/ Test whether a ByteString is empty. 
    502511null :: ByteString -> Bool 
    503 null (PS _ _ l) = l == 0 
     512null (PS _ _ l) = assert (l >= 0) $ l <= 0 
    504513{-# INLINE null #-} 
    505514 
     515-- --------------------------------------------------------------------- 
    506516-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'. 
    507517length :: ByteString -> Int 
    508 length (PS _ _ l) = l 
     518length (PS _ _ l) = assert (l >= 0) $ l 
     519 
     520-- 
     521-- length/loop fusion. When taking the length of any fuseable loop, 
     522-- rewrite it as a foldl', and thus avoid allocating the result buffer 
     523-- worth around 10% in speed testing. 
     524-- 
    509525 
    510526#if defined(__GLASGOW_HASKELL__) 
     
    512528#endif 
    513529 
    514 {-#  
    515  
    516 -- Translate length into a loop.  
    517 -- Performace ok, but allocates too much, so disable for now. 
    518  
    519   "length/loop" forall f acc s . 
    520   length (loopArr (loopU f acc s)) = foldl' (const . (+1)) (0::Int) (loopArr (loopU f acc s)) 
     530lengthU :: ByteString -> Int 
     531lengthU = foldl' (const . (+1)) (0::Int) 
     532{-# INLINE lengthU #-} 
     533 
     534{-# RULES 
     535 
     536-- v2 fusion 
     537"length/loop" forall loop s . 
     538  length  (loopArr (loopWrapper loop s)) = 
     539  lengthU (loopArr (loopWrapper loop s)) 
    521540 
    522541  #-} 
     542 
     543------------------------------------------------------------------------ 
    523544 
    524545-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different 
    525546-- complexity, as it requires a memcpy. 
    526547cons :: Word8 -> ByteString -> ByteString 
    527 cons c (PS x s l) = create (l+1) $ \p -> withForeignPtr x $ \f -> do 
     548cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do 
    528549        poke p c 
    529550        memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l) 
    530551{-# INLINE cons #-} 
    531552 
    532 -- todo fuse 
    533  
    534553-- | /O(n)/ Append a byte to the end of a 'ByteString' 
    535554snoc :: ByteString -> Word8 -> ByteString 
    536 snoc (PS x s l) c = create (l+1) $ \p -> withForeignPtr x $ \f -> do 
     555snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do 
    537556        memcpy p (f `plusPtr` s) (fromIntegral l) 
    538557        poke (p `plusPtr` l) c 
     
    542561 
    543562-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. 
     563-- An exception will be thrown in the case of an empty ByteString. 
    544564head :: ByteString -> Word8 
    545565head ps@(PS x s _) 
     
    549569 
    550570-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty. 
     571-- An exception will be thrown in the case of an empty ByteString. 
    551572tail :: ByteString -> ByteString 
    552573tail (PS p s l) 
     
    556577 
    557578-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty. 
     579-- An exception will be thrown in the case of an empty ByteString. 
    558580last :: ByteString -> Word8 
    559581last ps@(PS x s l) 
     
    563585 
    564586-- | /O(1)/ Return all the elements of a 'ByteString' except the last one. 
     587-- An exception will be thrown in the case of an empty ByteString. 
    565588init :: ByteString -> ByteString 
    566 init (PS p s l) 
    567     | l <= 0    = errorEmptyList "init" 
     589init ps@(PS p s l) 
     590    | null ps   = errorEmptyList "init" 
    568591    | otherwise = PS p s (l-1) 
    569592{-# INLINE init #-} 
     
    582605-- element of @xs@. This function is subject to array fusion. 
    583606map :: (Word8 -> Word8) -> ByteString -> ByteString 
     607#if defined(LOOPU_FUSION) 
    584608map f = loopArr . loopU (mapEFL f) NoAcc 
     609#elif defined(LOOPUP_FUSION) 
     610map f = loopArr . loopUp (mapEFL f) NoAcc 
     611#elif defined(LOOPNOACC_FUSION) 
     612map f = loopArr . loopNoAcc (mapEFL f) 
     613#else 
     614map f = loopArr . loopMap f 
     615#endif 
    585616{-# INLINE map #-} 
    586617 
     
    588619-- slightly faster for one-shot cases. 
    589620map' :: (Word8 -> Word8) -> ByteString -> ByteString 
    590 map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> do 
    591     np <- mallocByteString (len+1) 
    592     withForeignPtr np $ \p -> do 
    593         map_ 0 (a `plusPtr` s) p 
    594         return (PS np 0 len) 
     621map' f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a -> 
     622    create len $ map_ 0 (a `plusPtr` s) 
    595623  where 
    596  
    597624    map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO () 
    598625    STRICT3(map_) 
     
    607634-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. 
    608635reverse :: ByteString -> ByteString 
    609 reverse (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> 
     636reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> 
    610637        c_reverse p (f `plusPtr` s) (fromIntegral l) 
    611638 
    612 {- 
    613 reverse = pack . P.reverse . unpack 
    614 -} 
     639-- todo, fuseable version 
    615640 
    616641-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a 
     
    621646intersperse c ps@(PS x s l) 
    622647    | length ps < 2  = ps 
    623     | otherwise      = create (2*l-1) $ \p -> withForeignPtr x $ \f -> 
     648    | otherwise      = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f -> 
    624649        c_intersperse p (f `plusPtr` s) (fromIntegral l) c 
    625650 
     
    641666-- This function is subject to array fusion. 
    642667foldl :: (a -> Word8 -> a) -> a -> ByteString -> a 
     668#if !defined(LOOPU_FUSION) 
     669foldl f z = loopAcc . loopUp (foldEFL f) z 
     670#else 
    643671foldl f z = loopAcc . loopU (foldEFL f) z 
     672#endif 
    644673{-# INLINE foldl #-} 
    645674 
     
    659688 
    660689-- | 'foldl\'' is like 'foldl', but strict in the accumulator. 
     690-- Though actually foldl is also strict in the accumulator. 
    661691foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a 
    662 foldl' f z = loopAcc . loopU (foldEFL' f) z 
     692foldl' = foldl 
     693-- foldl' f z = loopAcc . loopU (foldEFL' f) z 
    663694{-# INLINE foldl' #-} 
    664695 
     
    667698-- reduces the ByteString using the binary operator, from right to left. 
    668699foldr :: (Word8 -> a -> a) -> a -> ByteString -> a 
    669 foldr k z (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr -> 
    670         go (ptr `plusPtr` s) (ptr `plusPtr` (s+l)) 
    671     where 
    672         STRICT2(go) 
    673         go p q | p == q    = return z 
    674                | otherwise = do c  <- peek p 
    675                                 ws <- go (p `plusPtr` 1) q 
    676                                 return $ c `k` ws 
     700foldr k z = loopAcc . loopDown (foldEFL (flip k)) z 
     701{-# INLINE foldr #-} 
    677702 
    678703-- | 'foldl1' is a variant of 'foldl' that has no starting value 
    679704-- argument, and thus must be applied to non-empty 'ByteStrings'. 
    680 -- This function is subject to array fusion. 
     705-- This function is subject to array fusion.  
     706-- An exception will be thrown in the case of an empty ByteString. 
    681707foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 
    682708foldl1 f ps 
    683709    | null ps   = errorEmptyList "foldl1" 
    684710    | otherwise = foldl f (unsafeHead ps) (unsafeTail ps) 
     711{-# INLINE foldl1 #-} 
    685712 
    686713-- | 'foldl1\'' is like 'foldl1', but strict in the accumulator. 
     714-- An exception will be thrown in the case of an empty ByteString. 
    687715foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 
    688716foldl1' f ps 
    689717    | null ps   = errorEmptyList "foldl1'" 
    690718    | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps) 
     719{-# INLINE foldl1' #-} 
    691720 
    692721-- | 'foldr1' is a variant of 'foldr' that has no starting value argument, 
    693722-- and thus must be applied to non-empty 'ByteString's 
     723-- An exception will be thrown in the case of an empty ByteString. 
    694724foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 
    695725foldr1 f ps 
    696726    | null ps        = errorEmptyList "foldr1" 
    697727    | otherwise      = foldr f (last ps) (init ps) 
     728{-# INLINE foldr1 #-} 
    698729 
    699730-- --------------------------------------------------------------------- 
     
    704735concat []     = empty 
    705736concat [ps]   = ps 
    706 concat xs     = create len $ \ptr -> go xs ptr 
     737concat xs     = unsafeCreate len $ \ptr -> go xs ptr 
    707738  where len = P.sum . P.map length $ xs 
    708739        STRICT2(go) 
     
    745776                                    then go (p `plusPtr` 1) q 
    746777                                    else return False 
    747 -- todo fuse 
     778 
     779------------------------------------------------------------------------ 
    748780 
    749781-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString' 
     782-- This function will fuse. 
     783-- An exception will be thrown in the case of an empty ByteString. 
    750784maximum :: ByteString -> Word8 
    751785maximum xs@(PS x s l) 
    752786    | null xs   = errorEmptyList "maximum" 
    753787    | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> 
    754                     return $ c_maximum (p `plusPtr` s) (fromIntegral l) 
    755 {-# INLINE maximum #-} 
     788                      c_maximum (p `plusPtr` s) (fromIntegral l) 
    756789 
    757790-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString' 
     791-- This function will fuse. 
     792-- An exception will be thrown in the case of an empty ByteString. 
    758793minimum :: ByteString -> Word8 
    759794minimum xs@(PS x s l) 
    760795    | null xs   = errorEmptyList "minimum" 
    761796    | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> 
    762                     return $ c_minimum (p `plusPtr` s) (fromIntegral l) 
    763 {-# INLINE minimum #-} 
    764  
    765 -- fusion is too slow here (10x) 
    766  
    767 {- 
    768 maximum xs@(PS x s l) 
    769     | null xs   = errorEmptyList "maximum" 
    770     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do 
    771                         w <- peek p 
    772                         maximum_ (p `plusPtr` s) 0 l w 
    773  
    774 maximum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8 
    775 STRICT4(maximum_) 
    776 maximum_ ptr n m c 
    777     | n >= m    = return c 
    778     | otherwise = do w <- peekByteOff ptr n 
    779                      maximum_ ptr (n+1) m (if w > c then w else c) 
    780  
    781 minimum xs@(PS x s l) 
    782     | null xs   = errorEmptyList "minimum" 
    783     | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> do 
    784                         w <- peek p 
    785                         minimum_ (p `plusPtr` s) 0 l w 
    786  
    787 minimum_ :: Ptr Word8 -> Int -> Int -> Word8 -> IO Word8 
    788 STRICT4(minimum_) 
    789 minimum_ ptr n m c 
    790     | n >= m    = return c 
    791     | otherwise = do w <- peekByteOff ptr n 
    792                      minimum_ ptr (n+1) m (if w < c then w else c) 
    793 -} 
     797                      c_minimum (p `plusPtr` s) (fromIntegral l) 
     798 
     799-- 
     800-- minimum/maximum/loop fusion. As for length (and other folds), when we 
     801-- see we're applied after a fuseable op, switch from using the C 
     802-- version, to the fuseable version. The result should then avoid 
     803-- allocating a buffer. 
     804-- 
     805 
     806#if defined(__GLASGOW_HASKELL__) 
     807{-# INLINE [1] minimum #-} 
     808{-# INLINE [1] maximum #-} 
     809#endif 
     810 
     811maximumU :: ByteString -> Word8 
     812maximumU = foldl1' max 
     813{-# INLINE maximumU #-} 
     814 
     815minimumU :: ByteString -> Word8 
     816minimumU = foldl1' min 
     817{-# INLINE minimumU #-} 
     818 
     819{-# RULES 
     820 
     821"minimum/loop" forall loop s . 
     822  minimum  (loopArr (loopWrapper loop s)) = 
     823  minimumU (loopArr (loopWrapper loop s)) 
     824 
     825"maximum/loop" forall loop s . 
     826  maximum  (loopArr (loopWrapper loop s)) = 
     827  maximumU (loopArr (loopWrapper loop s)) 
     828 
     829  #-} 
     830 
     831------------------------------------------------------------------------ 
    794832 
    795833mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 
     834#if !defined(LOOPU_FUSION) 
     835mapAccumL f z = unSP . loopUp (mapAccumEFL f) z 
     836#else 
    796837mapAccumL f z = unSP . loopU (mapAccumEFL f) z 
    797  
    798 --mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 
     838#endif 
     839{-# INLINE mapAccumL #-} 
     840 
     841mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) 
     842mapAccumR f z = unSP . loopDown (mapAccumEFL f) z 
     843{-# INLINE mapAccumR #-} 
    799844 
    800845-- | /O(n)/ map Word8 functions, provided with the index at each position 
    801846mapIndexed :: (Int -> Word8 -> Word8) -> ByteString -> ByteString 
    802 mapIndexed f = loopArr . loopU (mapIndexEFL f) 0 
     847mapIndexed f = loopArr . loopUp (mapIndexEFL f) 0 
     848{-# INLINE mapIndexed #-} 
    803849 
    804850-- --------------------------------------------------------------------- 
     
    814860-- > last (scanl f z xs) == foldl f z xs. 
    815861scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 
    816 scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) -- extra space 
     862#if !defined(LOOPU_FUSION) 
     863scanl f z ps = loopArr . loopUp (scanEFL f) z $ (ps `snoc` 0) 
     864#else 
     865scanl f z ps = loopArr . loopU (scanEFL f) z $ (ps `snoc` 0) 
     866#endif 
     867 
     868    -- n.b. haskell's List scan returns a list one bigger than the 
     869    -- input, so we need to snoc here to get some extra space, however, 
     870    -- it breaks map/up fusion (i.e. scanl . map no longer fuses) 
    817871{-# INLINE scanl #-} 
    818872 
     
    827881{-# INLINE scanl1 #-} 
    828882 
     883-- | scanr is the right-to-left dual of scanl. 
     884scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString 
     885scanr f z ps = loopArr . loopDown (scanEFL (flip f)) z $ (0 `cons` ps) -- extra space 
     886{-# INLINE scanr #-} 
     887 
     888-- | 'scanr1' is a variant of 'scanr' that has no starting value argument. 
     889scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString 
     890scanr1 f ps 
     891    | null ps   = empty 
     892    | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions 
     893{-# INLINE scanr1 #-} 
     894 
    829895-- --------------------------------------------------------------------- 
    830896-- Unfolds and replicates 
     
    837903-- This implemenation uses @memset(3)@ 
    838904replicate :: Int -> Word8 -> ByteString 
    839 replicate w c | w <= 0    = empty 
    840               | otherwise = create w $ \ptr -> memset ptr c (fromIntegral w) >> return () 
    841  
    842 {- 
    843 -- About 5x slower 
    844 replicate w c = inlinePerformIO $ generate w $ \ptr -> go ptr w 
    845     where 
    846         STRICT2(go) 
    847         go _   0 = return w 
    848         go ptr n = poke ptr c >> go (ptr `plusPtr` 1) (n-1) 
    849 -} 
     905replicate w c 
     906    | w <= 0    = empty 
     907    | otherwise = unsafeCreate w $ \ptr -> 
     908                      memset ptr c (fromIntegral w) >> return () 
    850909 
    851910-- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'  
     
    880939unfoldrN i f x0 
    881940    | i < 0     = (empty, Just x0) 
    882     | otherwise = inlinePerformIO $ do 
    883                     fp <- mallocByteString i 
    884                     withForeignPtr fp (\p -> go fp p x0 0) 
    885   where STRICT4(go) 
    886         go fp p x n = 
     941    | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 
     942  where STRICT3(go) 
     943        go p x n = 
    887944          case f x of 
    888             Nothing      -> let s = copy (PS fp 0 n) 
    889                              in s `seq` return (s, Nothing) 
     945            Nothing      -> return (0, n, Nothing) 
    890946            Just (w,x') 
    891              | n == i    -> return (PS fp 0 i, Just x) 
     947             | n == i    -> return (0, n, Just x) 
    892948             | otherwise -> do poke p w 
    893                                go fp (p `plusPtr` 1) x' (n+1) 
     949                               go (p `plusPtr` 1) x' (n+1) 
    894950 
    895951-- --------------------------------------------------------------------- 
     
    10631119 
    10641120        STRICT1(loop) 
    1065         loop n = do 
    1066             let q = memchr (ptr `plusPtr` n) w (fromIntegral (l-n)) 
    1067             if q == nullPtr 
     1121        loop n = 
     1122            let q = inlinePerformIO $ memchr (ptr `plusPtr` n) 
     1123                                           w (fromIntegral (l-n)) 
     1124            in if q == nullPtr 
    10681125                then [PS x (s+n) (l-n)] 
    10691126                else let i = q `minusPtr` ptr in PS x (s+n) (i-n) : loop (i+1) 
     
    11431200-- 
    11441201joinWithByte :: Word8 -> ByteString -> ByteString -> ByteString 
    1145 joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = create len $ \ptr -> 
     1202joinWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr -> 
    11461203    withForeignPtr ffp $ \fp -> 
    11471204    withForeignPtr fgp $ \gp -> do 
     
    11721229elemIndex c (PS x s l) = inlinePerformIO $ withForeignPtr x $ \p -> do 
    11731230    let p' = p `plusPtr` s 
    1174         q  = memchr p' c (fromIntegral l) 
     1231    q <- memchr p' c (fromIntegral l) 
    11751232    return $ if q == nullPtr then Nothing else Just $! q `minusPtr` p' 
    11761233{-# INLINE elemIndex #-} 
     
    12041261 
    12051262        STRICT1(loop) 
    1206         loop n = let q = memchr (ptr `plusPtr` n) w (fromIntegral (l - n)) 
     1263        loop n = let q = inlinePerformIO $ memchr (ptr `plusPtr` n) 
     1264                                                w (fromIntegral (l - n)) 
    12071265                 in if q == nullPtr 
    12081266                        then [] 
     
    12291287count :: Word8 -> ByteString -> Int 
    12301288count w (PS x s m) = inlinePerformIO $ withForeignPtr x $ \p -> 
    1231     return $ fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w 
     1289    fmap fromIntegral $ c_count (p `plusPtr` s) (fromIntegral m) w 
    12321290{-# INLINE count #-} 
    12331291 
     
    12421300        STRICT3(go) 
    12431301        go p l i = do 
    1244             let q = memchr p w l 
     1302            q <- memchr p w l 
    12451303            if q == nullPtr 
    12461304                then return i 
     
    13031361-- predicate. This function is subject to array fusion. 
    13041362filter :: (Word8 -> Bool) -> ByteString -> ByteString 
     1363#if defined(LOOPU_FUSION) 
    13051364filter p  = loopArr . loopU (filterEFL p) NoAcc 
     1365#elif defined(LOOPUP_FUSION) 
     1366filter p  = loopArr . loopUp (filterEFL p) NoAcc 
     1367#elif defined(LOOPNOACC_FUSION) 
     1368filter p  = loopArr . loopNoAcc (filterEFL p) 
     1369#else 
     1370filter f = loopArr . loopFilter f 
     1371#endif 
    13061372{-# INLINE filter #-} 
    13071373 
     
    13111377filter' k ps@(PS x s l) 
    13121378    | null ps   = ps 
    1313     | otherwise = inlinePerformIO $ generate l $ \p -> withForeignPtr x $ \f -> do 
     1379    | otherwise = unsafePerformIO $ createAndTrim l $ \p -> withForeignPtr x $ \f -> do 
    13141380        t <- go (f `plusPtr` s) p (f `plusPtr` (s + l)) 
    13151381        return (t `minusPtr` p) -- actual length 
     
    14901556-- | /O(n)/ Sort a ByteString efficiently, using counting sort. 
    14911557sort :: ByteString -> ByteString 
    1492 sort (PS input s l) = create l $ \p -> allocaArray 256 $ \arr -> do 
     1558sort (PS input s l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do 
    14931559 
    14941560    memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize))) 
     
    15041570{- 
    15051571sort :: ByteString -> ByteString 
    1506 sort (PS x s l) = create l $ \p -> withForeignPtr x $ \f -> do 
     1572sort (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f -> do 
    15071573        memcpy p (f `plusPtr` s) l 
    15081574        c_qsort p l -- inplace 
     
    15281594-- /strlen(3)/, and thus the complexity is a /O(n)/. 
    15291595packCString :: CString -> ByteString 
    1530 packCString cstr = inlinePerformIO $ do 
     1596packCString cstr = unsafePerformIO $ do 
    15311597    fp <- newForeignPtr_ (castPtr cstr) 
    1532     return $ PS fp 0 (fromIntegral $ c_strlen cstr) 
     1598    l <- c_strlen cstr 
     1599    return $ PS fp 0 (fromIntegral l) 
    15331600 
    15341601-- | /O(1)/ Build a @ByteString@ from a @CStringLen@. This value will 
     
    15371604-- required. 
    15381605packCStringLen :: CStringLen -> ByteString 
    1539 packCStringLen (ptr,len) = inlinePerformIO $ do 
     1606packCStringLen (ptr,len) = unsafePerformIO $ do 
    15401607    fp <- newForeignPtr_ (castPtr ptr) 
    15411608    return $ PS fp 0 (fromIntegral len) 
     
    15441611-- have a @free(3)@ finalizer associated to it. 
    15451612packMallocCString :: CString -> ByteString 
    1546 packMallocCString cstr = inlinePerformIO $ do 
     1613packMallocCString cstr = unsafePerformIO $ do 
    15471614    fp <- newForeignFreePtr (castPtr cstr) 
    1548     return $ PS fp 0 (fromIntegral $ c_strlen cstr) 
     1615    len <- c_strlen cstr 
     1616    return $ PS fp 0 (fromIntegral len) 
    15491617 
    15501618-- | /O(n) construction/ Use a @ByteString@ with a function requiring a null-terminated @CString@.