module GHC.IO.Encoding.Failure ( CodingFailureMode(..), codingFailureModeSuffix, isSurrogate, recoverDecode, recoverEncode ) where import GHC.IO import GHC.IO.Buffer import GHC.IO.Exception import GHC.Base import GHC.Word import GHC.Show import GHC.Num import GHC.Real ( fromIntegral ) import Data.Maybe data CodingFailureMode = ErrorOnCodingFailure | IgnoreCodingFailure | TransliterateCodingFailure | RoundtripFailure deriving (Show) codingFailureModeSuffix :: CodingFailureMode -> String codingFailureModeSuffix ErrorOnCodingFailure = "" codingFailureModeSuffix IgnoreCodingFailure = "//IGNORE" codingFailureModeSuffix TransliterateCodingFailure = "//TRANSLIT" codingFailureModeSuffix RoundtripFailure = "//ROUNDTRIP" unrepresentableChar :: Char unrepresentableChar = '\xFFFD' isSurrogate :: Char -> Bool isSurrogate c = (0xD800 <= x && x <= 0xDBFF) || (0xDC00 <= x && x <= 0xDFFF) where x = ord c escapeToRoundtripCharacterSurrogate :: Word8 -> Char escapeToRoundtripCharacterSurrogate b | b < 128 = chr (fromIntegral b) | otherwise = chr (0xDC00 + fromIntegral b) unescapeRoundtripCharacterSurrogate :: Char -> Maybe Word8 unescapeRoundtripCharacterSurrogate c | 0xDC80 <= x && x < 0xDD00 = Just (fromIntegral x) | otherwise = Nothing where x = ord c recoverDecode :: CodingFailureMode -> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char) recoverDecode cfm input@Buffer{ bufRaw=iraw, bufL=ir, bufR=_ } output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow } = do case cfm of ErrorOnCodingFailure -> ioe_decodingError IgnoreCodingFailure -> return (input { bufL=ir+1 }, output) TransliterateCodingFailure -> do ow' <- writeCharBuf oraw ow unrepresentableChar return (input { bufL=ir+1 }, output { bufR=ow' }) RoundtripFailure -> do b <- readWord8Buf iraw ir ow' <- writeCharBuf oraw ow (escapeToRoundtripCharacterSurrogate b) return (input { bufL=ir+1 }, output { bufR=ow' }) recoverEncode :: CodingFailureMode -> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8) recoverEncode cfm input@Buffer{ bufRaw=iraw, bufL=ir, bufR=_ } output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow } = do (c,ir') <- readCharBuf iraw ir case cfm of IgnoreCodingFailure -> return (input { bufL=ir' }, output) TransliterateCodingFailure -> do if c == '?' then return (input { bufL=ir' }, output) else do _ir' <- writeCharBuf iraw ir '?' return (input, output) RoundtripFailure | Just x <- unescapeRoundtripCharacterSurrogate c -> do writeWord8Buf oraw ow x return (input { bufL=ir' }, output { bufR=ow+1 }) _ -> ioe_encodingError ioe_decodingError :: IO a ioe_decodingError = ioException (IOError Nothing InvalidArgument "recoverDecode" "invalid byte sequence" Nothing Nothing) ioe_encodingError :: IO a ioe_encodingError = ioException (IOError Nothing InvalidArgument "recoverEncode" "invalid character" Nothing Nothing)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4