-- | Module : Data.BaseSystem -- Description : Defines + implements the BaseSystem type-class -- Copyright : Zoey McBride (c) 2026 -- License : BSD-3-Clause -- Maintainer : zoeymcbride@mailbox.org -- Stability : experimental -- -- This file provides methods for encoding/decoding binary data to/from strings -- of digits in some basesystem. Digits can represent a sum of value placements -- (RadixSystem, eg: binary, base10) *OR* a concatination of fixed-width bit -- groups (BitwiseSystem, eg: base64, base32). module Data.BaseSystem ( Encoder, Decoder, BaseSystem (encoder, decoder), ) where import Data.BaseSystem.Alphabet (Alphabet, alphaRadix) import Data.BaseSystem.Alphabet qualified as Alpha import Data.BaseSystem.Internal import Data.Bits ((.&.), (.<<.), (.>>.), (.|.)) import Data.ByteString (ByteString) import Data.ByteString qualified as Bytes import Data.Maybe (fromJust, fromMaybe) import Data.Text qualified as Text -- | Type-class (interface) for implementing encode/decode functionality for -- some data structure w/ Alphabet `a`. class BaseSystem a where encoder :: a -> Encoder decoder :: a -> Decoder -- | Function signature for encoding some ByteString to String of digits. type Encoder = ByteString -> String -- | Function signature for decoding some String of digits to ByteString, given -- all digits in String are valid Symbols. type Decoder = String -> Maybe ByteString -- | Implements BaseSystem for number systems built by modular arithmetic w/ the -- radix. instance BaseSystem RadixSystem where encoder :: RadixSystem -> ByteString -> String encoder (RadixSystem _ abc) = let radix = fromIntegral $ alphaRadix abc in concatMap Text.unpack . divModSymbols . takeWhile divModContinue . iterateInit (\(num, _) -> num `divMod` radix) mkDivMod . bytesToInteger where -- Initial value to iterate on divMod. mkDivMod numerator = (numerator, 0) -- Predicates divMod iteration results. divModContinue (nextinput, curresult) = nextinput > 0 || curresult > 0 -- Resolves the symbols from the result of iterating divMod. divModSymbols = reverse . fromJust . mapM (\(_, digit) -> Alpha.resolveSymbol abc digit) . replaceNull [(undefined, 0)] decoder :: RadixSystem -> String -> Maybe ByteString decoder (RadixSystem _ abc) str = let radix = fromIntegral $ alphaRadix abc in binaryDecoder Nothing (Text.pack str) $ \curvalue symbol -> do value <- Alpha.resolveValue abc symbol return $ curvalue * radix + value -- | Resolves symbols from Alphabet for a BitwiseSystem's encoder. Partitions -- a ByteString into N sized bitgroups where N is the bitwidth of the Alphabet's -- radix. *IMPORTANT*: this function requires the groupsize to be a multiple of -- two because it generates a mask from subtracting it by 1. groupSymbols :: Alphabet -> Int -> Int -> ByteString -> [Alpha.Symbol] groupSymbols abc symbits groupsize groupbytes = let groupint = case bytesToInteger groupbytes of groupnum | fitsGroup groupsize groupbytes groupnum -> fromIntegral groupnum | otherwise -> error "invalid group size" in -- Crash if the implementation isn't complete fromJust -- Extract the value from the shift and resolve its symbol. . mapM (Alpha.resolveSymbol abc . valueExtract . nextInt groupint) -- Take all non-zero shifts. . takeWhile (>= 0) -- Generate a list of shift values from the # bits in groupbytes. $ iterateInit shiftValue mkBitLength groupbytes where -- Gets the next int to extract group. nextInt groupint shift = groupint .>>. shift -- Extracts the first group from LSB from an Int. valueExtract int = fromIntegral $ int .&. (alphaRadix abc - 1) -- Finds the length in bits of a ByteString. mkBitLength bstr = fromIntegral $ 8 * Bytes.length bstr -- Gives the current shift value in iteration. shiftValue bitstotal = bitstotal - symbits -- | Implements encoder/decoder for a BitwiseSystem. instance BaseSystem BitwiseSystem where encoder :: BitwiseSystem -> ByteString -> String encoder (BitwiseSystem _ abc symbits groupbytes _ padmethod) input = let -- Total # of bytes from input. bytestotal = Bytes.length input -- Total # of bits from input. bitstotal = fromIntegral (8 * bytestotal) :: Double -- Actual # of symbols for the # of bits in bytes. numsymbols = ceiling (bitstotal / fromIntegral symbits) -- Minimum # of symbols to put in the resulting string. putsymbols = max 2 numsymbols -- Result from applying the padding method to the # of bytes. padapply = paddingResolve <$> padmethod <*> Just bytestotal in (++ fromMaybe "" padapply) . take putsymbols . concatMap (\(group, _) -> groupString group) . takeWhile (\(group, _) -> Bytes.length group > 0) . iterateInit (\(_, rest) -> Bytes.splitAt groupbytes rest) mkSplit $ minimalBytes bytestotal where -- Inits the iteration for Bytes.splitAt. mkSplit initial = (Bytes.empty, initial) -- Creates a string of symbols from a ByteString splitAt. groupString = Text.unpack . Text.concat . groupSymbols abc symbits groupbytes -- Gives the minimal amount of bytes to produce a correct encoding. {-# INLINE minimalBytes #-} minimalBytes bytestotal | bytestotal == 0 = Bytes.pack [0, 0] | bytestotal == 1 = Bytes.snoc input 0 | bytesmodulus /= 0 = Bytes.append input $ Bytes.replicate zeros 0 | otherwise = input where bytesmodulus = bytestotal `mod` groupbytes zeros = groupbytes - bytesmodulus decoder :: BitwiseSystem -> String -> Maybe ByteString decoder (BitwiseSystem _ abc symbits _ groupsyms pm) str = let -- Gives just padding char if not nothing. padchar = paddingChar <$> pm -- Creates symbol from maybe padding char. padsymbol = Text.singleton <$> padchar -- Removes the trailing padding chars from the Text of str. nopad = Text.dropWhileEnd (\sym -> Just sym == padchar) $ Text.pack str -- Gives the total available characters. totalsyms = Text.length nopad -- Finds the needs # of symbols for the final padding chars. needsyms = groupsyms - (totalsyms `mod` groupsyms) -- Gives the number of bits to correct for missing final padding chars. needbits = needsyms * symbits -- Offset to Integer to byte-align the bits in the final ByteString. bytealign = 8 * ceiling (fromIntegral needbits / 8 :: Double) in binaryDecoder (finalizeBits needsyms bytealign) nopad $ \curvalue symbol -> do value <- Alpha.resolveValue abc symbol return $ if Just symbol /= padsymbol then (curvalue .<<. symbits) .|. value else curvalue .<<. symbits where -- Finds the value for the expected amount of padding regardless if the -- BaseSystem has a PaddingMethod or not. {-# INLINE finalizeBits #-} finalizeBits needsyms align -- If the # of Symbols in last group is the groupsyms size, pass | needsyms == groupsyms = Nothing -- Find the needed # of bits and align the output to first byte. | otherwise = Just $ \finalvalue -> finalvalue .<<. (symbits * needsyms) .>>. align