module NixTree.Data.InvertedIndex ( InvertedIndex, iiFromList, iiInsert, iiSearch, ) where import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.Text as Text data InvertedIndex a = InvertedIndex { forall a. InvertedIndex a -> Map Text a iiElems :: Map Text a, forall a. InvertedIndex a -> Map Char (Set Text) iiUnigrams :: Map Char (Set Text), forall a. InvertedIndex a -> Map (Char, Char) (Set Text) iiBigrams :: Map (Char, Char) (Set Text), forall a. InvertedIndex a -> Map (Char, Char, Char) (Set Text) iiTrigrams :: Map (Char, Char, Char) (Set Text) } deriving ((forall x. InvertedIndex a -> Rep (InvertedIndex a) x) -> (forall x. Rep (InvertedIndex a) x -> InvertedIndex a) -> Generic (InvertedIndex a) forall x. Rep (InvertedIndex a) x -> InvertedIndex a forall x. InvertedIndex a -> Rep (InvertedIndex a) x forall a. (forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a forall a x. Rep (InvertedIndex a) x -> InvertedIndex a forall a x. InvertedIndex a -> Rep (InvertedIndex a) x $cfrom :: forall a x. InvertedIndex a -> Rep (InvertedIndex a) x from :: forall x. InvertedIndex a -> Rep (InvertedIndex a) x $cto :: forall a x. Rep (InvertedIndex a) x -> InvertedIndex a to :: forall x. Rep (InvertedIndex a) x -> InvertedIndex a Generic, Int -> InvertedIndex a -> ShowS [InvertedIndex a] -> ShowS InvertedIndex a -> String (Int -> InvertedIndex a -> ShowS) -> (InvertedIndex a -> String) -> ([InvertedIndex a] -> ShowS) -> Show (InvertedIndex a) forall a. Show a => Int -> InvertedIndex a -> ShowS forall a. Show a => [InvertedIndex a] -> ShowS forall a. Show a => InvertedIndex a -> String forall a. (Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a $cshowsPrec :: forall a. Show a => Int -> InvertedIndex a -> ShowS showsPrec :: Int -> InvertedIndex a -> ShowS $cshow :: forall a. Show a => InvertedIndex a -> String show :: InvertedIndex a -> String $cshowList :: forall a. Show a => [InvertedIndex a] -> ShowS showList :: [InvertedIndex a] -> ShowS Show) instance (NFData a) => NFData (InvertedIndex a) iiInsert :: Text -> a -> InvertedIndex a -> InvertedIndex a iiInsert :: forall a. Text -> a -> InvertedIndex a -> InvertedIndex a iiInsert Text txt a val InvertedIndex {Map Text a iiElems :: forall a. InvertedIndex a -> Map Text a iiElems :: Map Text a iiElems, Map Char (Set Text) iiUnigrams :: forall a. InvertedIndex a -> Map Char (Set Text) iiUnigrams :: Map Char (Set Text) iiUnigrams, Map (Char, Char) (Set Text) iiBigrams :: forall a. InvertedIndex a -> Map (Char, Char) (Set Text) iiBigrams :: Map (Char, Char) (Set Text) iiBigrams, Map (Char, Char, Char) (Set Text) iiTrigrams :: forall a. InvertedIndex a -> Map (Char, Char, Char) (Set Text) iiTrigrams :: Map (Char, Char, Char) (Set Text) iiTrigrams} = InvertedIndex { iiElems :: Map Text a iiElems = Text -> a -> Map Text a -> Map Text a forall k a. Ord k => k -> a -> Map k a -> Map k a Map.insert Text txt a val Map Text a iiElems, iiUnigrams :: Map Char (Set Text) iiUnigrams = Map Char (Set Text) -> Set Char -> Map Char (Set Text) forall {k}. Ord k => Map k (Set Text) -> Set k -> Map k (Set Text) combine Map Char (Set Text) iiUnigrams (Text -> Set Char unigramsOf Text txt), iiBigrams :: Map (Char, Char) (Set Text) iiBigrams = Map (Char, Char) (Set Text) -> Set (Char, Char) -> Map (Char, Char) (Set Text) forall {k}. Ord k => Map k (Set Text) -> Set k -> Map k (Set Text) combine Map (Char, Char) (Set Text) iiBigrams (Text -> Set (Char, Char) bigramsOf Text txt), iiTrigrams :: Map (Char, Char, Char) (Set Text) iiTrigrams = Map (Char, Char, Char) (Set Text) -> Set (Char, Char, Char) -> Map (Char, Char, Char) (Set Text) forall {k}. Ord k => Map k (Set Text) -> Set k -> Map k (Set Text) combine Map (Char, Char, Char) (Set Text) iiTrigrams (Text -> Set (Char, Char, Char) trigramsOf Text txt) } where combine :: Map k (Set Text) -> Set k -> Map k (Set Text) combine Map k (Set Text) orig Set k chrs = (Set Text -> Set Text -> Set Text) -> Map k (Set Text) -> Map k (Set Text) -> Map k (Set Text) forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a Map.unionWith Set Text -> Set Text -> Set Text forall a. Semigroup a => a -> a -> a (<>) Map k (Set Text) orig (Set Text -> Set k -> Map k (Set Text) forall v k. v -> Set k -> Map k v setToMap (Text -> Set Text forall a. a -> Set a Set.singleton Text txt) Set k chrs) iiFromList :: (Foldable f) => f (Text, a) -> InvertedIndex a iiFromList :: forall (f :: * -> *) a. Foldable f => f (Text, a) -> InvertedIndex a iiFromList = (InvertedIndex a -> (Text, a) -> InvertedIndex a) -> InvertedIndex a -> f (Text, a) -> InvertedIndex a forall b a. (b -> a -> b) -> b -> f a -> b forall (t :: * -> *) b a. Foldable t => (b -> a -> b) -> b -> t a -> b foldl' (((Text, a) -> InvertedIndex a -> InvertedIndex a) -> InvertedIndex a -> (Text, a) -> InvertedIndex a forall a b c. (a -> b -> c) -> b -> a -> c flip ((Text -> a -> InvertedIndex a -> InvertedIndex a) -> (Text, a) -> InvertedIndex a -> InvertedIndex a forall a b c. (a -> b -> c) -> (a, b) -> c uncurry Text -> a -> InvertedIndex a -> InvertedIndex a forall a. Text -> a -> InvertedIndex a -> InvertedIndex a iiInsert)) (Map Text a -> Map Char (Set Text) -> Map (Char, Char) (Set Text) -> Map (Char, Char, Char) (Set Text) -> InvertedIndex a forall a. Map Text a -> Map Char (Set Text) -> Map (Char, Char) (Set Text) -> Map (Char, Char, Char) (Set Text) -> InvertedIndex a InvertedIndex Map Text a forall k a. Map k a Map.empty Map Char (Set Text) forall k a. Map k a Map.empty Map (Char, Char) (Set Text) forall k a. Map k a Map.empty Map (Char, Char, Char) (Set Text) forall k a. Map k a Map.empty) setToMap :: v -> Set k -> Map k v setToMap :: forall v k. v -> Set k -> Map k v setToMap v v = [(k, v)] -> Map k v forall k a. [(k, a)] -> Map k a Map.fromDistinctAscList ([(k, v)] -> Map k v) -> (Set k -> [(k, v)]) -> Set k -> Map k v forall b c a. (b -> c) -> (a -> b) -> a -> c . (k -> (k, v)) -> [k] -> [(k, v)] forall a b. (a -> b) -> [a] -> [b] map (,v v) ([k] -> [(k, v)]) -> (Set k -> [k]) -> Set k -> [(k, v)] forall b c a. (b -> c) -> (a -> b) -> a -> c . Set k -> [k] forall a. Set a -> [a] Set.toAscList unigramsOf :: Text -> Set Char unigramsOf :: Text -> Set Char unigramsOf = String -> Set Char forall a. Ord a => [a] -> Set a Set.fromList (String -> Set Char) -> (Text -> String) -> Text -> Set Char forall b c a. (b -> c) -> (a -> b) -> a -> c . Text -> String Text.unpack (Text -> String) -> (Text -> Text) -> Text -> String forall b c a. (b -> c) -> (a -> b) -> a -> c . Text -> Text Text.toLower bigramsOf :: Text -> Set (Char, Char) bigramsOf :: Text -> Set (Char, Char) bigramsOf Text txt = case Text -> String Text.unpack (Text -> Text Text.toLower Text txt) of p1 :: String p1@(Char _ : String p2) -> [(Char, Char)] -> Set (Char, Char) forall a. Ord a => [a] -> Set a Set.fromList ([(Char, Char)] -> Set (Char, Char)) -> [(Char, Char)] -> Set (Char, Char) forall a b. (a -> b) -> a -> b $ String -> String -> [(Char, Char)] forall a b. [a] -> [b] -> [(a, b)] zip String p1 String p2 String _ -> Set (Char, Char) forall a. Set a Set.empty trigramsOf :: Text -> Set (Char, Char, Char) trigramsOf :: Text -> Set (Char, Char, Char) trigramsOf Text txt = case Text -> String Text.unpack (Text -> Text Text.toLower Text txt) of p1 :: String p1@(Char _ : p2 :: String p2@(Char _ : String p3)) -> [(Char, Char, Char)] -> Set (Char, Char, Char) forall a. Ord a => [a] -> Set a Set.fromList ([(Char, Char, Char)] -> Set (Char, Char, Char)) -> [(Char, Char, Char)] -> Set (Char, Char, Char) forall a b. (a -> b) -> a -> b $ String -> String -> String -> [(Char, Char, Char)] forall a b c. [a] -> [b] -> [c] -> [(a, b, c)] zip3 String p1 String p2 String p3 String _ -> Set (Char, Char, Char) forall a. Set a Set.empty iiSearch :: forall a. Text -> InvertedIndex a -> Map Text a iiSearch :: forall a. Text -> InvertedIndex a -> Map Text a iiSearch Text txt InvertedIndex {Map Text a iiElems :: forall a. InvertedIndex a -> Map Text a iiElems :: Map Text a iiElems, Map Char (Set Text) iiUnigrams :: forall a. InvertedIndex a -> Map Char (Set Text) iiUnigrams :: Map Char (Set Text) iiUnigrams, Map (Char, Char) (Set Text) iiBigrams :: forall a. InvertedIndex a -> Map (Char, Char) (Set Text) iiBigrams :: Map (Char, Char) (Set Text) iiBigrams, Map (Char, Char, Char) (Set Text) iiTrigrams :: forall a. InvertedIndex a -> Map (Char, Char, Char) (Set Text) iiTrigrams :: Map (Char, Char, Char) (Set Text) iiTrigrams} | Text -> Int Text.length Text txt Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int 0 = Map Text a iiElems | Text -> Int Text.length Text txt Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int 1 = (Text -> Set Char) -> Map Char (Set Text) -> Map Text a forall c. Ord c => (Text -> Set c) -> Map c (Set Text) -> Map Text a using Text -> Set Char unigramsOf Map Char (Set Text) iiUnigrams | Text -> Int Text.length Text txt Int -> Int -> Bool forall a. Eq a => a -> a -> Bool == Int 2 = (Text -> Set (Char, Char)) -> Map (Char, Char) (Set Text) -> Map Text a forall c. Ord c => (Text -> Set c) -> Map c (Set Text) -> Map Text a using Text -> Set (Char, Char) bigramsOf Map (Char, Char) (Set Text) iiBigrams | Bool otherwise = (Text -> Set (Char, Char, Char)) -> Map (Char, Char, Char) (Set Text) -> Map Text a forall c. Ord c => (Text -> Set c) -> Map c (Set Text) -> Map Text a using Text -> Set (Char, Char, Char) trigramsOf Map (Char, Char, Char) (Set Text) iiTrigrams where lowerTxt :: Text lowerTxt = Text -> Text Text.toLower Text txt using :: (Ord c) => (Text -> Set c) -> Map c (Set Text) -> Map Text a using :: forall c. Ord c => (Text -> Set c) -> Map c (Set Text) -> Map Text a using Text -> Set c getGrams Map c (Set Text) m = Map c (Set Text) -> Map c () -> Map c (Set Text) forall k a b. Ord k => Map k a -> Map k b -> Map k a Map.intersection Map c (Set Text) m (() -> Set c -> Map c () forall v k. v -> Set k -> Map k v setToMap () (Text -> Set c getGrams Text txt)) Map c (Set Text) -> (Map c (Set Text) -> [Set Text]) -> [Set Text] forall a b. a -> (a -> b) -> b & Map c (Set Text) -> [Set Text] forall k a. Map k a -> [a] Map.elems [Set Text] -> ([Set Text] -> Set Text) -> Set Text forall a b. a -> (a -> b) -> b & \case [] -> Set Text forall a. Set a Set.empty Set Text x : [Set Text] xs -> (Set Text -> Set Text -> Set Text) -> Set Text -> [Set Text] -> Set Text forall b a. (b -> a -> b) -> b -> [a] -> b forall (t :: * -> *) b a. Foldable t => (b -> a -> b) -> b -> t a -> b foldl' Set Text -> Set Text -> Set Text forall a. Ord a => Set a -> Set a -> Set a Set.intersection Set Text x [Set Text] xs Set Text -> (Set Text -> Set Text) -> Set Text forall a b. a -> (a -> b) -> b & (Text -> Bool) -> Set Text -> Set Text forall a. (a -> Bool) -> Set a -> Set a Set.filter (\Text t -> Text lowerTxt Text -> Text -> Bool `Text.isInfixOf` Text -> Text Text.toLower Text t) Set Text -> (Set Text -> Map Text a) -> Map Text a forall a b. a -> (a -> b) -> b & Map Text a -> Set Text -> Map Text a forall k a. Ord k => Map k a -> Set k -> Map k a Map.restrictKeys Map Text a iiElems