module NixTree.Data.InvertedIndex
  ( InvertedIndex,
    iiFromList,
    iiInsert,
    iiSearch,
  )
where

import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text

data InvertedIndex a = InvertedIndex
  { forall a. InvertedIndex a -> Map Text a
iiElems :: Map Text a,
    forall a. InvertedIndex a -> Map Char (Set Text)
iiUnigrams :: Map Char (Set Text),
    forall a. InvertedIndex a -> Map (Char, Char) (Set Text)
iiBigrams :: Map (Char, Char) (Set Text),
    forall a. InvertedIndex a -> Map (Char, Char, Char) (Set Text)
iiTrigrams :: Map (Char, Char, Char) (Set Text)
  }
  deriving ((forall x. InvertedIndex a -> Rep (InvertedIndex a) x)
-> (forall x. Rep (InvertedIndex a) x -> InvertedIndex a)
-> Generic (InvertedIndex a)
forall x. Rep (InvertedIndex a) x -> InvertedIndex a
forall x. InvertedIndex a -> Rep (InvertedIndex a) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall a x. Rep (InvertedIndex a) x -> InvertedIndex a
forall a x. InvertedIndex a -> Rep (InvertedIndex a) x
$cfrom :: forall a x. InvertedIndex a -> Rep (InvertedIndex a) x
from :: forall x. InvertedIndex a -> Rep (InvertedIndex a) x
$cto :: forall a x. Rep (InvertedIndex a) x -> InvertedIndex a
to :: forall x. Rep (InvertedIndex a) x -> InvertedIndex a
Generic, Int -> InvertedIndex a -> ShowS
[InvertedIndex a] -> ShowS
InvertedIndex a -> String
(Int -> InvertedIndex a -> ShowS)
-> (InvertedIndex a -> String)
-> ([InvertedIndex a] -> ShowS)
-> Show (InvertedIndex a)
forall a. Show a => Int -> InvertedIndex a -> ShowS
forall a. Show a => [InvertedIndex a] -> ShowS
forall a. Show a => InvertedIndex a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> InvertedIndex a -> ShowS
showsPrec :: Int -> InvertedIndex a -> ShowS
$cshow :: forall a. Show a => InvertedIndex a -> String
show :: InvertedIndex a -> String
$cshowList :: forall a. Show a => [InvertedIndex a] -> ShowS
showList :: [InvertedIndex a] -> ShowS
Show)

instance (NFData a) => NFData (InvertedIndex a)

iiInsert :: Text -> a -> InvertedIndex a -> InvertedIndex a
iiInsert :: forall a. Text -> a -> InvertedIndex a -> InvertedIndex a
iiInsert Text
txt a
val InvertedIndex {Map Text a
iiElems :: forall a. InvertedIndex a -> Map Text a
iiElems :: Map Text a
iiElems, Map Char (Set Text)
iiUnigrams :: forall a. InvertedIndex a -> Map Char (Set Text)
iiUnigrams :: Map Char (Set Text)
iiUnigrams, Map (Char, Char) (Set Text)
iiBigrams :: forall a. InvertedIndex a -> Map (Char, Char) (Set Text)
iiBigrams :: Map (Char, Char) (Set Text)
iiBigrams, Map (Char, Char, Char) (Set Text)
iiTrigrams :: forall a. InvertedIndex a -> Map (Char, Char, Char) (Set Text)
iiTrigrams :: Map (Char, Char, Char) (Set Text)
iiTrigrams} =
  InvertedIndex
    { iiElems :: Map Text a
iiElems = Text -> a -> Map Text a -> Map Text a
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Text
txt a
val Map Text a
iiElems,
      iiUnigrams :: Map Char (Set Text)
iiUnigrams = Map Char (Set Text) -> Set Char -> Map Char (Set Text)
forall {k}. Ord k => Map k (Set Text) -> Set k -> Map k (Set Text)
combine Map Char (Set Text)
iiUnigrams (Text -> Set Char
unigramsOf Text
txt),
      iiBigrams :: Map (Char, Char) (Set Text)
iiBigrams = Map (Char, Char) (Set Text)
-> Set (Char, Char) -> Map (Char, Char) (Set Text)
forall {k}. Ord k => Map k (Set Text) -> Set k -> Map k (Set Text)
combine Map (Char, Char) (Set Text)
iiBigrams (Text -> Set (Char, Char)
bigramsOf Text
txt),
      iiTrigrams :: Map (Char, Char, Char) (Set Text)
iiTrigrams = Map (Char, Char, Char) (Set Text)
-> Set (Char, Char, Char) -> Map (Char, Char, Char) (Set Text)
forall {k}. Ord k => Map k (Set Text) -> Set k -> Map k (Set Text)
combine Map (Char, Char, Char) (Set Text)
iiTrigrams (Text -> Set (Char, Char, Char)
trigramsOf Text
txt)
    }
  where
    combine :: Map k (Set Text) -> Set k -> Map k (Set Text)
combine Map k (Set Text)
orig Set k
chrs =
      (Set Text -> Set Text -> Set Text)
-> Map k (Set Text) -> Map k (Set Text) -> Map k (Set Text)
forall k a. Ord k => (a -> a -> a) -> Map k a -> Map k a -> Map k a
Map.unionWith
        Set Text -> Set Text -> Set Text
forall a. Semigroup a => a -> a -> a
(<>)
        Map k (Set Text)
orig
        (Set Text -> Set k -> Map k (Set Text)
forall v k. v -> Set k -> Map k v
setToMap (Text -> Set Text
forall a. a -> Set a
Set.singleton Text
txt) Set k
chrs)

iiFromList :: (Foldable f) => f (Text, a) -> InvertedIndex a
iiFromList :: forall (f :: * -> *) a.
Foldable f =>
f (Text, a) -> InvertedIndex a
iiFromList =
  (InvertedIndex a -> (Text, a) -> InvertedIndex a)
-> InvertedIndex a -> f (Text, a) -> InvertedIndex a
forall b a. (b -> a -> b) -> b -> f a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl'
    (((Text, a) -> InvertedIndex a -> InvertedIndex a)
-> InvertedIndex a -> (Text, a) -> InvertedIndex a
forall a b c. (a -> b -> c) -> b -> a -> c
flip ((Text -> a -> InvertedIndex a -> InvertedIndex a)
-> (Text, a) -> InvertedIndex a -> InvertedIndex a
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Text -> a -> InvertedIndex a -> InvertedIndex a
forall a. Text -> a -> InvertedIndex a -> InvertedIndex a
iiInsert))
    (Map Text a
-> Map Char (Set Text)
-> Map (Char, Char) (Set Text)
-> Map (Char, Char, Char) (Set Text)
-> InvertedIndex a
forall a.
Map Text a
-> Map Char (Set Text)
-> Map (Char, Char) (Set Text)
-> Map (Char, Char, Char) (Set Text)
-> InvertedIndex a
InvertedIndex Map Text a
forall k a. Map k a
Map.empty Map Char (Set Text)
forall k a. Map k a
Map.empty Map (Char, Char) (Set Text)
forall k a. Map k a
Map.empty Map (Char, Char, Char) (Set Text)
forall k a. Map k a
Map.empty)

setToMap :: v -> Set k -> Map k v
setToMap :: forall v k. v -> Set k -> Map k v
setToMap v
v = [(k, v)] -> Map k v
forall k a. [(k, a)] -> Map k a
Map.fromDistinctAscList ([(k, v)] -> Map k v) -> (Set k -> [(k, v)]) -> Set k -> Map k v
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (k -> (k, v)) -> [k] -> [(k, v)]
forall a b. (a -> b) -> [a] -> [b]
map (,v
v) ([k] -> [(k, v)]) -> (Set k -> [k]) -> Set k -> [(k, v)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set k -> [k]
forall a. Set a -> [a]
Set.toAscList

unigramsOf :: Text -> Set Char
unigramsOf :: Text -> Set Char
unigramsOf = String -> Set Char
forall a. Ord a => [a] -> Set a
Set.fromList (String -> Set Char) -> (Text -> String) -> Text -> Set Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Text.unpack (Text -> String) -> (Text -> Text) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.toLower

bigramsOf :: Text -> Set (Char, Char)
bigramsOf :: Text -> Set (Char, Char)
bigramsOf Text
txt = case Text -> String
Text.unpack (Text -> Text
Text.toLower Text
txt) of
  p1 :: String
p1@(Char
_ : String
p2) -> [(Char, Char)] -> Set (Char, Char)
forall a. Ord a => [a] -> Set a
Set.fromList ([(Char, Char)] -> Set (Char, Char))
-> [(Char, Char)] -> Set (Char, Char)
forall a b. (a -> b) -> a -> b
$ String -> String -> [(Char, Char)]
forall a b. [a] -> [b] -> [(a, b)]
zip String
p1 String
p2
  String
_ -> Set (Char, Char)
forall a. Set a
Set.empty

trigramsOf :: Text -> Set (Char, Char, Char)
trigramsOf :: Text -> Set (Char, Char, Char)
trigramsOf Text
txt = case Text -> String
Text.unpack (Text -> Text
Text.toLower Text
txt) of
  p1 :: String
p1@(Char
_ : p2 :: String
p2@(Char
_ : String
p3)) -> [(Char, Char, Char)] -> Set (Char, Char, Char)
forall a. Ord a => [a] -> Set a
Set.fromList ([(Char, Char, Char)] -> Set (Char, Char, Char))
-> [(Char, Char, Char)] -> Set (Char, Char, Char)
forall a b. (a -> b) -> a -> b
$ String -> String -> String -> [(Char, Char, Char)]
forall a b c. [a] -> [b] -> [c] -> [(a, b, c)]
zip3 String
p1 String
p2 String
p3
  String
_ -> Set (Char, Char, Char)
forall a. Set a
Set.empty

iiSearch :: forall a. Text -> InvertedIndex a -> Map Text a
iiSearch :: forall a. Text -> InvertedIndex a -> Map Text a
iiSearch Text
txt InvertedIndex {Map Text a
iiElems :: forall a. InvertedIndex a -> Map Text a
iiElems :: Map Text a
iiElems, Map Char (Set Text)
iiUnigrams :: forall a. InvertedIndex a -> Map Char (Set Text)
iiUnigrams :: Map Char (Set Text)
iiUnigrams, Map (Char, Char) (Set Text)
iiBigrams :: forall a. InvertedIndex a -> Map (Char, Char) (Set Text)
iiBigrams :: Map (Char, Char) (Set Text)
iiBigrams, Map (Char, Char, Char) (Set Text)
iiTrigrams :: forall a. InvertedIndex a -> Map (Char, Char, Char) (Set Text)
iiTrigrams :: Map (Char, Char, Char) (Set Text)
iiTrigrams}
  | Text -> Int
Text.length Text
txt Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Map Text a
iiElems
  | Text -> Int
Text.length Text
txt Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = (Text -> Set Char) -> Map Char (Set Text) -> Map Text a
forall c.
Ord c =>
(Text -> Set c) -> Map c (Set Text) -> Map Text a
using Text -> Set Char
unigramsOf Map Char (Set Text)
iiUnigrams
  | Text -> Int
Text.length Text
txt Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2 = (Text -> Set (Char, Char))
-> Map (Char, Char) (Set Text) -> Map Text a
forall c.
Ord c =>
(Text -> Set c) -> Map c (Set Text) -> Map Text a
using Text -> Set (Char, Char)
bigramsOf Map (Char, Char) (Set Text)
iiBigrams
  | Bool
otherwise = (Text -> Set (Char, Char, Char))
-> Map (Char, Char, Char) (Set Text) -> Map Text a
forall c.
Ord c =>
(Text -> Set c) -> Map c (Set Text) -> Map Text a
using Text -> Set (Char, Char, Char)
trigramsOf Map (Char, Char, Char) (Set Text)
iiTrigrams
  where
    lowerTxt :: Text
lowerTxt = Text -> Text
Text.toLower Text
txt
    using :: (Ord c) => (Text -> Set c) -> Map c (Set Text) -> Map Text a
    using :: forall c.
Ord c =>
(Text -> Set c) -> Map c (Set Text) -> Map Text a
using Text -> Set c
getGrams Map c (Set Text)
m =
      Map c (Set Text) -> Map c () -> Map c (Set Text)
forall k a b. Ord k => Map k a -> Map k b -> Map k a
Map.intersection Map c (Set Text)
m (() -> Set c -> Map c ()
forall v k. v -> Set k -> Map k v
setToMap () (Text -> Set c
getGrams Text
txt))
        Map c (Set Text) -> (Map c (Set Text) -> [Set Text]) -> [Set Text]
forall a b. a -> (a -> b) -> b
& Map c (Set Text) -> [Set Text]
forall k a. Map k a -> [a]
Map.elems
        [Set Text] -> ([Set Text] -> Set Text) -> Set Text
forall a b. a -> (a -> b) -> b
& \case
          [] -> Set Text
forall a. Set a
Set.empty
          Set Text
x : [Set Text]
xs -> (Set Text -> Set Text -> Set Text)
-> Set Text -> [Set Text] -> Set Text
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Set Text -> Set Text -> Set Text
forall a. Ord a => Set a -> Set a -> Set a
Set.intersection Set Text
x [Set Text]
xs
        Set Text -> (Set Text -> Set Text) -> Set Text
forall a b. a -> (a -> b) -> b
& (Text -> Bool) -> Set Text -> Set Text
forall a. (a -> Bool) -> Set a -> Set a
Set.filter (\Text
t -> Text
lowerTxt Text -> Text -> Bool
`Text.isInfixOf` Text -> Text
Text.toLower Text
t)
        Set Text -> (Set Text -> Map Text a) -> Map Text a
forall a b. a -> (a -> b) -> b
& Map Text a -> Set Text -> Map Text a
forall k a. Ord k => Map k a -> Set k -> Map k a
Map.restrictKeys Map Text a
iiElems