{-# LANGUAGE RecordWildCards #-}
module Langchain.VectorStore.InMemory
( InMemory (..)
, fromDocuments
, emptyInMemoryVectorStore
, norm
, dotProduct
, cosineSimilarity
) where
import Data.Int (Int64)
import Data.List (sortBy)
import qualified Data.Map.Strict as Map
import Data.Ord (comparing)
import Langchain.DocumentLoader.Core (Document)
import Langchain.Embeddings.Core
import Langchain.VectorStore.Core
dotProduct :: [Float] -> [Float] -> Float
dotProduct :: [Float] -> [Float] -> Float
dotProduct [Float]
a [Float]
b = [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Float] -> Float) -> [Float] -> Float
forall a b. (a -> b) -> a -> b
$ (Float -> Float -> Float) -> [Float] -> [Float] -> [Float]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Float -> Float -> Float
forall a. Num a => a -> a -> a
(*) [Float]
a [Float]
b
norm :: [Float] -> Float
norm :: [Float] -> Float
norm [Float]
a = Float -> Float
forall a. Floating a => a -> a
sqrt (Float -> Float) -> Float -> Float
forall a b. (a -> b) -> a -> b
$ [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Float] -> Float) -> [Float] -> Float
forall a b. (a -> b) -> a -> b
$ (Float -> Float) -> [Float] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (Float -> Int -> Float
forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int)) [Float]
a
cosineSimilarity :: [Float] -> [Float] -> Float
cosineSimilarity :: [Float] -> [Float] -> Float
cosineSimilarity [Float]
a [Float]
b = [Float] -> [Float] -> Float
dotProduct [Float]
a [Float]
b Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ ([Float] -> Float
norm [Float]
a Float -> Float -> Float
forall a. Num a => a -> a -> a
* [Float] -> Float
norm [Float]
b)
emptyInMemoryVectorStore :: Embeddings m => m -> InMemory m
emptyInMemoryVectorStore :: forall m. Embeddings m => m -> InMemory m
emptyInMemoryVectorStore m
model = m -> Map Int64 (Document, [Float]) -> InMemory m
forall m.
Embeddings m =>
m -> Map Int64 (Document, [Float]) -> InMemory m
InMemory m
model Map Int64 (Document, [Float])
forall k a. Map k a
Map.empty
fromDocuments :: Embeddings m => m -> [Document] -> IO (Either String (InMemory m))
fromDocuments :: forall m.
Embeddings m =>
m -> [Document] -> IO (Either String (InMemory m))
fromDocuments m
model [Document]
docs = do
let vs :: InMemory m
vs = m -> InMemory m
forall m. Embeddings m => m -> InMemory m
emptyInMemoryVectorStore m
model
InMemory m -> [Document] -> IO (Either String (InMemory m))
forall m. VectorStore m => m -> [Document] -> IO (Either String m)
addDocuments InMemory m
vs [Document]
docs
data Embeddings m => InMemory m = InMemory
{ forall m. Embeddings m => InMemory m -> m
embeddingModel :: m
, forall m.
Embeddings m =>
InMemory m -> Map Int64 (Document, [Float])
store :: Map.Map Int64 (Document, [Float])
}
deriving (Int -> InMemory m -> ShowS
[InMemory m] -> ShowS
InMemory m -> String
(Int -> InMemory m -> ShowS)
-> (InMemory m -> String)
-> ([InMemory m] -> ShowS)
-> Show (InMemory m)
forall m. (Embeddings m, Show m) => Int -> InMemory m -> ShowS
forall m. (Embeddings m, Show m) => [InMemory m] -> ShowS
forall m. (Embeddings m, Show m) => InMemory m -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall m. (Embeddings m, Show m) => Int -> InMemory m -> ShowS
showsPrec :: Int -> InMemory m -> ShowS
$cshow :: forall m. (Embeddings m, Show m) => InMemory m -> String
show :: InMemory m -> String
$cshowList :: forall m. (Embeddings m, Show m) => [InMemory m] -> ShowS
showList :: [InMemory m] -> ShowS
Show, InMemory m -> InMemory m -> Bool
(InMemory m -> InMemory m -> Bool)
-> (InMemory m -> InMemory m -> Bool) -> Eq (InMemory m)
forall m. (Embeddings m, Eq m) => InMemory m -> InMemory m -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall m. (Embeddings m, Eq m) => InMemory m -> InMemory m -> Bool
== :: InMemory m -> InMemory m -> Bool
$c/= :: forall m. (Embeddings m, Eq m) => InMemory m -> InMemory m -> Bool
/= :: InMemory m -> InMemory m -> Bool
Eq)
instance Embeddings m => VectorStore (InMemory m) where
addDocuments :: InMemory m -> [Document] -> IO (Either String (InMemory m))
addDocuments InMemory m
inMem [Document]
docs = do
Either String [[Float]]
eRes <- m -> [Document] -> IO (Either String [[Float]])
forall m.
Embeddings m =>
m -> [Document] -> IO (Either String [[Float]])
embedDocuments (InMemory m -> m
forall m. Embeddings m => InMemory m -> m
embeddingModel InMemory m
inMem) [Document]
docs
case Either String [[Float]]
eRes of
Left String
err -> Either String (InMemory m) -> IO (Either String (InMemory m))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either String (InMemory m) -> IO (Either String (InMemory m)))
-> Either String (InMemory m) -> IO (Either String (InMemory m))
forall a b. (a -> b) -> a -> b
$ String -> Either String (InMemory m)
forall a b. a -> Either a b
Left String
err
Right [[Float]]
floats -> do
let currStore :: Map Int64 (Document, [Float])
currStore = InMemory m -> Map Int64 (Document, [Float])
forall m.
Embeddings m =>
InMemory m -> Map Int64 (Document, [Float])
store InMemory m
inMem
mbMaxKey :: Maybe (Int64, (Document, [Float]))
mbMaxKey = (Map Int64 (Document, [Float]) -> Maybe (Int64, (Document, [Float]))
forall k a. Map k a -> Maybe (k, a)
Map.lookupMax Map Int64 (Document, [Float])
currStore)
newStore :: Map Int64 (Document, [Float])
newStore = [(Int64, (Document, [Float]))] -> Map Int64 (Document, [Float])
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(Int64, (Document, [Float]))] -> Map Int64 (Document, [Float]))
-> [(Int64, (Document, [Float]))] -> Map Int64 (Document, [Float])
forall a b. (a -> b) -> a -> b
$ [Int64] -> [(Document, [Float])] -> [(Int64, (Document, [Float]))]
forall a b. [a] -> [b] -> [(a, b)]
zip [(Int64
-> ((Int64, (Document, [Float])) -> Int64)
-> Maybe (Int64, (Document, [Float]))
-> Int64
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Int64
1 (\(Int64, (Document, [Float]))
x -> (Int64, (Document, [Float])) -> Int64
forall a b. (a, b) -> a
fst (Int64, (Document, [Float]))
x Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
1) Maybe (Int64, (Document, [Float]))
mbMaxKey) ..] ([Document] -> [[Float]] -> [(Document, [Float])]
forall a b. [a] -> [b] -> [(a, b)]
zip [Document]
docs [[Float]]
floats)
newInMem :: InMemory m
newInMem = InMemory m
inMem {store = Map.union newStore currStore}
Either String (InMemory m) -> IO (Either String (InMemory m))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either String (InMemory m) -> IO (Either String (InMemory m)))
-> Either String (InMemory m) -> IO (Either String (InMemory m))
forall a b. (a -> b) -> a -> b
$ InMemory m -> Either String (InMemory m)
forall a b. b -> Either a b
Right InMemory m
newInMem
delete :: InMemory m -> [Int64] -> IO (Either String (InMemory m))
delete InMemory m
inMem [Int64]
ids = do
let currStore :: Map Int64 (Document, [Float])
currStore = InMemory m -> Map Int64 (Document, [Float])
forall m.
Embeddings m =>
InMemory m -> Map Int64 (Document, [Float])
store InMemory m
inMem
newStore :: Map Int64 (Document, [Float])
newStore = (Map Int64 (Document, [Float])
-> Int64 -> Map Int64 (Document, [Float]))
-> Map Int64 (Document, [Float])
-> [Int64]
-> Map Int64 (Document, [Float])
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\Map Int64 (Document, [Float])
acc Int64
i -> Int64
-> Map Int64 (Document, [Float]) -> Map Int64 (Document, [Float])
forall k a. Ord k => k -> Map k a -> Map k a
Map.delete Int64
i Map Int64 (Document, [Float])
acc) Map Int64 (Document, [Float])
currStore [Int64]
ids
newInMem :: InMemory m
newInMem = InMemory m
inMem {store = newStore}
Either String (InMemory m) -> IO (Either String (InMemory m))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either String (InMemory m) -> IO (Either String (InMemory m)))
-> Either String (InMemory m) -> IO (Either String (InMemory m))
forall a b. (a -> b) -> a -> b
$ InMemory m -> Either String (InMemory m)
forall a b. b -> Either a b
Right InMemory m
newInMem
similaritySearch :: InMemory m -> Text -> Int -> IO (Either String [Document])
similaritySearch InMemory m
vs Text
query Int
k = do
Either String [Float]
eQueryEmbedding <- m -> Text -> IO (Either String [Float])
forall m. Embeddings m => m -> Text -> IO (Either String [Float])
embedQuery (InMemory m -> m
forall m. Embeddings m => InMemory m -> m
embeddingModel InMemory m
vs) Text
query
case Either String [Float]
eQueryEmbedding of
Left String
err -> Either String [Document] -> IO (Either String [Document])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String [Document] -> IO (Either String [Document]))
-> Either String [Document] -> IO (Either String [Document])
forall a b. (a -> b) -> a -> b
$ String -> Either String [Document]
forall a b. a -> Either a b
Left String
err
Right [Float]
queryVec -> InMemory m -> [Float] -> Int -> IO (Either String [Document])
forall m.
VectorStore m =>
m -> [Float] -> Int -> IO (Either String [Document])
similaritySearchByVector InMemory m
vs [Float]
queryVec Int
k
similaritySearchByVector :: InMemory m -> [Float] -> Int -> IO (Either String [Document])
similaritySearchByVector InMemory m
vs [Float]
queryVec Int
k = do
let similarities :: [(Document, Float)]
similarities =
((Document, [Float]) -> (Document, Float))
-> [(Document, [Float])] -> [(Document, Float)]
forall a b. (a -> b) -> [a] -> [b]
map
(\(Document
doc, [Float]
vec) -> (Document
doc, [Float] -> [Float] -> Float
cosineSimilarity [Float]
queryVec [Float]
vec))
(((Int64, (Document, [Float])) -> (Document, [Float]))
-> [(Int64, (Document, [Float]))] -> [(Document, [Float])]
forall a b. (a -> b) -> [a] -> [b]
map (Int64, (Document, [Float])) -> (Document, [Float])
forall a b. (a, b) -> b
snd ([(Int64, (Document, [Float]))] -> [(Document, [Float])])
-> [(Int64, (Document, [Float]))] -> [(Document, [Float])]
forall a b. (a -> b) -> a -> b
$ Map Int64 (Document, [Float]) -> [(Int64, (Document, [Float]))]
forall k a. Map k a -> [(k, a)]
Map.toList (Map Int64 (Document, [Float]) -> [(Int64, (Document, [Float]))])
-> Map Int64 (Document, [Float]) -> [(Int64, (Document, [Float]))]
forall a b. (a -> b) -> a -> b
$ InMemory m -> Map Int64 (Document, [Float])
forall m.
Embeddings m =>
InMemory m -> Map Int64 (Document, [Float])
store InMemory m
vs)
sorted :: [(Document, Float)]
sorted = ((Document, Float) -> (Document, Float) -> Ordering)
-> [(Document, Float)] -> [(Document, Float)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Document, Float) -> Float)
-> (Document, Float) -> (Document, Float) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Float -> Float
forall a. Num a => a -> a
negate (Float -> Float)
-> ((Document, Float) -> Float) -> (Document, Float) -> Float
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Document, Float) -> Float
forall a b. (a, b) -> b
snd)) [(Document, Float)]
similarities
topK :: [(Document, Float)]
topK = Int -> [(Document, Float)] -> [(Document, Float)]
forall a. Int -> [a] -> [a]
take Int
k [(Document, Float)]
sorted
Either String [Document] -> IO (Either String [Document])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String [Document] -> IO (Either String [Document]))
-> Either String [Document] -> IO (Either String [Document])
forall a b. (a -> b) -> a -> b
$ [Document] -> Either String [Document]
forall a b. b -> Either a b
Right ([Document] -> Either String [Document])
-> [Document] -> Either String [Document]
forall a b. (a -> b) -> a -> b
$ ((Document, Float) -> Document)
-> [(Document, Float)] -> [Document]
forall a b. (a -> b) -> [a] -> [b]
map (Document, Float) -> Document
forall a b. (a, b) -> a
fst [(Document, Float)]
topK