{-# LANGUAGE RecordWildCards #-}

{- |
Module      : Langchain.VectorStore.InMemory
Description : In-memory vector store implementation for LangChain Haskell
Copyright   : (c) 2025 Tushar Adhatrao
License     : MIT
Maintainer  : Tushar Adhatrao <tusharadhatrao@gmail.com>
Stability   : experimental

In-memory vector store implementation following LangChain's patterns, supporting:

- Document storage with embeddings
- Cosine similarity search
- Integration with embedding models

Example usage:

@
-- Create store with Ollama embeddings
ollamaEmb = OllamaEmbeddings "nomic-embed" Nothing Nothing
inMem = emptyInMemoryVectorStore ollamaEmb

-- Add documents
docs = [Document "Hello World" mempty, Document "Haskell is functional" mempty]
updatedStore <- addDocuments inMem docs

-- Perform similarity search
results <- similaritySearch updatedStore "functional programming" 1
-- Right [Document "Haskell is functional"...]
@
-}
module Langchain.VectorStore.InMemory
  ( InMemory (..)
  , fromDocuments
  , emptyInMemoryVectorStore
  , norm
  , dotProduct
  , cosineSimilarity
  ) where

import Data.Int (Int64)
import Data.List (sortBy)
import qualified Data.Map.Strict as Map
import Data.Ord (comparing)
import Langchain.DocumentLoader.Core (Document)
import Langchain.Embeddings.Core
import Langchain.VectorStore.Core

{- | Compute dot product of two vectors
Example:

>>> dotProduct [1,2,3] [4,5,6]
32.0
-}
dotProduct :: [Float] -> [Float] -> Float
dotProduct :: [Float] -> [Float] -> Float
dotProduct [Float]
a [Float]
b = [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Float] -> Float) -> [Float] -> Float
forall a b. (a -> b) -> a -> b
$ (Float -> Float -> Float) -> [Float] -> [Float] -> [Float]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Float -> Float -> Float
forall a. Num a => a -> a -> a
(*) [Float]
a [Float]
b

{- | Calculate Euclidean norm of a vector
Example:

>>> norm [3,4]
5.0
-}
norm :: [Float] -> Float
norm :: [Float] -> Float
norm [Float]
a = Float -> Float
forall a. Floating a => a -> a
sqrt (Float -> Float) -> Float -> Float
forall a b. (a -> b) -> a -> b
$ [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Float] -> Float) -> [Float] -> Float
forall a b. (a -> b) -> a -> b
$ (Float -> Float) -> [Float] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (Float -> Int -> Float
forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int)) [Float]
a

{- | Calculate cosine similarity between vectors
Example:

>>> cosineSimilarity [1,2] [2,4]
1.0
-}
cosineSimilarity :: [Float] -> [Float] -> Float
cosineSimilarity :: [Float] -> [Float] -> Float
cosineSimilarity [Float]
a [Float]
b = [Float] -> [Float] -> Float
dotProduct [Float]
a [Float]
b Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ ([Float] -> Float
norm [Float]
a Float -> Float -> Float
forall a. Num a => a -> a -> a
* [Float] -> Float
norm [Float]
b)

{- | Create empty in-memory store with embedding model
Example:

>>> emptyInMemoryVectorStore ollamaEmb
InMemory {_embeddingModel = ..., _store = empty}
-}
emptyInMemoryVectorStore :: Embeddings m => m -> InMemory m
emptyInMemoryVectorStore :: forall m. Embeddings m => m -> InMemory m
emptyInMemoryVectorStore m
model = m -> Map Int64 (Document, [Float]) -> InMemory m
forall m.
Embeddings m =>
m -> Map Int64 (Document, [Float]) -> InMemory m
InMemory m
model Map Int64 (Document, [Float])
forall k a. Map k a
Map.empty

{- | Initialize store from documents using embeddings
Example:

>>> fromDocuments ollamaEmb [Document "Test" mempty]
Right (InMemory {_store = ...})
-}
fromDocuments :: Embeddings m => m -> [Document] -> IO (Either String (InMemory m))
fromDocuments :: forall m.
Embeddings m =>
m -> [Document] -> IO (Either String (InMemory m))
fromDocuments m
model [Document]
docs = do
  let vs :: InMemory m
vs = m -> InMemory m
forall m. Embeddings m => m -> InMemory m
emptyInMemoryVectorStore m
model
  InMemory m -> [Document] -> IO (Either String (InMemory m))
forall m. VectorStore m => m -> [Document] -> IO (Either String m)
addDocuments InMemory m
vs [Document]
docs

{- | In-memory vector store implementation
Stores documents with:

- Embedding model reference
- Map of document IDs to (Document, embedding) pairs
-}
data Embeddings m => InMemory m = InMemory
  { forall m. Embeddings m => InMemory m -> m
embeddingModel :: m
  , forall m.
Embeddings m =>
InMemory m -> Map Int64 (Document, [Float])
store :: Map.Map Int64 (Document, [Float])
  }
  deriving (Int -> InMemory m -> ShowS
[InMemory m] -> ShowS
InMemory m -> String
(Int -> InMemory m -> ShowS)
-> (InMemory m -> String)
-> ([InMemory m] -> ShowS)
-> Show (InMemory m)
forall m. (Embeddings m, Show m) => Int -> InMemory m -> ShowS
forall m. (Embeddings m, Show m) => [InMemory m] -> ShowS
forall m. (Embeddings m, Show m) => InMemory m -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall m. (Embeddings m, Show m) => Int -> InMemory m -> ShowS
showsPrec :: Int -> InMemory m -> ShowS
$cshow :: forall m. (Embeddings m, Show m) => InMemory m -> String
show :: InMemory m -> String
$cshowList :: forall m. (Embeddings m, Show m) => [InMemory m] -> ShowS
showList :: [InMemory m] -> ShowS
Show, InMemory m -> InMemory m -> Bool
(InMemory m -> InMemory m -> Bool)
-> (InMemory m -> InMemory m -> Bool) -> Eq (InMemory m)
forall m. (Embeddings m, Eq m) => InMemory m -> InMemory m -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall m. (Embeddings m, Eq m) => InMemory m -> InMemory m -> Bool
== :: InMemory m -> InMemory m -> Bool
$c/= :: forall m. (Embeddings m, Eq m) => InMemory m -> InMemory m -> Bool
/= :: InMemory m -> InMemory m -> Bool
Eq)

instance Embeddings m => VectorStore (InMemory m) where
  -- \| Add documents with generated embeddings
  --  Example:
  --
  --  >>> addDocuments inMem [doc1, doc2]
  --  Right (InMemory {_store = ...})
  --
  addDocuments :: InMemory m -> [Document] -> IO (Either String (InMemory m))
addDocuments InMemory m
inMem [Document]
docs = do
    Either String [[Float]]
eRes <- m -> [Document] -> IO (Either String [[Float]])
forall m.
Embeddings m =>
m -> [Document] -> IO (Either String [[Float]])
embedDocuments (InMemory m -> m
forall m. Embeddings m => InMemory m -> m
embeddingModel InMemory m
inMem) [Document]
docs
    case Either String [[Float]]
eRes of
      Left String
err -> Either String (InMemory m) -> IO (Either String (InMemory m))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either String (InMemory m) -> IO (Either String (InMemory m)))
-> Either String (InMemory m) -> IO (Either String (InMemory m))
forall a b. (a -> b) -> a -> b
$ String -> Either String (InMemory m)
forall a b. a -> Either a b
Left String
err
      Right [[Float]]
floats -> do
        let currStore :: Map Int64 (Document, [Float])
currStore = InMemory m -> Map Int64 (Document, [Float])
forall m.
Embeddings m =>
InMemory m -> Map Int64 (Document, [Float])
store InMemory m
inMem
            mbMaxKey :: Maybe (Int64, (Document, [Float]))
mbMaxKey = (Map Int64 (Document, [Float]) -> Maybe (Int64, (Document, [Float]))
forall k a. Map k a -> Maybe (k, a)
Map.lookupMax Map Int64 (Document, [Float])
currStore)
            newStore :: Map Int64 (Document, [Float])
newStore = [(Int64, (Document, [Float]))] -> Map Int64 (Document, [Float])
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(Int64, (Document, [Float]))] -> Map Int64 (Document, [Float]))
-> [(Int64, (Document, [Float]))] -> Map Int64 (Document, [Float])
forall a b. (a -> b) -> a -> b
$ [Int64] -> [(Document, [Float])] -> [(Int64, (Document, [Float]))]
forall a b. [a] -> [b] -> [(a, b)]
zip [(Int64
-> ((Int64, (Document, [Float])) -> Int64)
-> Maybe (Int64, (Document, [Float]))
-> Int64
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Int64
1 (\(Int64, (Document, [Float]))
x -> (Int64, (Document, [Float])) -> Int64
forall a b. (a, b) -> a
fst (Int64, (Document, [Float]))
x Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int64
1) Maybe (Int64, (Document, [Float]))
mbMaxKey) ..] ([Document] -> [[Float]] -> [(Document, [Float])]
forall a b. [a] -> [b] -> [(a, b)]
zip [Document]
docs [[Float]]
floats)
            newInMem :: InMemory m
newInMem = InMemory m
inMem {store = Map.union newStore currStore}
        Either String (InMemory m) -> IO (Either String (InMemory m))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either String (InMemory m) -> IO (Either String (InMemory m)))
-> Either String (InMemory m) -> IO (Either String (InMemory m))
forall a b. (a -> b) -> a -> b
$ InMemory m -> Either String (InMemory m)
forall a b. b -> Either a b
Right InMemory m
newInMem

  -- \| Delete documents by ID
  --  Example:
  --
  --  >>> delete inMem [1, 2]
  --  Right (InMemory {_store = ...})
  --
  delete :: InMemory m -> [Int64] -> IO (Either String (InMemory m))
delete InMemory m
inMem [Int64]
ids = do
    let currStore :: Map Int64 (Document, [Float])
currStore = InMemory m -> Map Int64 (Document, [Float])
forall m.
Embeddings m =>
InMemory m -> Map Int64 (Document, [Float])
store InMemory m
inMem
        newStore :: Map Int64 (Document, [Float])
newStore = (Map Int64 (Document, [Float])
 -> Int64 -> Map Int64 (Document, [Float]))
-> Map Int64 (Document, [Float])
-> [Int64]
-> Map Int64 (Document, [Float])
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\Map Int64 (Document, [Float])
acc Int64
i -> Int64
-> Map Int64 (Document, [Float]) -> Map Int64 (Document, [Float])
forall k a. Ord k => k -> Map k a -> Map k a
Map.delete Int64
i Map Int64 (Document, [Float])
acc) Map Int64 (Document, [Float])
currStore [Int64]
ids
        newInMem :: InMemory m
newInMem = InMemory m
inMem {store = newStore}
    Either String (InMemory m) -> IO (Either String (InMemory m))
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either String (InMemory m) -> IO (Either String (InMemory m)))
-> Either String (InMemory m) -> IO (Either String (InMemory m))
forall a b. (a -> b) -> a -> b
$ InMemory m -> Either String (InMemory m)
forall a b. b -> Either a b
Right InMemory m
newInMem

  -- \| Text-based similarity search
  --  Example:
  --
  --  >>> similaritySearch inMem "Haskell" 2
  --  Right [Document "Haskell is...", Document "Functional programming..."]
  --
  similaritySearch :: InMemory m -> Text -> Int -> IO (Either String [Document])
similaritySearch InMemory m
vs Text
query Int
k = do
    Either String [Float]
eQueryEmbedding <- m -> Text -> IO (Either String [Float])
forall m. Embeddings m => m -> Text -> IO (Either String [Float])
embedQuery (InMemory m -> m
forall m. Embeddings m => InMemory m -> m
embeddingModel InMemory m
vs) Text
query
    case Either String [Float]
eQueryEmbedding of
      Left String
err -> Either String [Document] -> IO (Either String [Document])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String [Document] -> IO (Either String [Document]))
-> Either String [Document] -> IO (Either String [Document])
forall a b. (a -> b) -> a -> b
$ String -> Either String [Document]
forall a b. a -> Either a b
Left String
err
      Right [Float]
queryVec -> InMemory m -> [Float] -> Int -> IO (Either String [Document])
forall m.
VectorStore m =>
m -> [Float] -> Int -> IO (Either String [Document])
similaritySearchByVector InMemory m
vs [Float]
queryVec Int
k

  -- \| Vector-based similarity search
  --  Uses cosine similarity for ranking
  --
  --  Example:
  --
  --  >>> similaritySearchByVector inMem [0.1, 0.3, ...] 3
  --  Right [mostRelevantDoc, ...]
  --
  similaritySearchByVector :: InMemory m -> [Float] -> Int -> IO (Either String [Document])
similaritySearchByVector InMemory m
vs [Float]
queryVec Int
k = do
    let similarities :: [(Document, Float)]
similarities =
          ((Document, [Float]) -> (Document, Float))
-> [(Document, [Float])] -> [(Document, Float)]
forall a b. (a -> b) -> [a] -> [b]
map
            (\(Document
doc, [Float]
vec) -> (Document
doc, [Float] -> [Float] -> Float
cosineSimilarity [Float]
queryVec [Float]
vec))
            (((Int64, (Document, [Float])) -> (Document, [Float]))
-> [(Int64, (Document, [Float]))] -> [(Document, [Float])]
forall a b. (a -> b) -> [a] -> [b]
map (Int64, (Document, [Float])) -> (Document, [Float])
forall a b. (a, b) -> b
snd ([(Int64, (Document, [Float]))] -> [(Document, [Float])])
-> [(Int64, (Document, [Float]))] -> [(Document, [Float])]
forall a b. (a -> b) -> a -> b
$ Map Int64 (Document, [Float]) -> [(Int64, (Document, [Float]))]
forall k a. Map k a -> [(k, a)]
Map.toList (Map Int64 (Document, [Float]) -> [(Int64, (Document, [Float]))])
-> Map Int64 (Document, [Float]) -> [(Int64, (Document, [Float]))]
forall a b. (a -> b) -> a -> b
$ InMemory m -> Map Int64 (Document, [Float])
forall m.
Embeddings m =>
InMemory m -> Map Int64 (Document, [Float])
store InMemory m
vs)
        sorted :: [(Document, Float)]
sorted = ((Document, Float) -> (Document, Float) -> Ordering)
-> [(Document, Float)] -> [(Document, Float)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Document, Float) -> Float)
-> (Document, Float) -> (Document, Float) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Float -> Float
forall a. Num a => a -> a
negate (Float -> Float)
-> ((Document, Float) -> Float) -> (Document, Float) -> Float
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Document, Float) -> Float
forall a b. (a, b) -> b
snd)) [(Document, Float)]
similarities -- Sort in descending order
        topK :: [(Document, Float)]
topK = Int -> [(Document, Float)] -> [(Document, Float)]
forall a. Int -> [a] -> [a]
take Int
k [(Document, Float)]
sorted
    Either String [Document] -> IO (Either String [Document])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String [Document] -> IO (Either String [Document]))
-> Either String [Document] -> IO (Either String [Document])
forall a b. (a -> b) -> a -> b
$ [Document] -> Either String [Document]
forall a b. b -> Either a b
Right ([Document] -> Either String [Document])
-> [Document] -> Either String [Document]
forall a b. (a -> b) -> a -> b
$ ((Document, Float) -> Document)
-> [(Document, Float)] -> [Document]
forall a b. (a -> b) -> [a] -> [b]
map (Document, Float) -> Document
forall a b. (a, b) -> a
fst [(Document, Float)]
topK

{-
ghci> let x = OllamaEmbeddings "nomic-embed-text:latest" Nothing Nothing
ghci> let inMem = emptyInMemoryVectorStore x
ghci> eRes <- addDocuments inMem [Document "Hello World" empty, Document "Nice to meet you" empty]
ghci> let newInMem = fromRight inMem eRes
ghci> similaritySearch newInMem "World" 1
Right [Document {pageContent = "Hello World", metadata = fromList []}]
ghci> similaritySearch newInMem "Meet you" 1
Right [Document {pageContent = "Nice to meet you", metadata = fromList []}]
-}

{- $examples
Test case patterns:
1. Document addition
   >>> addDocuments inMem [Document "Test" mempty]
   Right (InMemory {_store = ...})

2. Similarity search
   >>> similaritySearch inMem "World" 1
   Right [Document "Hello World"...]

3. Vector-based search
   >>> similaritySearchByVector inMem [0.5, 0.5] 1
   Right [mostSimilarDoc]
-}