| Copyright | (c) 2025 Tushar Adhatrao |
|---|---|
| License | MIT |
| Maintainer | Tushar Adhatrao <tusharadhatrao@gmail.com> |
| Safe Haskell | None |
| Language | Haskell2010 |
Llama.Decode
Description
Synopsis
- batchInit :: Int -> Int -> Int -> IO Batch
- batchGetOne :: [LlamaToken] -> IO Batch
- freeBatch :: Ptr LlamaBatch -> IO ()
- encodeBatch :: Context -> Batch -> IO (Either String ())
- decodeBatch :: Context -> Batch -> IO (Either String ())
- setThreadCount :: Context -> Int -> IO ()
- getThreadCount :: Context -> IO Int
- getBatchThreadCount :: Context -> IO Int
- setEmbeddingsEnabled :: Context -> Bool -> IO ()
- areEmbeddingsEnabled :: Context -> IO Bool
- setCausalAttention :: Context -> Bool -> IO ()
- setThreadCounts :: Context -> Int -> Int -> IO ()
- setWarmupMode :: Context -> Bool -> IO ()
- synchronizeContext :: Context -> IO ()
Documentation
batchGetOne :: [LlamaToken] -> IO Batch Source #
Create a batch from a list of tokens.
encodeBatch :: Context -> Batch -> IO (Either String ()) Source #
Encode tokens using the model context.
decodeBatch :: Context -> Batch -> IO (Either String ()) Source #
Decode tokens using the model context.
setThreadCounts :: Context -> Int -> Int -> IO () Source #
Set main and batch thread counts separately.
synchronizeContext :: Context -> IO () Source #
Block until all async work is complete.