llama-cpp-hs
Copyright: (c) 2025 Tushar Adhatrao
License: MIT
Maintainer: Tushar Adhatrao <tusharadhatrao@gmail.com>
Safe Haskell: None
Language: Haskell2010

Llama.Decode

Description

 
Synopsis

Documentation

batchGetOne :: [LlamaToken] -> IO Batch Source #

Create a batch from a list of tokens.

freeBatch :: Ptr LlamaBatch -> IO () Source #

Free a batch of tokens allocated with initBatch.

encodeBatch :: Context -> Batch -> IO (Either String ()) Source #

Encode tokens using the model context.

decodeBatch :: Context -> Batch -> IO (Either String ()) Source #

Decode tokens using the model context.

setThreadCount :: Context -> Int -> IO () Source #

Set number of threads used for processing.

getThreadCount :: Context -> IO Int Source #

Get current main thread count.

getBatchThreadCount :: Context -> IO Int Source #

Get current batch thread count.

setEmbeddingsEnabled :: Context -> Bool -> IO () Source #

Enable or disable embeddings output.

areEmbeddingsEnabled :: Context -> IO Bool Source #

Check if embeddings are enabled.

setCausalAttention :: Context -> Bool -> IO () Source #

Set causal attention mode.

setThreadCounts :: Context -> Int -> Int -> IO () Source #

Set main and batch thread counts separately.

setWarmupMode :: Context -> Bool -> IO () Source #

Set warmup mode (e.g. precompute KV cache).

synchronizeContext :: Context -> IO () Source #

Block until all async work is complete.