llama-cpp-hs
Copyright: (c) 2025 Tushar Adhatrao
License: MIT
Maintainer: Tushar Adhatrao <tusharadhatrao@gmail.com>
Safe Haskell: None
Language: Haskell2010

Llama.Decode

Description

 
Synopsis

Documentation

batchGetOne :: [LlamaToken] -> IO Batch Source #

Create a batch from a list of tokens.

freeBatch :: Ptr LlamaBatch -> IO () Source #

Free a batch of tokens allocated with initBatch.

encodeBatch :: Context -> Batch -> IO (Either String ()) Source #

Encode tokens using the model context.

decodeBatch :: Context -> Batch -> IO (Either String ()) Source #

Decode tokens using the model context.

setThreadCount :: Context -> Int -> IO () Source #

Set number of threads used for processing.

getThreadCount :: Context -> IO Int Source #

Get current main thread count.

getBatchThreadCount :: Context -> IO Int Source #

Get current batch thread count.

setEmbeddingsEnabled :: Context -> Bool -> IO () Source #

Enable or disable embeddings output.

areEmbeddingsEnabled :: Context -> IO Bool Source #

Check if embeddings are enabled.

setCausalAttention :: Context -> Bool -> IO () Source #

Set causal attention mode.

setThreadCounts :: Context -> Int -> Int -> IO () Source #

Set main and batch thread counts separately.

setWarmupMode :: Context -> Bool -> IO () Source #

Set warmup mode (e.g. precompute KV cache).

synchronizeContext :: Context -> IO () Source #

Block until all async work is complete.