Index

Name | Module(s) |
abort_callback | Llama.Internal.Types.Params |
abort_callback_data | Llama.Internal.Types.Params |
accept | Llama.Internal.Types |
acceptTokenWithSampler | Llama.Sampler |
AdapterLora | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
AddBos | Llama.Internal.Types |
addSamplerToChain | Llama.Sampler |
allowRequantize | Llama.Internal.Types.Params |
Always | Llama.Internal.Types |
apply | Llama.Internal.Types |
applyAdapterCVec | Llama.Adapter |
applySampler | Llama.Sampler |
areEmbeddingsEnabled | Llama.Decode |
attention_type | Llama.Internal.Types.Params |
Batch | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
batchGetOne | Llama.Decode |
batchInit | Llama.Decode |
bias | Llama.Internal.Types |
cb_eval | Llama.Internal.Types.Params |
cb_eval_user_data | Llama.Internal.Types.Params |
cells | Llama.Internal.Types |
cells_sequences | Llama.Internal.Types |
chatApplyTemplate | Llama.ChatTemplate |
chatContent | Llama.ChatTemplate |
chatGetBuiltinTemplates | Llama.ChatTemplate |
ChatMessage | |
1 (Type/Class) | Llama.ChatTemplate |
2 (Data Constructor) | Llama.ChatTemplate |
chatRole | Llama.ChatTemplate |
checkTensors | Llama.Internal.Types.Params |
clearAdapterLora | Llama.Adapter |
CLlamaAdapterLora | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
CLlamaContext | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
CLlamaContextParams | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
CLlamaKVCache | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
CLlamaModel | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
CLlamaModelParams | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
CLlamaModelQuantizeParams | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
CLlamaSamplerChainParams | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
CLlamaVocab | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
clone | Llama.Internal.Types |
cloneSampler | Llama.Sampler |
content | Llama.Internal.Types |
Context | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
ContextParamsPtr | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
ctx | Llama.Internal.Types |
c_llama_adapter_lora_free | Llama.Internal.Foreign.Adapter, Llama.Internal.Foreign |
c_llama_adapter_lora_init | Llama.Internal.Foreign.Adapter, Llama.Internal.Foreign |
c_llama_apply_adapter_cvec | Llama.Internal.Foreign.Adapter, Llama.Internal.Foreign |
c_llama_backend_free | Llama.Internal.Foreign.Backend, Llama.Internal.Foreign |
c_llama_backend_init | Llama.Internal.Foreign.Backend, Llama.Internal.Foreign |
c_llama_batch_free_wrap | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_batch_get_one_into | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_batch_init_into | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_chat_apply_template | Llama.Internal.Foreign.ChatTemplate, Llama.Internal.Foreign |
c_llama_chat_builtin_templates | Llama.Internal.Foreign.ChatTemplate, Llama.Internal.Foreign |
c_llama_clear_adapter_lora | Llama.Internal.Foreign.Adapter, Llama.Internal.Foreign |
c_llama_context_default_params_into | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
c_llama_decode_wrap | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_detach_threadpool | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
c_llama_detokenize | Llama.Internal.Foreign.Tokenize, Llama.Internal.Foreign |
c_llama_encode | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_free | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_get_embeddings | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_get_embeddings_ith | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_get_embeddings_seq | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_get_kv_self | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
c_llama_get_logits | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_get_logits_ith | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_get_model | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_init_from_model_wrap | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_kv_cache_view_free | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_cache_view_init_into | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_cache_view_update | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_can_shift | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_clear | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_defrag | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_n_tokens | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_seq_add | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_seq_cp | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_seq_div | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_seq_keep | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_seq_pos_max | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_seq_rm | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_update | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_kv_self_used_cells | Llama.Internal.Foreign.KVCache, Llama.Internal.Foreign |
c_llama_log_set | Llama.Internal.Foreign.Split, Llama.Internal.Foreign |
c_llama_model_chat_template | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_decoder_start_token | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_default_params | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_desc | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_free | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_get_vocab | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_has_decoder | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_has_encoder | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_is_recurrent | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_load_from_file_wrap | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_load_from_splits | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_meta_count | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_meta_key_by_index | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_meta_val_str | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_meta_val_str_by_index | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_n_ctx_train | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_n_embd | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_n_head | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_n_head_kv | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_n_layer | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_n_params | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_quantize | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_quantize_default_params | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_rope_freq_scale_train | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_rope_type_into | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_model_size | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
c_llama_n_threads | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_n_threads_batch | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_n_vocab | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_perf_context | Llama.Internal.Foreign.Performance, Llama.Internal.Foreign |
c_llama_perf_context_print | Llama.Internal.Foreign.Performance, Llama.Internal.Foreign |
c_llama_perf_context_reset | Llama.Internal.Foreign.Performance, Llama.Internal.Foreign |
c_llama_perf_sampler | Llama.Internal.Foreign.Performance, Llama.Internal.Foreign |
c_llama_perf_sampler_print | Llama.Internal.Foreign.Performance, Llama.Internal.Foreign |
c_llama_perf_sampler_reset | Llama.Internal.Foreign.Performance, Llama.Internal.Foreign |
c_llama_pooling_type_into | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
c_llama_print_system_info | Llama.Internal.Foreign.Split, Llama.Internal.Foreign |
c_llama_rm_adapter_lora | Llama.Internal.Foreign.Adapter, Llama.Internal.Foreign |
c_llama_sampler_accept | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_apply | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_chain_add | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_chain_default_params_into | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_chain_get | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_chain_init | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_chain_n | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_chain_remove | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_clone | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_free | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_get_seed | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_dist | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_dry | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_grammar | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_grammar_lazy_patterns | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_greedy | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_infill | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_logit_bias | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_min_p | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_mirostat | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_mirostat_v2 | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_penalties | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_temp | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_temp_ext | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_top_k | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_top_n_sigma | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_top_p | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_typical | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_init_xtc | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_name | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_reset | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_sampler_sample | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
c_llama_set_abort_callback | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_set_adapter_lora | Llama.Internal.Foreign.Adapter, Llama.Internal.Foreign |
c_llama_set_causal_attn | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_set_embeddings | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_set_n_threads | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_set_warmup | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_split_path | Llama.Internal.Foreign.Split, Llama.Internal.Foreign |
c_llama_split_prefix | Llama.Internal.Foreign.Split, Llama.Internal.Foreign |
c_llama_state_get_data | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_get_size | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_load_file | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_save_file | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_seq_get_data | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_seq_get_size | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_seq_load_file | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_seq_save_file | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_seq_set_data | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_state_set_data | Llama.Internal.Foreign.State, Llama.Internal.Foreign |
c_llama_synchronize | Llama.Internal.Foreign.Decode, Llama.Internal.Foreign |
c_llama_tokenize | Llama.Internal.Foreign.Tokenize, Llama.Internal.Foreign |
c_llama_token_to_piece | Llama.Internal.Foreign.Tokenize, Llama.Internal.Foreign |
c_llama_vocab_bos | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_eos | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_eot | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_fim_mid | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_fim_pad | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_fim_pre | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_fim_rep | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_fim_sep | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_fim_suf | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_get_add_bos | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_get_add_eos | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_get_attr | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_get_score | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_get_text | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_is_control | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_is_eog | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_nl | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_n_tokens | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_pad | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_sep | Llama.Internal.Foreign.Vocab, Llama.Internal.Foreign |
c_llama_vocab_type | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
data_ | Llama.Internal.Types |
decodeBatch | Llama.Decode |
defaultContextParams | Llama.Context |
defaultModelParams | Llama.Model |
defaultQuantizeParams | Llama.Model |
defaultSamplerChainParams | Llama.Sampler |
defrag_thold | Llama.Internal.Types.Params |
detachThreadPool | Llama.Context |
detokenize | Llama.Tokenize |
devices | Llama.Internal.Types.Params |
embd | Llama.Internal.Types |
embeddings | Llama.Internal.Types.Params |
encodeBatch | Llama.Decode |
flash_attn | Llama.Internal.Types.Params |
freeBatch | Llama.Decode |
free_ | Llama.Internal.Types |
fromLlamaRopePoolingType | Llama.Internal.Types.Params |
fromLlamaRopeTypeScaling | Llama.Internal.Types.Params |
fromLlamaRopeVocabType | Llama.Internal.Types.Params |
ftype | Llama.Internal.Types.Params |
getBatchSize | Llama.Context |
getBatchThreadCount | Llama.Decode |
getContextModel | Llama.Model |
getContextPerformance | Llama.Performance |
getContextSize | Llama.Context |
getMaxDevices | Llama.Context |
getMaxSeqCount | Llama.Context |
getModelChatTemplate | Llama.Model |
getModelDecoderStartToken | Llama.Model |
getModelDescription | Llama.Model |
getModelEmbeddingDim | Llama.Model |
getModelHasDecoder | Llama.Model |
getModelHasEncoder | Llama.Model |
getModelIsRecurrent | Llama.Model |
getModelMetaCount | Llama.Model |
getModelMetaKeyByIndex | Llama.Model |
getModelMetaValue | Llama.Model |
getModelMetaValueByIndex | Llama.Model |
getModelNumHeads | Llama.Model |
getModelNumKVHeads | Llama.Model |
getModelNumLayers | Llama.Model |
getModelNumParams | Llama.Model |
getModelRoPEFreqScale | Llama.Model |
getModelRopeType | Llama.Model |
getModelSize | Llama.Model |
getModelTrainingContextSize | Llama.Model |
getModelVocab | Llama.Model |
getPoolingType | Llama.Context |
getSamplerChainLength | Llama.Sampler |
getSamplerFromChain | Llama.Sampler |
getSamplerName | Llama.Sampler |
getSamplerPerformance | Llama.Performance |
getSamplerSeed | Llama.Sampler |
getSequenceStateSize | Llama.State |
getStateData | Llama.State |
getStateSize | Llama.State |
getThreadCount | Llama.Decode |
getTimeUs | Llama.Context |
getUnbatchedSize | Llama.Context |
getVocabAddBOSToken | Llama.Vocab |
getVocabAddEOSToken | Llama.Vocab |
getVocabBosToken | Llama.Vocab |
getVocabEosToken | Llama.Vocab |
getVocabEotToken | Llama.Vocab |
getVocabFIMMiddleToken | Llama.Vocab |
getVocabFIMPADToken | Llama.Vocab |
getVocabFIMPrefixToken | Llama.Vocab |
getVocabFIMSeparatorToken | Llama.Vocab |
getVocabFIMSuffixToken | Llama.Vocab |
getVocabNlToken | Llama.Vocab |
getVocabPadToken | Llama.Vocab |
getVocabSepToken | Llama.Vocab |
getVocabSize | Llama.Vocab |
getVocabTokenAttr | Llama.Vocab |
getVocabTokenCount | Llama.Vocab |
getVocabTokenScore | Llama.Vocab |
getVocabTokenText | Llama.Vocab |
getVocabType | Llama.Model |
GgmlType | Llama.Internal.Types.Params |
GGML_TYPE_BF16 | Llama.Internal.Types.Params |
GGML_TYPE_COUNT | Llama.Internal.Types.Params |
GGML_TYPE_F16 | Llama.Internal.Types.Params |
GGML_TYPE_F32 | Llama.Internal.Types.Params |
GGML_TYPE_F64 | Llama.Internal.Types.Params |
GGML_TYPE_I16 | Llama.Internal.Types.Params |
GGML_TYPE_I32 | Llama.Internal.Types.Params |
GGML_TYPE_I64 | Llama.Internal.Types.Params |
GGML_TYPE_I8 | Llama.Internal.Types.Params |
GGML_TYPE_IQ1_M | Llama.Internal.Types.Params |
GGML_TYPE_IQ1_S | Llama.Internal.Types.Params |
GGML_TYPE_IQ2_S | Llama.Internal.Types.Params |
GGML_TYPE_IQ2_XS | Llama.Internal.Types.Params |
GGML_TYPE_IQ2_XXS | Llama.Internal.Types.Params |
GGML_TYPE_IQ3_S | Llama.Internal.Types.Params |
GGML_TYPE_IQ3_XXS | Llama.Internal.Types.Params |
GGML_TYPE_IQ4_NL | Llama.Internal.Types.Params |
GGML_TYPE_IQ4_XS | Llama.Internal.Types.Params |
GGML_TYPE_Q2_K | Llama.Internal.Types.Params |
GGML_TYPE_Q3_K | Llama.Internal.Types.Params |
GGML_TYPE_Q4_0 | Llama.Internal.Types.Params |
GGML_TYPE_Q4_1 | Llama.Internal.Types.Params |
GGML_TYPE_Q4_K | Llama.Internal.Types.Params |
GGML_TYPE_Q5_0 | Llama.Internal.Types.Params |
GGML_TYPE_Q5_1 | Llama.Internal.Types.Params |
GGML_TYPE_Q5_K | Llama.Internal.Types.Params |
GGML_TYPE_Q6_K | Llama.Internal.Types.Params |
GGML_TYPE_Q8_0 | Llama.Internal.Types.Params |
GGML_TYPE_Q8_1 | Llama.Internal.Types.Params |
GGML_TYPE_Q8_K | Llama.Internal.Types.Params |
GGML_TYPE_TQ1_0 | Llama.Internal.Types.Params |
GGML_TYPE_TQ2_0 | Llama.Internal.Types.Params |
id | Llama.Internal.Types |
iface | Llama.Internal.Types |
imatrix | Llama.Internal.Types.Params |
initAdapterLora | Llama.Adapter |
initContextFromModel | Llama.Model |
initDistributedSampler | Llama.Sampler |
initDrySampler | Llama.Sampler |
initGrammarLazyPatternsSampler | Llama.Sampler |
initGrammarSampler | Llama.Sampler |
initGreedySampler | Llama.Sampler |
initInfillSampler | Llama.Sampler |
initLogitBiasSampler | Llama.Sampler |
initMinPSampler | Llama.Sampler |
initMirostatSampler | Llama.Sampler |
initMirostatV2Sampler | Llama.Sampler |
initPenaltiesSampler | Llama.Sampler |
initSampler | Llama.Sampler |
initSamplerChain | Llama.Sampler |
initTempExtSampler | Llama.Sampler |
initTempSampler | Llama.Sampler |
initTopKSampler | Llama.Sampler |
initTopNSigmaSampler | Llama.Sampler |
initTopPSampler | Llama.Sampler |
initTypicalSampler | Llama.Sampler |
initXTCSampler | Llama.Sampler |
isVocabTokenControl | Llama.Vocab |
isVocabTokenEog | Llama.Vocab |
keepSplit | Llama.Internal.Types.Params |
KVCache | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
kvCacheViewInit | Llama.KVCache |
kvCacheViewUpdate | Llama.KVCache |
kvOverrides | Llama.Internal.Types.Params |
kvOverridesQuantizeParams | Llama.Internal.Types.Params |
kvSelfCanShift | Llama.KVCache |
kvSelfClear | Llama.KVCache |
kvSelfDefrag | Llama.KVCache |
kvSelfNumTokens | Llama.KVCache |
kvSelfSeqAdd | Llama.KVCache |
kvSelfSeqCopy | Llama.KVCache |
kvSelfSeqDiv | Llama.KVCache |
kvSelfSeqKeep | Llama.KVCache |
kvSelfSeqPosMax | Llama.KVCache |
kvSelfSeqRemove | Llama.KVCache |
kvSelfUpdate | Llama.KVCache |
kvSelfUsedCells | Llama.KVCache |
LlamaAttentionType | Llama.Internal.Types.Params |
llamaBackendFree | Llama.Backend |
llamaBackendInit | Llama.Backend |
LlamaBatch | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaChatMessage | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaContextParams | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
LlamaKvCacheView | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaLogitBias | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaModelParams | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
LlamaModelQuantizeParams | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
LlamaPerfContextData | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaPerfSamplerData | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaPoolingType | Llama.Internal.Types.Params |
LlamaPos | Llama.Internal.Types |
LlamaRopeTypeScaling | Llama.Internal.Types.Params |
LlamaSampler | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaSamplerChainParams | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
LlamaSamplerContext | Llama.Internal.Types |
LlamaSamplerI | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaSeqId | Llama.Internal.Types |
LlamaSplitMode | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
LlamaToken | Llama.Internal.Types |
LlamaTokenData | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaTokenDataArray | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
LlamaVocabType | Llama.Internal.Types.Params |
LLAMA_ATTENTION_TYPE_CAUSAL | Llama.Internal.Types.Params |
LLAMA_ATTENTION_TYPE_NON_CAUSAL | Llama.Internal.Types.Params |
LLAMA_ATTENTION_TYPE_UNSPECIFIED | Llama.Internal.Types.Params |
llama_max_devices | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
llama_n_batch | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
llama_n_ctx | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
llama_n_seq_max | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
llama_n_ubatch | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
LLAMA_POOLING_TYPE_CLS | Llama.Internal.Types.Params |
LLAMA_POOLING_TYPE_LAST | Llama.Internal.Types.Params |
LLAMA_POOLING_TYPE_MEAN | Llama.Internal.Types.Params |
LLAMA_POOLING_TYPE_NONE | Llama.Internal.Types.Params |
LLAMA_POOLING_TYPE_RANK | Llama.Internal.Types.Params |
LLAMA_POOLING_TYPE_UNSPECIFIED | Llama.Internal.Types.Params |
LLAMA_ROPE_SCALING_TYPE_LINEAR | Llama.Internal.Types.Params |
LLAMA_ROPE_SCALING_TYPE_LONGROPE | Llama.Internal.Types.Params |
LLAMA_ROPE_SCALING_TYPE_MAX_VALUE | Llama.Internal.Types.Params |
LLAMA_ROPE_SCALING_TYPE_NONE | Llama.Internal.Types.Params |
LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED | Llama.Internal.Types.Params |
LLAMA_ROPE_SCALING_TYPE_YARN | Llama.Internal.Types.Params |
llama_supports_gpu_offload | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
llama_supports_mlock | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
llama_supports_mmap | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
llama_supports_rpc | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
llama_time_us | Llama.Internal.Foreign.Context, Llama.Internal.Foreign |
LLAMA_VOCAB_TYPE_BPE | Llama.Internal.Types.Params |
LLAMA_VOCAB_TYPE_NONE | Llama.Internal.Types.Params |
LLAMA_VOCAB_TYPE_RWKV | Llama.Internal.Types.Params |
LLAMA_VOCAB_TYPE_SPM | Llama.Internal.Types.Params |
LLAMA_VOCAB_TYPE_UGM | Llama.Internal.Types.Params |
LLAMA_VOCAB_TYPE_WPM | Llama.Internal.Types.Params |
loadModelFromFile | Llama.Model |
loadModelFromSplits | Llama.Model |
loadSequenceStateFromFile | Llama.State |
loadStateFromFile | Llama.State |
logit | Llama.Internal.Types |
logits | Llama.Internal.Types |
logits_all | Llama.Internal.Types.Params |
mainGpu | Llama.Internal.Types.Params |
max_contiguous | Llama.Internal.Types |
max_contiguous_idx | Llama.Internal.Types |
Model | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
ModelParamsPtr | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
ModelQuantizeParamsPtr | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
name | Llama.Internal.Types |
Never | Llama.Internal.Types |
nGpuLayers | Llama.Internal.Types.Params |
noPerf | Llama.Internal.Types.Params |
no_perf | Llama.Internal.Types.Params |
nthread | Llama.Internal.Types.Params |
n_batch | Llama.Internal.Types.Params |
n_cells | Llama.Internal.Types |
n_ctx | Llama.Internal.Types.Params |
n_eval | Llama.Internal.Types |
n_p_eval | Llama.Internal.Types |
n_sample | Llama.Internal.Types |
n_seq_id | Llama.Internal.Types |
n_seq_max | |
1 (Function) | Llama.Internal.Types |
2 (Function) | Llama.Internal.Types.Params |
n_threads | Llama.Internal.Types.Params |
n_threads_batch | Llama.Internal.Types.Params |
n_tokens | Llama.Internal.Types |
n_ubatch | Llama.Internal.Types.Params |
offload_kqv | Llama.Internal.Types.Params |
onlyCopy | Llama.Internal.Types.Params |
outputTensorType | Llama.Internal.Types.Params |
p | Llama.Internal.Types |
pooling_type | Llama.Internal.Types.Params |
pos | Llama.Internal.Types |
printContextPerformance | Llama.Performance |
printSamplerPerformance | Llama.Performance |
printSystemInfo | Llama.Split |
progressCallback | Llama.Internal.Types.Params |
progressCallbackUserData | Llama.Internal.Types.Params |
pure_ | Llama.Internal.Types.Params |
p_llama_adapter_lora_free | Llama.Internal.Foreign.Adapter, Llama.Internal.Foreign |
p_llama_free | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
p_llama_model_free | Llama.Internal.Foreign.Model, Llama.Internal.Foreign |
p_llama_sampler_free | Llama.Internal.Foreign.Sampler, Llama.Internal.Foreign |
quantizeModel | Llama.Model |
quantizeModelDefault | Llama.Model |
quantizeOutputTensor | Llama.Internal.Types.Params |
removeSamplerFromChain | Llama.Sampler |
reset | Llama.Internal.Types |
resetContextPerformance | Llama.Performance |
resetSampler | Llama.Sampler |
resetSamplerPerformance | Llama.Performance |
rmAdapterLora | Llama.Adapter |
role | Llama.Internal.Types |
rope_freq_base | Llama.Internal.Types.Params |
rope_freq_scale | Llama.Internal.Types.Params |
rope_scaling_type | Llama.Internal.Types.Params |
Sampler | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
SamplerChainParamsPtr | |
1 (Type/Class) | Llama.Internal.Types.Params |
2 (Data Constructor) | Llama.Internal.Types.Params |
sampleWithSampler | Llama.Sampler |
saveSequenceStateToFile | Llama.State |
saveStateToFile | Llama.State |
selected | Llama.Internal.Types |
seq_id | Llama.Internal.Types |
setAdapterLora | Llama.Adapter |
setCausalAttention | Llama.Decode |
setEmbeddingsEnabled | Llama.Decode |
setSequenceStateData | Llama.State |
setStateData | Llama.State |
setThreadCount | Llama.Decode |
setThreadCounts | Llama.Decode |
setWarmupMode | Llama.Decode |
size_ | Llama.Internal.Types |
sorted | Llama.Internal.Types |
splitMode | Llama.Internal.Types.Params |
splitPath | Llama.Split |
splitPrefix | Llama.Split |
supportsGpuOffload | Llama.Context |
supportsMLock | Llama.Context |
supportsMMap | Llama.Context |
supportsRpc | Llama.Context |
synchronizeContext | Llama.Decode |
tensorBuftOverrides | Llama.Internal.Types.Params |
tensorSplit | Llama.Internal.Types.Params |
tensorTypes | Llama.Internal.Types.Params |
toCLlamaChatMessage | Llama.ChatTemplate |
token | Llama.Internal.Types |
tokenEmbeddingType | Llama.Internal.Types.Params |
tokenize | Llama.Tokenize |
tokenLogitBias | Llama.Internal.Types |
tokenToPiece | Llama.Tokenize |
token_count | Llama.Internal.Types |
type_k | Llama.Internal.Types.Params |
type_v | Llama.Internal.Types.Params |
t_eval_ms | Llama.Internal.Types |
t_load_ms | Llama.Internal.Types |
t_p_eval_ms | Llama.Internal.Types |
t_sample_ms | Llama.Internal.Types |
t_start_ms | Llama.Internal.Types |
used_cells | Llama.Internal.Types |
useMlock | Llama.Internal.Types.Params |
useMmap | Llama.Internal.Types.Params |
Vocab | |
1 (Type/Class) | Llama.Internal.Types |
2 (Data Constructor) | Llama.Internal.Types |
vocabOnly | Llama.Internal.Types.Params |
yarn_attn_factor | Llama.Internal.Types.Params |
yarn_beta_fast | Llama.Internal.Types.Params |
yarn_beta_slow | Llama.Internal.Types.Params |
yarn_ext_factor | Llama.Internal.Types.Params |
yarn_orig_ctx | Llama.Internal.Types.Params |