diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 6c442d0..1911b26 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -405,7 +405,7 @@ class llama_model_params(Structure):
 
 #     // Keep the booleans together to avoid misalignment during copy-by-value.
 #     bool mul_mat_q;   // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
-#     bool logits_all;  // the llama_eval() call computes all logits, not just the last one
+#     bool logits_all;  // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
 #     bool embedding;   // embedding mode only
 #     bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
 # };
@@ -430,7 +430,7 @@ class llama_context_params(Structure):
         type_v (int): data type for V cache
         mul_mat_q (bool): if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
         f16_kv (bool): use fp16 for KV cache, fp32 otherwise
-        logits_all (bool): the llama_eval() call computes all logits, not just the last one
+        logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
         embedding (bool): embedding mode only"""
     _fields_ = [
         ("seed", c_uint32),
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 8a7b2fa..948ff13 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 8a7b2fa528f130631a5f43648481596ab320ed5a
+Subproject commit 948ff137ec37f1ec74c02905917fa0afc9b97514
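
For callers of the low-level bindings, the deprecation means per-token logits are requested through llama_batch.logits rather than the context-wide logits_all flag. A minimal sketch, assuming a llama_context named ctx already exists; the token ids and variable names are illustrative only:

    import llama_cpp

    tokens = [1, 15043, 2787]  # illustrative token ids for a prompt
    n_tokens = len(tokens)

    # Allocate a batch for the prompt (embd=0: token ids, n_seq_max=1).
    batch = llama_cpp.llama_batch_init(n_tokens, 0, 1)
    batch.n_tokens = n_tokens
    for i, tok in enumerate(tokens):
        batch.token[i] = tok
        batch.pos[i] = i
        batch.n_seq_id[i] = 1
        batch.seq_id[i][0] = 0
        batch.logits[i] = False        # no logits for intermediate prompt tokens
    batch.logits[n_tokens - 1] = True  # only the last position produces logits

    # llama_cpp.llama_decode(ctx, batch)  # ctx: an existing llama_context
    llama_cpp.llama_batch_free(batch)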