From 3720c739d4bf6124ee1457bed2a93b3747221396 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 29 Sep 2023 19:58:21 -0400
Subject: [PATCH] Update llama.cpp

---
 llama_cpp/llama_cpp.py | 10 +++++-----
 vendor/llama.cpp       |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 4734aec..d2a35c1 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -301,19 +301,19 @@ class llama_model_params(Structure):

 # struct llama_context_params {
 #     uint32_t seed;              // RNG seed, -1 for random
-#     uint32_t n_ctx;             // text context
-#     uint32_t n_batch;           // prompt processing batch size
+#     uint32_t n_ctx;             // text context, 0 = from model
+#     uint32_t n_batch;           // prompt processing maximum batch size
 #     uint32_t n_threads;         // number of threads to use for generation
 #     uint32_t n_threads_batch;   // number of threads to use for batch processing

 #     // ref: https://github.com/ggerganov/llama.cpp/pull/2054
-#     float    rope_freq_base;    // RoPE base frequency
-#     float    rope_freq_scale;   // RoPE frequency scaling factor
+#     float    rope_freq_base;    // RoPE base frequency, 0 = from model
+#     float    rope_freq_scale;   // RoPE frequency scaling factor, 0 = from model

 #     // Keep the booleans together to avoid misalignment during copy-by-value.
 #     bool mul_mat_q;   // if true, use experimental mul_mat_q kernels
-#     bool f16_kv;      // use fp16 for KV cache
+#     bool f16_kv;      // use fp16 for KV cache, fp32 otherwise
 #     bool logits_all;  // the llama_eval() call computes all logits, not just the last one
 #     bool embedding;   // embedding mode only
 # };
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index bc39553..40e07a6 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit bc39553c901a91cfcb757863586250838c83eeab
+Subproject commit 40e07a60f9ce06e79f3ccd4c903eba300fb31b5e
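
Usage note (not part of the patch): a minimal sketch of how the "0 = from model"
defaults documented in the updated comments above could be exercised through the
low-level ctypes bindings in llama_cpp/llama_cpp.py. The model path is
hypothetical, and the helper names shown (llama_model_default_params,
llama_load_model_from_file, llama_new_context_with_model) are assumed to match
the bindings as of this commit.

    import llama_cpp

    # One-time backend initialization (numa=False).
    llama_cpp.llama_backend_init(False)

    # Load the model with default model params (hypothetical path).
    model_params = llama_cpp.llama_model_default_params()
    model = llama_cpp.llama_load_model_from_file(b"./models/model.gguf", model_params)

    # Context params: per the updated comments, 0 means "take the value from the model".
    ctx_params = llama_cpp.llama_context_default_params()
    ctx_params.n_ctx = 0              # text context, 0 = from model
    ctx_params.rope_freq_base = 0.0   # RoPE base frequency, 0 = from model
    ctx_params.rope_freq_scale = 0.0  # RoPE frequency scaling factor, 0 = from model
    ctx_params.f16_kv = True          # fp16 KV cache, fp32 otherwise

    ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

    # Clean up.
    llama_cpp.llama_free(ctx)
    llama_cpp.llama_free_model(model)
    llama_cpp.llama_backend_free()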