From 9e5b6d675a49a4466b1fab841baf570e5efeb549 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Wed, 3 May 2023 10:28:10 -0400
Subject: [PATCH] Improve logging messages

---
 llama_cpp/llama.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 1b9f9e9..fef7b3e 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -396,7 +396,7 @@ class Llama:
             and tuple(self.eval_tokens) == tuple(tokens[: len(self.eval_tokens)])
         ):
             if self.verbose:
-                print("generate cache hit", file=sys.stderr)
+                print("Llama.generate: cache hit", file=sys.stderr)
             reset = False
             tokens = tokens[len(self.eval_tokens) :]
 
@@ -518,7 +518,7 @@ class Llama:
 
         if self.cache and prompt_tokens in self.cache:
             if self.verbose:
-                print("cache hit", file=sys.stderr)
+                print("Llama._create_completion: cache hit", file=sys.stderr)
             self.load_state(self.cache[prompt_tokens])
 
         finish_reason = "length"
@@ -538,7 +538,7 @@ class Llama:
             if self.cache and len(completion_tokens) == 0:
                 if prompt_tokens not in self.cache:
                     if self.verbose:
-                        print("cache miss", file=sys.stderr)
+                        print("Llama._create_completion: cache miss", file=sys.stderr)
                     self.cache[prompt_tokens] = self.save_state()
 
             completion_tokens.append(token)
@@ -957,6 +957,8 @@ class Llama:
             raise RuntimeError("Failed to copy llama state data")
         llama_state_compact = (llama_cpp.c_uint8 * int(n_bytes))()
         llama_cpp.ctypes.memmove(llama_state_compact, llama_state, int(n_bytes))
+        if self.verbose:
+            print(f"Llama.save_state: saving {n_bytes} bytes of llama state", file=sys.stderr)
         return LlamaState(
             eval_tokens=self.eval_tokens.copy(),
             eval_logits=self.eval_logits.copy(),
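
Note: a minimal sketch of how the prefixed messages surface at runtime. The model
path is hypothetical, and this assumes LlamaCache (exported by llama_cpp) takes no
required constructor arguments; the patch above shows the cache is checked via the
instance's `cache` attribute, so assigning it directly should exercise these branches.

    from llama_cpp import Llama, LlamaCache

    # verbose=True routes the diagnostics touched by this patch to stderr.
    llm = Llama(model_path="./models/7B/ggml-model.bin", verbose=True)  # hypothetical path
    llm.cache = LlamaCache()  # enable the cache branches patched above

    # First call: expect "Llama._create_completion: cache miss" on stderr,
    # followed by "Llama.save_state: saving <n_bytes> bytes of llama state".
    llm("Q: What is the capital of France? A:", max_tokens=16)

    # Repeating the same prompt should instead print
    # "Llama._create_completion: cache hit".
    llm("Q: What is the capital of France? A:", max_tokens=16)

Prefixing each message with the method name (Llama.generate, Llama._create_completion,
Llama.save_state) makes it clear which code path emitted a given stderr line when
several cache checks fire during one completion.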