From 48cf43b4270a95ac875fc2ffc24bb28196ac3014 Mon Sep 17 00:00:00 2001
From: Andrei Betlen <abetlen@gmail.com>
Date: Fri, 25 Aug 2023 13:43:16 -0400
Subject: [PATCH] Use _with_model variants for tokenization

---
 llama_cpp/llama.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index b8f76e9..fd3e2c4 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -408,11 +408,11 @@ class Llama:
         Returns:
             A list of tokens.
         """
-        assert self.ctx is not None
+        assert self.model is not None
         n_ctx = self._n_ctx
         tokens = (llama_cpp.llama_token * n_ctx)()
-        n_tokens = llama_cpp.llama_tokenize(
-            self.ctx,
+        n_tokens = llama_cpp.llama_tokenize_with_model(
+            self.model,
             text,
             tokens,
             llama_cpp.c_int(n_ctx),
@@ -421,8 +421,8 @@ class Llama:
         if n_tokens < 0:
             n_tokens = abs(n_tokens)
             tokens = (llama_cpp.llama_token * n_tokens)()
-            n_tokens = llama_cpp.llama_tokenize(
-                self.ctx,
+            n_tokens = llama_cpp.llama_tokenize_with_model(
+                self.model,
                 text,
                 tokens,
                 llama_cpp.c_int(n_tokens),
@@ -443,15 +443,15 @@ class Llama:
         Returns:
             The detokenized string.
         """
-        assert self.ctx is not None
+        assert self.model is not None
         output = b""
-        buffer_size = 8
-        buffer = (ctypes.c_char * buffer_size)()
+        size = 8
+        buffer = (ctypes.c_char * size)()
         for token in tokens:
-            n = llama_cpp.llama_token_to_str(
-                self.ctx, llama_cpp.llama_token(token), buffer, buffer_size
+            n = llama_cpp.llama_token_to_str_with_model(
+                self.model, llama_cpp.llama_token(token), buffer, size
             )
-            assert n <= buffer_size
+            assert n <= size
             output += bytes(buffer[:n])
         # NOTE: Llama1 models automatically added a space at the start of the prompt
         # this line removes a leading space if the first token is a beginning of sentence token