feat: Update llama.cpp

Author: Andrei Betlen
Date:   2024-04-21 20:46:40 -04:00
Parent: 893a27a736
Commit: 159cc4e5d9

3 changed files with 33 additions and 11 deletions

llama_cpp/_internals.py
@@ -181,20 +181,20 @@ class _LlamaModel:
         )
         return list(tokens[:n_tokens])
 
-    def token_to_piece(self, token: int) -> bytes:
+    def token_to_piece(self, token: int, special: bool = False) -> bytes:
         assert self.model is not None
         buf = ctypes.create_string_buffer(32)
-        llama_cpp.llama_token_to_piece(self.model, token, buf, 32)
+        llama_cpp.llama_token_to_piece(self.model, token, buf, 32, special)
         return bytes(buf)
 
-    def detokenize(self, tokens: List[int]) -> bytes:
+    def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
         assert self.model is not None
         output = b""
         size = 32
         buffer = (ctypes.c_char * size)()
         for token in tokens:
             n = llama_cpp.llama_token_to_piece(
-                self.model, llama_cpp.llama_token(token), buffer, size
+                self.model, llama_cpp.llama_token(token), buffer, size, special
             )
             assert n <= size
             output += bytes(buffer[:n])
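The practical effect of the new flag: with special=True, llama.cpp renders special tokens (BOS/EOS, chat-template markers, etc.) as their literal text instead of dropping them from the output. A hedged usage sketch, not part of this diff — the model path is a placeholder, and it reaches through the private _model attribute because only the internals gained the parameter in this commit:

    from llama_cpp import Llama

    llm = Llama(model_path="./model.gguf", vocab_only=True)  # placeholder path
    ids = llm.tokenize(b"Hello", add_bos=True, special=True)

    # With special=False (the default) the BOS token is omitted from the
    # round-trip; with special=True it is rendered, e.g. as b"<s>".
    plain = llm._model.detokenize(ids, special=False)
    rendered = llm._model.detokenize(ids, special=True)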
@@ -597,13 +597,13 @@ def _tokenize(model: _LlamaModel, text: str, add_bos: bool, special: bool) -> list[int]:
     return list(result)
 
 
-def _token_to_piece(model: _LlamaModel, token: int) -> str:
+def _token_to_piece(model: _LlamaModel, token: int, special: bool = False) -> str:
     assert model.model is not None
     result = (ctypes.c_char * 8)(0)
-    n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
+    n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special)
     if n_tokens < 0:
         result = (ctypes.c_char * -n_tokens)(0)
-        check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result))
+        check = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special)
         if check != -n_tokens:
             raise RuntimeError(f"Failed to get piece: token={token}")
     else:
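The hunk is cut off at the else branch, but the pattern it implements is llama.cpp's buffer-sizing convention: llama_token_to_piece returns the number of bytes written on success and the negative of the required length when the buffer is too small, so the caller reallocates and retries. A self-contained sketch of the full convention (the function name and the raw model pointer argument are assumptions for illustration):

    import ctypes
    import llama_cpp

    def token_to_piece(model, token: int, special: bool = False) -> str:
        # First try a small buffer; most pieces fit in 8 bytes.
        buf = (ctypes.c_char * 8)(0)
        n = llama_cpp.llama_token_to_piece(model, token, buf, len(buf), special)
        if n < 0:
            # A negative return is the required size; retry once with a
            # buffer of exactly that size.
            buf = (ctypes.c_char * -n)(0)
            check = llama_cpp.llama_token_to_piece(model, token, buf, len(buf), special)
            if check != -n:
                raise RuntimeError(f"Failed to get piece: token={token}")
            n = check
        return bytes(buf[:n]).decode("utf-8", errors="replace")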

llama_cpp/llama_cpp.py
@@ -2380,6 +2380,18 @@ def llama_token_get_type(
     ...
 
 
+# // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
+# LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);
+@ctypes_function(
+    "llama_token_is_eog", [llama_model_p_ctypes, llama_token], ctypes.c_bool
+)
+def llama_token_is_eog(
+    model: llama_model_p, token: Union[llama_token, int], /
+) -> bool:
+    """Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)"""
+    ...
+
+
 # // Special tokens
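llama_token_is_eog generalizes the old token == llama_token_eos(model) check: recent chat models can stop on EOT or other end-of-generation tokens that are not the classic EOS. A hedged sketch of a generation loop built on it — sample_next_token is a stand-in for whatever sampling the caller uses, not an API from this commit:

    import llama_cpp

    def generate(model, sample_next_token, max_tokens=256):
        out = []
        for _ in range(max_tokens):
            tok = sample_next_token()
            # Stop on any end-of-generation token (EOS, EOT, ...), not only EOS.
            if llama_cpp.llama_token_is_eog(model, tok):
                break
            out.append(tok)
        return out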
@@ -2434,7 +2446,7 @@ def llama_add_eos_token(model: llama_model_p, /) -> int:
     ...
 
 
-# // codellama infill tokens
+# // Codellama infill tokens
 # LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix
 @ctypes_function("llama_token_prefix", [llama_model_p_ctypes], llama_token)
 def llama_token_prefix(model: llama_model_p) -> int:
@@ -2524,11 +2536,13 @@ def llama_tokenize(
 # // Uses the vocabulary in the provided context.
 # // Does not write null terminator to the buffer.
 # // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
+# // @param special If true, special tokens are rendered in the output.
 # LLAMA_API int32_t llama_token_to_piece(
 #           const struct llama_model * model,
 #                        llama_token   token,
 #                               char * buf,
-#                            int32_t   length);
+#                            int32_t   length,
+#                               bool   special);
 @ctypes_function(
     "llama_token_to_piece",
     [
@@ -2536,6 +2550,7 @@ def llama_tokenize(
         llama_token,
         ctypes.c_char_p,
         ctypes.c_int32,
+        ctypes.c_bool,
     ],
     ctypes.c_int32,
 )
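The argtypes list above is what ctypes uses to marshal arguments, so adding ctypes.c_bool is what actually carries the new special parameter across the FFI boundary; calling the old four-argument form against a library built from the new header would misalign the arguments. For orientation, a rough hand-rolled equivalent of what this declaration sets up (the library name is an assumption; the decorator resolves the symbol from the already-loaded shared library):

    import ctypes

    lib = ctypes.CDLL("libllama.so")  # assumption: platform-specific name/path
    lib.llama_token_to_piece.argtypes = [
        ctypes.c_void_p,  # const struct llama_model *
        ctypes.c_int32,   # llama_token token
        ctypes.c_char_p,  # char * buf
        ctypes.c_int32,   # int32_t length
        ctypes.c_bool,    # bool special (the argument added here)
    ]
    lib.llama_token_to_piece.restype = ctypes.c_int32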
@@ -2544,13 +2559,20 @@ def llama_token_to_piece(
     token: Union[llama_token, int],
     buf: Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]],
     length: Union[ctypes.c_int, int],
+    special: Union[ctypes.c_bool, bool],
     /,
 ) -> int:
     """Token Id -> Piece.
     Uses the vocabulary in the provided context.
     Does not write null terminator to the buffer.
     User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.
-    """
+
+    Args:
+        model: The model to use for tokenization.
+        token: The token to convert.
+        buf: The buffer to write the token to.
+        length: The length of the buffer.
+        special: If true, special tokens are rendered in the output."""
     ...

vendor/llama.cpp (vendored submodule)

@@ -1 +1 @@
-Subproject commit 3b8f1ec4b18770531d0b1d792f3edf08254e4f0c
+Subproject commit 5cf5e7d490dfdd2e70bface2d35dfd14aa44b4fb