From ef22e478db533e89fe3d48577cf9f338b4195b70 Mon Sep 17 00:00:00 2001
From: Tanner Hobson
Date: Mon, 11 Dec 2023 20:46:27 -0500
Subject: [PATCH] Replace logits_to_logprobs implementation with numpy equivalent to llama.cpp (#991)

See #990. This change makes the logits_to_logprobs function equivalent to the version in the llama.cpp repository. It uses numpy so it's much faster than the previous version.
---
 llama_cpp/llama.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index c696804..292378d 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -2280,10 +2280,20 @@ class Llama:
         return self._model.token_nl()
 
     @staticmethod
-    def logits_to_logprobs(logits: List[float]) -> List[float]:
-        exps = [math.exp(float(x)) for x in logits]
-        sum_exps = sum(exps)
-        return [math.log(x / sum_exps) for x in exps]
+    def logits_to_logprobs(logits: npt.NDArray[np.single]) -> npt.NDArray[np.single]:
+        """Convert raw logits into log-probabilities (log-softmax).
+
+        Normalizes over every element of ``logits`` and returns a newly
+        allocated ``np.single`` array; the input is not modified.
+        """
+        # Shift by the maximum so the largest exponent is exp(0) == 1 and
+        # np.exp cannot overflow.
+        shifted = np.subtract(logits, np.max(logits), dtype=np.single)
+        # log(exp(s_i) / sum(exp(s))) == s_i - log(sum(exp(s))).  Subtracting in
+        # log space keeps entries far below the maximum finite, instead of
+        # letting exp() underflow to 0 and log() turn that into -inf.
+        np.subtract(shifted, np.log(np.sum(np.exp(shifted))), out=shifted)
+        return shifted
 
     @staticmethod
     def longest_token_prefix(a: Sequence[int], b: Sequence[int]):