From 3fc9147218ba503f0cfcd7f2f99b21c9a3e87fd0 Mon Sep 17 00:00:00 2001 From: David Ponce Date: Wed, 1 Nov 2023 23:53:47 +0100 Subject: [PATCH] Iterate over tokens that should be biased rather than the entire vocabulary. (#851) --- llama_cpp/server/app.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index 83cde40..930ad5d 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -601,10 +601,9 @@ def make_logit_bias_processor( input_ids: npt.NDArray[np.intc], scores: npt.NDArray[np.single], ) -> npt.NDArray[np.single]: - new_scores = [None] * len(scores) - for input_id, score in enumerate(scores): - new_scores[input_id] = score + to_bias.get(input_id, 0.0) - + new_scores = np.copy(scores) # Does it make sense to copy the whole array or can we just overwrite the original one? + for input_id, score in to_bias.items(): + new_scores[input_id] = score + scores[input_id] return new_scores return logit_bias_processor