From 09a8406c83aaa880a3e3d64c29561a51299b6bf6 Mon Sep 17 00:00:00 2001
From: gmcgoldr
Date: Thu, 19 Oct 2023 02:55:56 -0400
Subject: [PATCH] Fix streaming doesn't return finish reason (#798)

When streaming, the yield that contains the finish reason can be skipped.
This change ensures that yield isn't skipped.
---
 llama_cpp/llama.py | 42 ++++++++++++++----------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 8bb5efb..c179b48 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1232,20 +1232,6 @@ class Llama:
                             }
                         ],
                     }
-                    yield {
-                        "id": completion_id,
-                        "object": "text_completion",
-                        "created": created,
-                        "model": model_name,
-                        "choices": [
-                            {
-                                "text": "",
-                                "index": 0,
-                                "logprobs": None,
-                                "finish_reason": finish_reason,
-                            }
-                        ],
-                    }
                     break
                 returned_tokens += 1
                 yield {
@@ -1264,20 +1250,20 @@ class Llama:
                            }
                        ],
                    }
-                yield {
-                    "id": completion_id,
-                    "object": "text_completion",
-                    "created": created,
-                    "model": model_name,
-                    "choices": [
-                        {
-                            "text": "",
-                            "index": 0,
-                            "logprobs": None,
-                            "finish_reason": finish_reason,
-                        }
-                    ],
-                }
+            yield {
+                "id": completion_id,
+                "object": "text_completion",
+                "created": created,
+                "model": model_name,
+                "choices": [
+                    {
+                        "text": "",
+                        "index": 0,
+                        "logprobs": None,
+                        "finish_reason": finish_reason,
+                    }
+                ],
+            }
             if self.cache:
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
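
For context, here is a minimal sketch (not part of the patch) of how a caller can observe the fix: with this change, a streamed create_completion always ends with a chunk whose finish_reason is set. It assumes llama-cpp-python is installed and that a GGUF model exists at the hypothetical path ./model.gguf; the chunk layout matches the dicts yielded in the diff above.

# Sketch: stream a completion and check that the final chunk carries a finish_reason.
# Before this fix, the stream could end without any chunk setting finish_reason.
from llama_cpp import Llama

llm = Llama(model_path="./model.gguf", verbose=False)  # hypothetical model path

finish_reason = None
for chunk in llm.create_completion("Q: Name a planet. A:", max_tokens=16, stream=True):
    choice = chunk["choices"][0]
    print(choice["text"], end="", flush=True)
    # Only the final yielded chunk should carry a non-None finish_reason.
    if choice["finish_reason"] is not None:
        finish_reason = choice["finish_reason"]

print()
print("finish_reason:", finish_reason)  # expected "stop" or "length", not None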