From 09a8406c83aaa880a3e3d64c29561a51299b6bf6 Mon Sep 17 00:00:00 2001
From: gmcgoldr
Date: Thu, 19 Oct 2023 02:55:56 -0400
Subject: [PATCH] Fix streaming doesn't return finish reason (#798)

When streaming, the yield that contains the finish reason can be skipped.
This change ensures that yield isn't skipped.
---
 llama_cpp/llama.py | 42 ++++++++++++++----------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 8bb5efb..c179b48 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1232,20 +1232,6 @@ class Llama:
                             }
                         ],
                     }
-                    yield {
-                        "id": completion_id,
-                        "object": "text_completion",
-                        "created": created,
-                        "model": model_name,
-                        "choices": [
-                            {
-                                "text": "",
-                                "index": 0,
-                                "logprobs": None,
-                                "finish_reason": finish_reason,
-                            }
-                        ],
-                    }
                     break
                 returned_tokens += 1
                 yield {
@@ -1264,20 +1250,20 @@ class Llama:
                            }
                        ],
                    }
-                yield {
-                    "id": completion_id,
-                    "object": "text_completion",
-                    "created": created,
-                    "model": model_name,
-                    "choices": [
-                        {
-                            "text": "",
-                            "index": 0,
-                            "logprobs": None,
-                            "finish_reason": finish_reason,
-                        }
-                    ],
-                }
+            yield {
+                "id": completion_id,
+                "object": "text_completion",
+                "created": created,
+                "model": model_name,
+                "choices": [
+                    {
+                        "text": "",
+                        "index": 0,
+                        "logprobs": None,
+                        "finish_reason": finish_reason,
+                    }
+                ],
+            }
             if self.cache:
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
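
For context, here is a minimal sketch (not part of the patch) of how a caller can observe the fix: with this change, a streamed create_completion always ends with a chunk whose finish_reason is set. It assumes llama-cpp-python is installed and that a GGUF model exists at the hypothetical path ./model.gguf; the chunk layout matches the dicts yielded in the diff above.

# Sketch: stream a completion and check that the final chunk carries a finish_reason.
# Before this fix, the stream could end without any chunk setting finish_reason.
from llama_cpp import Llama

llm = Llama(model_path="./model.gguf", verbose=False)  # hypothetical model path

finish_reason = None
for chunk in llm.create_completion("Q: Name a planet. A:", max_tokens=16, stream=True):
    choice = chunk["choices"][0]
    print(choice["text"], end="", flush=True)
    # Only the final yielded chunk should carry a non-None finish_reason.
    if choice["finish_reason"] is not None:
        finish_reason = choice["finish_reason"]

print()
print("finish_reason:", finish_reason)  # expected "stop" or "length", not None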