Fix streaming not returning finish reason (#798)

When streaming, the yield that contains the finish reason can be skipped. This change ensures that yield isn't skipped.
2023-10-19 02:55:56 -04:00 · 2023-10-19 02:55:56 -04:00 · 09a8406c83
parent 28c2b884e2
commit 09a8406c83
1 changed files with 14 additions and 28 deletions
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1232,20 +1232,6 @@ class Llama:
                            }
                        ],
                    }
-                    yield {
-                        "id": completion_id,
-                        "object": "text_completion",
-                        "created": created,
-                        "model": model_name,
-                        "choices": [
-                            {
-                                "text": "",
-                                "index": 0,
-                                "logprobs": None,
-                                "finish_reason": finish_reason,
-                            }
-                        ],
-                    }
                    break
                returned_tokens += 1
                yield {
@@ -1264,20 +1250,20 @@ class Llama:
                        }
                    ],
                }
-                yield {
-                    "id": completion_id,
-                    "object": "text_completion",
-                    "created": created,
-                    "model": model_name,
-                    "choices": [
-                        {
-                            "text": "",
-                            "index": 0,
-                            "logprobs": None,
-                            "finish_reason": finish_reason,
-                        }
-                    ],
-                }
+            yield {
+                "id": completion_id,
+                "object": "text_completion",
+                "created": created,
+                "model": model_name,
+                "choices": [
+                    {
+                        "text": "",
+                        "index": 0,
+                        "logprobs": None,
+                        "finish_reason": finish_reason,
+                    }
+                ],
+            }
            if self.cache:
                if self.verbose:
                    print("Llama._create_completion: cache save", file=sys.stderr)