From c4a8491d42b3b93330408afc3cc2af31ae2fecb1 Mon Sep 17 00:00:00 2001
From: Mug <>
Date: Wed, 26 Apr 2023 14:37:06 +0200
Subject: [PATCH] Fix decode errors permanently

---
 examples/low_level_api/low_level_api_chat_cpp.py  |  9 ++++++---
 examples/low_level_api/low_level_api_llama_cpp.py |  2 +-
 llama_cpp/llama.py                                | 12 ++++++------
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/examples/low_level_api/low_level_api_chat_cpp.py b/examples/low_level_api/low_level_api_chat_cpp.py
index d64ee8f..4a7cfc1 100644
--- a/examples/low_level_api/low_level_api_chat_cpp.py
+++ b/examples/low_level_api/low_level_api_chat_cpp.py
@@ -96,7 +96,7 @@ specified) expect poor results""", file=sys.stderr)
 
         print(file=sys.stderr)
         print(f"system_info: n_threads = {self.params.n_threads} / {cpu_count()} \
-| {llama_cpp.llama_print_system_info().decode('utf8')}", file=sys.stderr)
+| {llama_cpp.llama_print_system_info().decode('utf8', errors='ignore')}", file=sys.stderr)
 
         # determine the required inference memory per token:
         if (self.params.mem_test):
@@ -342,7 +342,7 @@ n_keep = {self.params.n_keep}
     # return past text
     def past(self):
         for id in self.last_n_tokens[-self.n_past:]:
-            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8")
+            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8", errors="ignore")
 
     # write input
     def input(self, prompt: str):
@@ -356,7 +356,10 @@ n_keep = {self.params.n_keep}
     def output(self):
         self.remaining_tokens = self.params.n_predict
         for id in self.generate():
-            yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8")
+            try:
+                yield llama_cpp.llama_token_to_str(self.ctx, id).decode("utf-8", errors="ignore")
+            except UnicodeDecodeError:
+                pass
 
     # read user input
     def read_input(self):
diff --git a/examples/low_level_api/low_level_api_llama_cpp.py b/examples/low_level_api/low_level_api_llama_cpp.py
index b048c0a..4fb5a03 100644
--- a/examples/low_level_api/low_level_api_llama_cpp.py
+++ b/examples/low_level_api/low_level_api_llama_cpp.py
@@ -70,7 +70,7 @@ while remaining_tokens > 0:
     if not input_noecho:
         for id in embd:
             print(
-                llama_cpp.llama_token_to_str(ctx, id).decode("utf-8"),
+                llama_cpp.llama_token_to_str(ctx, id).decode("utf-8", errors="ignore"),
                 end="",
                 flush=True,
             )
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index edd2eef..a6e7ae3 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -109,7 +109,7 @@ class Llama:
         )
 
         if self.verbose:
-            print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
+            print(llama_cpp.llama_print_system_info().decode("utf-8", errors="ignore"), file=sys.stderr)
 
     def tokenize(self, text: bytes) -> List[llama_cpp.llama_token]:
         """Tokenize a string.
@@ -460,7 +460,7 @@ class Llama:
                     "model": self.model_path,
                     "choices": [
                         {
-                            "text": text[start:].decode("utf-8"),
+                            "text": text[start:].decode("utf-8", errors="ignore"),
                             "index": 0,
                             "logprobs": None,
                             "finish_reason": None,
@@ -484,7 +484,7 @@ class Llama:
                 "model": self.model_path,
                 "choices": [
                     {
-                        "text": text[returned_characters:].decode("utf-8"),
+                        "text": text[returned_characters:].decode("utf-8", errors="ignore"),
                         "index": 0,
                         "logprobs": None,
                         "finish_reason": finish_reason,
@@ -496,7 +496,7 @@ class Llama:
         ### HACK
         self._completion_bytes.append(text)
         ###
-        text_str = text.decode("utf-8")
+        text_str = text.decode("utf-8", errors="ignore")
 
         if echo:
             text_str = prompt + text_str
@@ -514,7 +514,7 @@ class Llama:
             all_tokens = prompt_tokens + completion_tokens
 
             all_token_strs = [
-                self.detokenize([token]).decode("utf-8") for token in all_tokens
+                self.detokenize([token]).decode("utf-8", errors="ignore") for token in all_tokens
             ]
             all_logprobs = [
                 [Llama.logit_to_logprob(logit) for logit in row]
@@ -533,7 +533,7 @@ class Llama:
                 )
                 token_logprobs.append(sorted_logprobs[int(token)][0])
                 top_logprob = {
-                    self.detokenize([llama_cpp.llama_token(i)]).decode("utf-8"): logprob
+                    self.detokenize([llama_cpp.llama_token(i)]).decode("utf-8", errors="ignore"): logprob
                     for logprob, i in sorted_logprobs[:logprobs]
                 }
                 top_logprob.update({token_str: sorted_logprobs[int(token)][0]})
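
For context on why every decode call in this patch gains errors="ignore": llama_token_to_str returns raw bytes, and a single multi-byte UTF-8 character can be split across two consecutive tokens, so strictly decoding an individual chunk can raise UnicodeDecodeError. The snippet below is an illustrative sketch, not part of the patch; the split bytes are constructed by hand purely to show the decoding behaviour.

    # Illustrative sketch (not part of the patch): a two-byte character split
    # across two per-token byte chunks, as llama_token_to_str can produce.
    first, second = "é".encode("utf-8")[:1], "é".encode("utf-8")[1:]  # b'\xc3', b'\xa9'

    try:
        first.decode("utf-8")                       # strict decoding of a partial sequence
    except UnicodeDecodeError as exc:
        print(f"strict decode raises: {exc}")

    print(first.decode("utf-8", errors="ignore"))   # patched behaviour: '' (bad bytes dropped)
    print(second.decode("utf-8", errors="ignore"))  # also '' — the character is lost, not deferred

The trade-off is that errors="ignore" discards the partial bytes instead of buffering them until the rest of the character arrives, so streamed output can silently drop the occasional non-ASCII character, in exchange for never raising mid-generation.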