Use model_name and index in response

2023-05-21 21:30:03 -04:00 · 2023-05-21 21:30:03 -04:00 · 0adb9ec37a
parent 922b5b2bfd
commit 0adb9ec37a
1 changed file with 6 additions and 7 deletions
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -177,7 +177,6 @@ class Llama:
        if self.verbose:
            print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)

-
        n_vocab = self.n_vocab()
        n_ctx = self.n_ctx()
        data = (llama_cpp.llama_token_data * n_vocab)(
@@ -575,9 +574,9 @@ class Llama:
        else:
            inputs = input

-        data = []
+        data: List[EmbeddingData] = []
        total_tokens = 0
-        for input in inputs:
+        for index, input in enumerate(inputs):
            tokens = self.tokenize(input.encode("utf-8"))
            self.reset()
            self.eval(tokens)
@@ -587,20 +586,20 @@ class Llama:
                : llama_cpp.llama_n_embd(self.ctx)
            ]

-            if self.verbose:
-                llama_cpp.llama_print_timings(self.ctx)
            data.append(
                {
                    "object": "embedding",
                    "embedding": embedding,
-                    "index": 0,
+                    "index": index,
                }
            )
+        if self.verbose:
+            llama_cpp.llama_print_timings(self.ctx)

        return {
            "object": "list",
            "data": data,
-            "model": self.model_path,
+            "model": model_name,
            "usage": {
                "prompt_tokens": total_tokens,
                "total_tokens": total_tokens,