From d015bdb4f8ab5591a9147443ec3e0d4f1d0a3192 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Tue, 8 Aug 2023 14:35:06 -0400
Subject: [PATCH] Add mul_mat_q option

---
 llama_cpp/llama.py      | 4 ++++
 llama_cpp/server/app.py | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 47f71e9..9a8c090 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -227,6 +227,7 @@ class Llama:
         rope_freq_scale: float = 1.0,
         n_gqa: Optional[int] = None,  # (TEMPORARY) must be 8 for llama2 70b
         rms_norm_eps: Optional[float] = None,  # (TEMPORARY)
+        mul_mat_q: Optional[bool] = None,  # (TEMPORARY)
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -293,6 +294,9 @@ class Llama:
         if rms_norm_eps is not None:
             self.params.rms_norm_eps = rms_norm_eps
 
+        if mul_mat_q is not None:
+            self.params.mul_mat_q = mul_mat_q
+
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
 
diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 4afcfd5..3d5238b 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -103,6 +103,10 @@ class Settings(BaseSettings):
         default=None,
         description="TEMPORARY",
     )
+    mul_mat_q: Optional[bool] = Field(
+        default=None,
+        description="TEMPORARY",
+    )
 
 
 class ErrorResponse(TypedDict):