feat: Update llama.cpp

2024-04-25 21:21:29 -04:00 · 2024-04-25 21:21:29 -04:00 · 7f52335c50
parent 266abfc1a3
commit 7f52335c50
2 changed files with 5 additions and 1 deletion
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -811,6 +811,7 @@ It might not exist for progress report where '.' is output repeatedly."""
 #     bool quantize_output_tensor;         // quantize output.weight
 #     bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
 #     bool pure;                           // quantize all tensors to the default type
+#     bool keep_split;                     // quantize to the same number of shards
 #     void * imatrix;                      // pointer to importance matrix data
 #     void * kv_overrides;                 // pointer to vector containing overrides
 # } llama_model_quantize_params;
@@ -826,6 +827,7 @@ class llama_model_quantize_params(ctypes.Structure):
        quantize_output_tensor (bool): quantize output.weight
        only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
        pure (bool): quantize all tensors to the default type
+        keep_split (bool): quantize to the same number of shards
        imatrix (ctypes.c_void_p): pointer to importance matrix data
        kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
    """
@@ -839,6 +841,7 @@ class llama_model_quantize_params(ctypes.Structure):
        quantize_output_tensor: bool
        only_copy: bool
        pure: bool
+        keep_split: bool
        imatrix: ctypes.c_void_p
        kv_overrides: ctypes.c_void_p

@@ -851,6 +854,7 @@ class llama_model_quantize_params(ctypes.Structure):
        ("quantize_output_tensor", ctypes.c_bool),
        ("only_copy", ctypes.c_bool),
        ("pure", ctypes.c_bool),
+        ("keep_split", ctypes.c_bool),
        ("imatrix", ctypes.c_void_p),
        ("kv_overrides", ctypes.c_void_p),
    ]
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 784e11dea1f5ce9638851b2b0dddb107e2a609c8
+Subproject commit 46e12c4692a37bdd31a0432fc5153d7d22bc7f72