Add doc string for n_gpu_layers argument

2023-08-12 18:41:47 +08:00 · 2023-08-12 18:41:47 +08:00 · d018c7b01d
parent 66fb0345e8
commit d018c7b01d
1 changed file with 1 addition and 0 deletions
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -239,6 +239,7 @@ class Llama:
            n_ctx: Maximum context size.
            n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
            seed: Random seed. -1 for random.
+            n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded.
            f16_kv: Use half-precision for key/value cache.
            logits_all: Return logits for all tokens, not just the last token.
            vocab_only: Only load the vocabulary no weights.