Update model paths to make it clearer that they should point to a file

Andrei Betlen 2023-04-09 22:45:55 -04:00
parent a79d3eb732
commit 196650ccb2
8 changed files with 12 additions and 12 deletions

View file

@@ -27,14 +27,14 @@ pip install llama-cpp-python
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
   "object": "text_completion",
   "created": 1679561337,
-  "model": "models/7B/...",
+  "model": "./models/7B/ggml-model.bin",
   "choices": [
     {
       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
@@ -60,7 +60,7 @@ To install the server package and get started:
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
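
As an aside, here is a minimal sketch of querying the server once it is running, assuming the package's OpenAI-compatible REST interface and its default bind address of localhost:8000 (both assumptions, not part of this diff):

```python
# Minimal sketch (assumptions: the server listens on http://localhost:8000
# and exposes an OpenAI-style /v1/completions route).
import json
import urllib.request

payload = {
    "prompt": "Q: Name the planets in the solar system? A: ",
    "max_tokens": 32,
    "stop": ["Q:", "\n"],
}
request = urllib.request.Request(
    "http://localhost:8000/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(request) as response:
    print(json.load(response)["choices"][0]["text"])
```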

View file

@@ -29,14 +29,14 @@ pip install llama-cpp-python
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
   "object": "text_completion",
   "created": 1679561337,
-  "model": "models/7B/...",
+  "model": "./models/7B/ggml-model.bin",
   "choices": [
     {
       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
@@ -62,7 +62,7 @@ To install the server package and get started:
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```

View file

@@ -4,7 +4,7 @@ To run this example:
 ```bash
 pip install fastapi uvicorn sse-starlette
-export MODEL=../models/7B/...
+export MODEL=../models/7B/ggml-model.bin
 uvicorn fastapi_server_chat:app --reload
 ```

View file

@@ -3,7 +3,7 @@ import argparse
 from llama_cpp import Llama
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default=".//models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
 args = parser.parse_args()
 llm = Llama(model_path=args.model, embedding=True)
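
A hedged continuation of this embedding example: `create_embedding` is the high-level call one would expect after loading with `embedding=True`, but the lines below are a sketch, not part of the diff.

```python
# Sketch (assumption): create_embedding returns an OpenAI-style response with
# the vector stored under data[0]["embedding"].
result = llm.create_embedding("Hello, world!")
vector = result["data"][0]["embedding"]
print(len(vector))  # embedding dimensionality of the loaded model
```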

View file

@@ -4,7 +4,7 @@ import argparse
 from llama_cpp import Llama
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 llm = Llama(model_path=args.model)
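
For completeness, a sketch of how the loaded model would then be called, mirroring the README snippet above; the prompt and parameters are illustrative only.

```python
# Sketch: completion call mirroring the README example above.
output = llm(
    "Q: Name the planets in the solar system? A: ",
    max_tokens=32,
    stop=["Q:", "\n"],
    echo=True,
)
print(output["choices"][0]["text"])
```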

View file

@@ -4,7 +4,7 @@ import argparse
 from llama_cpp import Llama
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 llm = Llama(model_path=args.model)

View file

@@ -29,7 +29,7 @@ class LlamaLLM(LLM):
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 # Load the model
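
For orientation, a hedged sketch of how a wrapper like `LlamaLLM` is typically exercised once the model is loaded; the constructor signature shown here is an assumption, not taken from the example file.

```python
# Sketch (assumptions: LlamaLLM accepts a model_path keyword and, like any
# LangChain LLM, can be called directly on a prompt string).
llm = LlamaLLM(model_path=args.model)
print(llm("Q: What is the capital of France? A: "))
```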

View file

@@ -9,7 +9,7 @@ N_THREADS = multiprocessing.cpu_count()
 prompt = b"\n\n### Instruction:\nWhat is the capital of France?\n\n### Response:\n"
 lparams = llama_cpp.llama_context_default_params()
-ctx = llama_cpp.llama_init_from_file(b"models/ggml-alpaca-7b-q4.bin", lparams)
+ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
 # determine the required inference memory per token:
 tmp = [0, 1, 2, 3]
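
A hedged sketch of the step this hunk leads into: pushing the warm-up tokens through `llama_eval` via the low-level ctypes bindings. The exact call pattern is an assumption based on the llama.cpp C API of that period, not part of this diff.

```python
# Sketch (assumption): llama_eval(ctx, tokens, n_tokens, n_past, n_threads)
# as mirrored by the low-level bindings; llama_token is a ctypes int alias.
tokens = (llama_cpp.llama_token * len(tmp))(*tmp)
llama_cpp.llama_eval(ctx, tokens, len(tmp), 0, N_THREADS)
```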