diff --git a/.gitignore b/.gitignore
index d09b209..fd64c09 100644
--- a/.gitignore
+++ b/.gitignore
@@ -163,4 +163,4 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
diff --git a/examples/high_level_api/fastapi_server.py b/examples/high_level_api/fastapi_server.py
index b7d2565..a649692 100644
--- a/examples/high_level_api/fastapi_server.py
+++ b/examples/high_level_api/fastapi_server.py
@@ -27,10 +27,10 @@ from sse_starlette.sse import EventSourceResponse
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 2048
-    n_threads: int = os.cpu_count() or 1
+    n_batch: int = 8
+    n_threads: int = max((os.cpu_count() or 1) // 2, 1)  # half the cores, at least 1; cpu_count() may return None
     f16_kv: bool = True
-    use_mlock: bool = True
+    use_mlock: bool = False  # mlock causes a silent failure on platforms that don't support it (e.g. Windows)
     embedding: bool = True
     last_n_tokens_size: int = 64
 
diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index 0650bc0..c3168b4 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -27,10 +27,10 @@ from sse_starlette.sse import EventSourceResponse
 class Settings(BaseSettings):
     model: str
     n_ctx: int = 2048
-    n_batch: int = 2048
-    n_threads: int = os.cpu_count() or 1
+    n_batch: int = 8
+    n_threads: int = max((os.cpu_count() or 1) // 2, 1)  # half the cores, at least 1; cpu_count() may return None
     f16_kv: bool = True
-    use_mlock: bool = True
+    use_mlock: bool = False  # mlock causes a silent failure on platforms that don't support it (e.g. Windows)
     embedding: bool = True
     last_n_tokens_size: int = 64
 
diff --git a/setup.py b/setup.py
index 7e4193a..f50fe8d 100644
--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@ setup(
     entry_points={"console_scripts": ["llama_cpp.server=llama_cpp.server:main"]},
     install_requires=[
         "typing-extensions>=4.5.0",
+        "pydantic==1.10.7",
     ],
     extras_require={
         "server": ["uvicorn>=0.21.1", "fastapi>=0.95.0", "sse-starlette>=1.3.3"],