From cfb7da98ed64feb882065222b17ca261356a7f59 Mon Sep 17 00:00:00 2001 From: anil <412569+aniljava@users.noreply.github.com> Date: Tue, 16 Jan 2024 11:52:52 -0600 Subject: [PATCH] Support Accept text/event-stream in chat and completion endpoints, resolves #1083 (#1088) Co-authored-by: Anil Pathak Co-authored-by: Andrei Betlen --- llama_cpp/server/app.py | 59 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index c54e4eb..fed0a6d 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -197,7 +197,36 @@ async def authenticate( @router.post( - "/v1/completions", summary="Completion", dependencies=[Depends(authenticate)] + "/v1/completions", + summary="Completion", + dependencies=[Depends(authenticate)], + response_model= Union[ + llama_cpp.CreateCompletionResponse, + str, + ], + responses={ + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + {"$ref": "#/components/schemas/CreateCompletionResponse"} + ], + "title": "Completion response, when stream=False", + } + }, + "text/event-stream":{ + "schema": { + "type": "string", + "title": "Server Side Streaming response, when stream=True. " + + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", # noqa: E501 + "example": """data: {... see CreateCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]""" + } + } + }, + } + }, ) @router.post( "/v1/engines/copilot-codex/completions", @@ -280,7 +309,33 @@ async def create_embedding( @router.post( - "/v1/chat/completions", summary="Chat", dependencies=[Depends(authenticate)] + "/v1/chat/completions", summary="Chat", dependencies=[Depends(authenticate)], + response_model= Union[ + llama_cpp.ChatCompletion, str + ], + responses={ + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + {"$ref": "#/components/schemas/CreateChatCompletionResponse"} + ], + "title": "Completion response, when stream=False", + } + }, + "text/event-stream":{ + "schema": { + "type": "string", + "title": "Server Side Streaming response, when stream=True" + + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", # noqa: E501 + "example": """data: {... see CreateChatCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]""" + } + } + }, + } + }, ) async def create_chat_completion( request: Request,