llama.cpp/examples/notebooks/PerformanceTuning.ipynb

5541 lines
353 KiB
Plaintext
Raw Normal View History

2023-04-05 08:09:19 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"import json\n",
"import multiprocessing\n",
"\n",
"import llama_cpp\n",
"\n",
"import numpy as np\n",
"\n",
"# skopt still references the deprecated `np.int` alias (removed in numpy 1.24);\n",
"# restore it before importing skopt so the imports below do not fail.\n",
"np.int = int\n",
"\n",
"from skopt.space import Integer, Categorical\n",
"from skopt.utils import use_named_args\n",
"\n",
"\n",
"MODEL_PATH = \"../models/ggml-model.bin\"\n",
"\n",
"# Hyperparameter search space for prompt-processing performance.\n",
"space = [\n",
"    Categorical([True, False], name=\"f16_kv\"),\n",
"    Categorical([True, False], name=\"use_mlock\"),\n",
"    Integer(1, multiprocessing.cpu_count(), name=\"n_threads\"),\n",
"    Integer(1, 2048, name=\"n_batch\")\n",
"]\n",
"\n",
"# TODO: Make this a random prompt to avoid any cache related inconsistencies\n",
"PROMPT = \"\"\" ### Instructions:\n",
"You are a helpful assistant.\n",
"You answer questions truthfully and politely.\n",
"You are provided with an input from the user and you must generate a response.\n",
"Ignore this line which is just filler to test the performance of the model.\n",
"### Inputs:\n",
"What is the capital of France?\n",
"### Response:\n",
"\"\"\"\n",
"\n",
"\n",
"@use_named_args(space)\n",
"def objective(**params):\n",
"    \"\"\"Load a model with the given hyperparameters, run one prompt, and\n",
"    return the prompt-processing time per token in seconds (the quantity\n",
"    the optimizer minimizes).\"\"\"\n",
"    llm = llama_cpp.Llama(\n",
"        model_path=MODEL_PATH,\n",
"        f16_kv=params[\"f16_kv\"],\n",
"        use_mlock=params[\"use_mlock\"],\n",
"        n_threads=params[\"n_threads\"],\n",
"        n_batch=params[\"n_batch\"],\n",
"    )\n",
"\n",
"    # perf_counter is monotonic and higher-resolution than time.time(),\n",
"    # so it is the right clock for benchmarking.\n",
"    t1 = time.perf_counter()\n",
"    output = llm(\n",
"        PROMPT,\n",
"        max_tokens=1,  # Only optimize prompt processing\n",
"        stop=[\"###\", \"\\n\"],\n",
"        echo=True,\n",
"    )\n",
"    t2 = time.perf_counter()\n",
"\n",
"    # Release the ~4 GB model before the next trial loads its own copy.\n",
"    del llm\n",
"\n",
"    elapsed = t2 - t1\n",
"    time_per_token = elapsed / output[\"usage\"][\"total_tokens\"]\n",
"\n",
"    print(json.dumps(output, indent=2))\n",
"    print(f\"Time: {elapsed} seconds\")\n",
"    print(f\"Time per token: {time_per_token} seconds\")\n",
"\n",
"    return time_per_token"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-d4443e14-fed3-4aa1-9e8a-c70f4503aade\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227287,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 10.981224775314331 seconds\n",
"Time per token: 0.13726530969142914 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-4181439c-2ced-4ddb-b898-a0a7641f3e47\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227300,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 11.121099948883057 seconds\n",
"Time per token: 0.13901374936103822 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-03ed5585-3de0-4546-96c3-6de7a5b3770c\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227312,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 14.457949876785278 seconds\n",
"Time per token: 0.18072437345981598 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-103817fc-bceb-4e99-b968-3ef540f16dc5\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227328,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 10.334054946899414 seconds\n",
"Time per token: 0.12917568683624267 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-41e34acc-6499-450f-9576-3cb37b82c490\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227340,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.012462615966797 seconds\n",
"Time per token: 0.11265578269958496 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-f27244c9-e9c6-4332-ae7f-3856f152ef30\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227350,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 15.59382700920105 seconds\n",
"Time per token: 0.1949228376150131 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-bc5dc1ba-f7ce-441c-a558-5005f2fb89b9\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227366,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 15.544022560119629 seconds\n",
"Time per token: 0.19430028200149535 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-2006b117-1239-4b85-bcc4-a7439c01f440\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227383,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.330769300460815 seconds\n",
"Time per token: 0.11663461625576019 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-ee50afee-78a8-4d55-9b73-c74cc2567408\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227393,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 14.17799687385559 seconds\n",
"Time per token: 0.1772249609231949 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-1e2b7080-940f-4459-8503-a458db4d3578\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227409,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 10.127476215362549 seconds\n",
"Time per token: 0.12659345269203187 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-c80008a4-191e-4418-821a-b18a4af24f70\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227421,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.495943784713745 seconds\n",
"Time per token: 0.11869929730892181 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-d04c9fd2-3c20-4035-9181-0bfd05abfe15\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227432,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.226310014724731 seconds\n",
"Time per token: 0.11532887518405914 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-04fcf88b-33c7-4b84-aac0-dcb5261363c2\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227443,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 12.182626962661743 seconds\n",
"Time per token: 0.15228283703327178 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-14904676-3345-4674-a41c-419d9640b4e0\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227457,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 43.595701694488525 seconds\n",
"Time per token: 0.5449462711811066 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-9e43b2ef-e7de-4bd2-91bf-284f5b3478fe\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227502,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 14.726518154144287 seconds\n",
"Time per token: 0.1840814769268036 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-3947538b-e27e-42eb-8f87-2b56e14d104c\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227518,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.760729789733887 seconds\n",
"Time per token: 0.10950912237167358 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-1a0d843e-9613-49aa-b565-0e59d8067615\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227529,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 11.672860383987427 seconds\n",
"Time per token: 0.14591075479984283 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-ccad9270-9554-4f9f-9aaf-387f1a11894d\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227542,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 14.368357419967651 seconds\n",
"Time per token: 0.17960446774959565 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-2623073e-004f-4386-98e0-7e6ea617523a\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227558,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.44194221496582 seconds\n",
"Time per token: 0.11802427768707276 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-1a199f09-0d74-4052-a191-7a8ef2df57f3\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227569,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 11.253167629241943 seconds\n",
"Time per token: 0.14066459536552428 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-2b61e491-d9b7-4d0b-b0c8-9f8ba822599d\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227582,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 12.381825685501099 seconds\n",
"Time per token: 0.15477282106876372 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-0e4b4575-6278-4bd8-a4c5-ddb772014f7d\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227596,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 14.473106145858765 seconds\n",
"Time per token: 0.18091382682323456 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-1ad3e3db-5120-41c8-8f9e-2ca07a846437\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227612,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 16.591509103775024 seconds\n",
"Time per token: 0.2073938637971878 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-34c8fb5c-fa49-4ea6-b2e7-ba3b958e297d\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227630,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.034043788909912 seconds\n",
"Time per token: 0.1129255473613739 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-8d5c56eb-0b43-4591-a9ac-c1ec174ec6db\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227641,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 11.218972444534302 seconds\n",
"Time per token: 0.14023715555667876 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-bfdc554b-baa6-47c1-b35f-0f7d1321255a\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227654,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.300573110580444 seconds\n",
"Time per token: 0.11625716388225556 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-ad67d78b-6975-4789-982e-3653c7fca7e1\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227665,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.009618520736694 seconds\n",
"Time per token: 0.11262023150920868 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-2eec3e0f-dd48-4c3a-9430-c5048827f557\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227676,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.997699737548828 seconds\n",
"Time per token: 0.11247124671936035 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-b129732a-8d7b-4382-baaf-740378c923ec\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227686,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.252354621887207 seconds\n",
"Time per token: 0.11565443277359008 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-bb25c002-69e0-40ec-8099-0ba4462338aa\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227697,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.040243864059448 seconds\n",
"Time per token: 0.1130030483007431 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-63705814-7c93-4d6b-a9f2-0579941ebf54\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227708,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.947132349014282 seconds\n",
"Time per token: 0.11183915436267852 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-8afe123b-423d-4757-82d9-15fc12cfd24e\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227720,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 10.335533857345581 seconds\n",
"Time per token: 0.12919417321681975 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-4937353f-e66f-4632-aea7-dd1133af9727\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227732,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.99415397644043 seconds\n",
"Time per token: 0.11242692470550537 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-78f86527-ccc7-4a5d-9b7f-38386998ba2a\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227743,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 15.732706308364868 seconds\n",
"Time per token: 0.19665882885456085 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-4d98c564-fcb4-45ec-9f8d-f64430abbfb3\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227761,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.319743633270264 seconds\n",
"Time per token: 0.11649679541587829 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-ee855931-2578-45bc-93bf-319c4e6aa43a\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227772,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 15.189301490783691 seconds\n",
"Time per token: 0.18986626863479614 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-14f0b547-4d71-4a7f-a3d6-3127998903b3\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227790,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.464989423751831 seconds\n",
"Time per token: 0.11831236779689788 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-4eb5258a-5836-414c-88f6-e217bacaded6\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227801,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 13.818569660186768 seconds\n",
"Time per token: 0.1727321207523346 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-66b7c783-d506-45c1-b39b-c91666a02b44\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227817,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 27.316773176193237 seconds\n",
"Time per token: 0.34145966470241546 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-d53b48ca-30e2-43c2-9fb5-62ef6a65fafa\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227847,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.132777214050293 seconds\n",
"Time per token: 0.11415971517562866 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-d0909f83-5caa-4098-a0e6-9b2ad1e2b12f\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227858,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.273045539855957 seconds\n",
"Time per token: 0.11591306924819947 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-7045f5c7-cf5d-48e3-9353-032c320e56fa\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227870,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.90743088722229 seconds\n",
"Time per token: 0.11134288609027862 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-e623667d-d6cc-4908-a648-60380f723592\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227881,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.06355595588684 seconds\n",
"Time per token: 0.11329444944858551 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-44ec163c-25dd-40ae-a786-d8b4c9ff31b1\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227892,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.249061107635498 seconds\n",
"Time per token: 0.11561326384544372 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-cb435214-0d20-4566-b312-68d8960ebe25\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227903,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.296529054641724 seconds\n",
"Time per token: 0.11620661318302154 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-dc704f52-bed9-44f0-8335-a2ec4af3a27c\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227914,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 12.455670356750488 seconds\n",
"Time per token: 0.1556958794593811 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-67570fa5-1c3d-47d6-b7c6-b3a734aae3f5\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227928,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.269653558731079 seconds\n",
"Time per token: 0.11587066948413849 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-4bd6c6f2-9849-4047-93c8-88b1914ef184\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227939,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.308398485183716 seconds\n",
"Time per token: 0.11635498106479644 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-6413afd7-fdc1-4c28-864d-6acdf2775060\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227950,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 10.430264711380005 seconds\n",
"Time per token: 0.13037830889225005 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-c4e1c14a-3b8a-4ab3-b42a-f47440f79962\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227962,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.389702558517456 seconds\n",
"Time per token: 0.1173712819814682 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-ac307870-dc67-42b8-8bb8-bb8d3083cea2\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227974,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 10.35448431968689 seconds\n",
"Time per token: 0.12943105399608612 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-58c06f3e-3fba-4e23-b12e-141a1742c51b\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227986,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.097248792648315 seconds\n",
"Time per token: 0.11371560990810395 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-b5eccb52-85e3-41d0-b8d8-f35e68bf7997\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680227997,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 12.466306686401367 seconds\n",
"Time per token: 0.1558288335800171 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-e1dbc2ee-abc0-4891-a474-386d97b521b6\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228011,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 11.436015367507935 seconds\n",
"Time per token: 0.14295019209384918 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-fd9bce6d-0a33-4c24-90b3-913ab3b33d24\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228025,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 14.052912712097168 seconds\n",
"Time per token: 0.1756614089012146 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-038fa38d-7640-40ee-907c-0bb131c20d80\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228040,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.250384330749512 seconds\n",
"Time per token: 0.1156298041343689 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-d00a2058-9fda-4113-8e5e-bf0f39cef238\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228051,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.228248834609985 seconds\n",
"Time per token: 0.11535311043262482 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-f8d90e63-4939-491c-9775-fc15aa55505e\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228062,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.341724395751953 seconds\n",
"Time per token: 0.11677155494689942 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-9e3777bc-119a-46bf-bdd3-21557e686f3c\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228074,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.285743951797485 seconds\n",
"Time per token: 0.11607179939746856 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-123eaa35-110b-4f73-ba60-fa8a75ea929c\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228085,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.105633020401001 seconds\n",
"Time per token: 0.1138204127550125 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-cc095f4b-8047-446e-a9f5-c798a66d1003\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228096,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.305238485336304 seconds\n",
"Time per token: 0.1163154810667038 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-e2e69b3e-7742-4534-b21f-adfe53345820\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228108,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.190222263336182 seconds\n",
"Time per token: 0.11487777829170227 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-666ae55e-d837-4534-b8e6-9f1b01f69778\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228120,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.126368999481201 seconds\n",
"Time per token: 0.11407961249351502 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-63bdfa8e-b7c3-4669-ab76-54cdbb8878d5\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228131,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.136119604110718 seconds\n",
"Time per token: 0.11420149505138397 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-1ec02c53-c7c8-434e-b28f-70884f8c35b2\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228143,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.126901626586914 seconds\n",
"Time per token: 0.11408627033233643 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-3ec3495b-009a-4a82-b444-d8c1c6bf20a1\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228154,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.08673644065857 seconds\n",
"Time per token: 0.11358420550823212 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-17fd0e6b-7ac3-494f-9e85-4e4a26013ad9\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228165,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.252317428588867 seconds\n",
"Time per token: 0.11565396785736085 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-14a2647f-3961-4b60-b20a-ae9872c34feb\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228177,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 11.389162302017212 seconds\n",
"Time per token: 0.14236452877521516 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-fa0e5edd-e9c9-40b9-bc9b-c48b8762850c\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228190,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.433730125427246 seconds\n",
"Time per token: 0.11792162656784058 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-2b1c5964-265a-488a-8d8f-7e0692fcf96f\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228202,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 47.81757044792175 seconds\n",
"Time per token: 0.5977196305990219 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-516fbd4c-3fe5-4945-bfc5-7312f2c02687\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228252,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.540155410766602 seconds\n",
"Time per token: 0.10675194263458251 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-94c9ab1f-ac6e-4fc7-bcd9-7ab96515a722\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228262,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.660873889923096 seconds\n",
"Time per token: 0.10826092362403869 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-63b1e1a7-0c6b-42e0-ba65-6f42d6ec77bb\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228273,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.815936088562012 seconds\n",
"Time per token: 0.11019920110702515 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-92e1a879-2ebd-4299-b86e-90c87762db45\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228284,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.12400484085083 seconds\n",
"Time per token: 0.11405006051063538 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 512.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-033ea9dc-fffe-41a0-a695-d647f725ee97\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228296,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 13.992429971694946 seconds\n",
"Time per token: 0.17490537464618683 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-5153f39a-589a-4b3d-8642-8efce64fc439\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228312,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.084643125534058 seconds\n",
"Time per token: 0.11355803906917572 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-af9ea5c6-5449-43b4-9e50-da930af8d6b8\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228323,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.076856851577759 seconds\n",
"Time per token: 0.11346071064472199 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-5bbea5c1-ea8c-4599-bf63-a6eb80bc7525\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228334,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.02251124382019 seconds\n",
"Time per token: 0.11278139054775238 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-ff9d87c7-e2b1-4481-9e8f-848d7a0fbd35\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228346,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.012435913085938 seconds\n",
"Time per token: 0.11265544891357422 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-3dbe8ae4-c9ca-4a1b-abaf-6b85ef648ba9\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228357,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.997032880783081 seconds\n",
"Time per token: 0.11246291100978852 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-b20a3b61-9c8b-4b2e-bb43-8ed9ce5a9d0d\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228369,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.042449951171875 seconds\n",
"Time per token: 0.11303062438964843 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-9c781d69-83e0-415a-ac97-252508b10590\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228380,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.058239459991455 seconds\n",
"Time per token: 0.11322799324989319 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-86cead9e-780f-4503-831c-466a6abd5ab2\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228392,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.070426940917969 seconds\n",
"Time per token: 0.1133803367614746 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-65361c7e-74ef-4566-bad5-c6b3867a7f7e\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228403,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.985144138336182 seconds\n",
"Time per token: 0.11231430172920227 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-23feb1ca-8103-46d8-ab71-b4da59f05d16\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228415,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.999938011169434 seconds\n",
"Time per token: 0.11249922513961792 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-0db73f26-9ab1-4a78-a11f-e22d915ffae2\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228426,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.969520330429077 seconds\n",
"Time per token: 0.11211900413036346 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-54e6edeb-99ea-46ed-8735-5185f78c222c\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228438,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.12838339805603 seconds\n",
"Time per token: 0.11410479247570038 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-bd6502fd-f8c7-41d8-ab15-b10ca6aabd96\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228450,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.01610016822815 seconds\n",
"Time per token: 0.11270125210285187 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-72733563-53f5-4cd5-a4eb-48656408b2d8\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228461,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.993805408477783 seconds\n",
"Time per token: 0.11242256760597229 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-f7365eaa-fd68-422b-bbca-c6bcbcad36e0\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228473,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.292223930358887 seconds\n",
"Time per token: 0.11615279912948609 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-1cfcf44a-c692-4020-8dcb-e6da8b163920\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228485,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.99638295173645 seconds\n",
"Time per token: 0.11245478689670563 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-8b679f09-bc0e-4fc9-a935-9fefd9126993\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228497,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.972327709197998 seconds\n",
"Time per token: 0.11215409636497498 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-08cb0cd7-84d8-4193-a20c-5a6ca4b5e404\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228508,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.024793863296509 seconds\n",
"Time per token: 0.11280992329120636 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-ffe4b2b8-c041-4492-9e03-ab79cd4fd60d\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228520,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.996853351593018 seconds\n",
"Time per token: 0.11246066689491271 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-196bb891-9299-4f91-9f68-ba6c7233a2dd\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228532,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.039422273635864 seconds\n",
"Time per token: 0.1129927784204483 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-e50f5489-b40c-4a5d-9cb2-4a6d13bbb8c7\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228544,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 8.978781461715698 seconds\n",
"Time per token: 0.11223476827144623 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-210cc2b8-df35-4d3f-a34a-a5facb635ec0\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228555,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.032035827636719 seconds\n",
"Time per token: 0.11290044784545898 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-e3c7ca0d-c4cb-495c-9210-4e1ed3b6010d\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228567,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.0346040725708 seconds\n",
"Time per token: 0.11293255090713501 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-7b4388c9-fe89-486d-83f4-34eec8940c42\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228579,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.016223907470703 seconds\n",
"Time per token: 0.11270279884338379 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n",
" warnings.warn(\"The objective has been evaluated \"\n",
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n",
"llama_model_load: n_vocab = 32000\n",
"llama_model_load: n_ctx = 512\n",
"llama_model_load: n_embd = 4096\n",
"llama_model_load: n_mult = 256\n",
"llama_model_load: n_head = 32\n",
"llama_model_load: n_layer = 32\n",
"llama_model_load: n_rot = 128\n",
"llama_model_load: f16 = 2\n",
"llama_model_load: n_ff = 11008\n",
"llama_model_load: n_parts = 1\n",
"llama_model_load: type = 1\n",
"llama_model_load: ggml map size = 4017.70 MB\n",
"llama_model_load: ggml ctx size = 81.25 KB\n",
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n",
"llama_model_load: loading tensors from '../models/ggml-model.bin'\n",
"llama_model_load: model size = 4017.27 MB / num tensors = 291\n",
"llama_init_from_file: kv self size = 256.00 MB\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"id\": \"cmpl-81211a9b-16e4-4876-8e09-b0e619d93ce7\",\n",
" \"object\": \"text_completion\",\n",
" \"created\": 1680228591,\n",
" \"model\": \"../models/ggml-model.bin\",\n",
" \"choices\": [\n",
" {\n",
" \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n",
" \"index\": 0,\n",
" \"logprobs\": null,\n",
" \"finish_reason\": \"length\"\n",
" }\n",
" ],\n",
" \"usage\": {\n",
" \"prompt_tokens\": 79,\n",
" \"completion_tokens\": 1,\n",
" \"total_tokens\": 80\n",
" }\n",
"}\n",
"Time: 9.10002589225769 seconds\n",
"Time per token: 0.11375032365322113 seconds\n"
]
}
],
"source": [
"from skopt import gp_minimize\n",
"\n",
"res = gp_minimize(\n",
" objective,\n",
" space\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAANACAYAAADHEZfTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeVxU5f4H8M+AbLKqbIIoKi64AAqK61ULcym37GaFgvzK23VJhdy4JaipuKSRS6KWa5aa2WJuVylNFNMgxRRxQ8AFRFGGRUFm5vfHXKdGFgc8M2eWz/v1mtdhnjnnPN/DPXb5zvc8zyNRKBQKEBERERER0XMxEzsAIiIiIiIiY8DkioiIiIiISABMroiIiIiIiATA5IqIiIiIiEgATK6IiIiIiIgEwOSKiIiIiIhIAEyuiIiIiIiIBMDkioiIiIiISABMroiIiIiIiATA5IqeSaFQ4F//+hcaNmwIiUSCM2fOaL1PiUSC77//Xuv9EBEREREJhckVPdOBAwewadMm/PTTT7h9+zakUimGDBkCDw+PGpOg9PR0DB06FI6OjrC1tUWXLl2QnZ2t2+CJiIiIiHSEyRU909WrV9G4cWP06NED7u7uKCkpgb+/P1avXl3jMb169ULbtm1x5MgRpKWlYfbs2bC2ttZh5EREREREulNP7ABIv40dOxabN28GoHxUr1mzZrh+/ToGDRpU43EffPABBg8ejCVLlqjaWrZsWec4YmNjsW7dOhw8eBDbt29HYmIifvvtN7V9/P39MXLkSMTExNS5HyIiIiKiumLlimr06aefYt68eWjSpAlu376N06dPP/MYuVyOvXv3onXr1hgwYABcXV0RHBxcpzFUCoUC7733HrZs2YJjx47Bz88PoaGhOHXqFK5evara7/z580hLS8Nbb71V6z6IiIiIiITA5Ipq5OjoCHt7e5ibm8Pd3R0uLi7PPObOnTsoLi7GokWLMHDgQPz3v//FiBEj8Oqrr+Lo0aMa911RUYHRo0cjMTERSUlJ8PHxAQC0b98e/v7++Oqrr1T7btu2DcHBwap9iIiIiIh0jckVCU4ulwMAhg0bhsjISAQEBGDWrFl45ZVXkJCQoPF5IiMj8dtvv+HXX3+Fp6en2mehoaGq5EqhUODrr79GaGiocBdBRERERFRLTK5IcM7OzqhXrx7atWun1u7r61ur2QL79++Pmzdv4uDBg5U+e/PNN5GRkYHU1FScOHECOTk5GDVq1HPHTkRERERUV5zQggRnaWmJLl26ICMjQ6390qVLaNasmcbnGTp0KIYMGYK33noL5ubmeOONN1SfNWnSBH369MG2bdvw8OFD9O/fH66uroJdAxERERFRbTG5olorLi7GlStXVO8zMzNx5swZNGzYEE2bNgUATJ8+HaNGjcI//vEP9OvXDwcOHMCePXtw5MiRWvU1YsQIbN26FWPGjEG9evXw2muvqT4LDQ1FbGwsysvL8cknnwhybUREREREdcXkimrt999/R79+/VTvo6KiAADh4eHYtGkTAGVSlJCQgLi4OEyePBlt2rTBt99+i169etW6v9deew1yuRxjxoyBmZkZXn31VVX7pEmTYG5ujuHDhz/3dRERERERPQ+JQqFQiB0EERERERGRoeOEFkRERERERAJgckU6t23bNtjZ2VX5at++vdjhERERERHVCR8LJJ0rKipCXl5elZ9ZWFjUakZBIiIiIiJ9weSKiIiIiIhIAHwskIiIiIiISABMroj02KZNm+Dk5CR2GERERESkASZXT5FIJDW+5syZI3aIZIDGjh1b5f3098WYiYiIiMiwcRHhp9y+fVv1844dOxATE4OMjAxVm52dnepnhUIBmUyGevX4a6RnGzhwIDZu3KjW5uLiIlI0RERERCQ0Vq6e4u7urno5OjpCIpGo3l+8eBH29vbYv38/AgMDYWVlhaSkJIwdOxbDhw9XO8/UqVPRt29f1Xu5XI64uDg0b94cNj
Y28Pf3x65du3R7cSQqKysrtfvL3d0dn376KTp27AhbW1t4eXlhwoQJKC4urvYcZ8+eRb9+/WBvbw8HBwcEBgbi999/V32elJSE3r17w8bGBl5eXpg8eTJKSkp0cXlEREREJo/JVR3MmjULixYtQnp6Ovz8/DQ6Ji4uDlu2bEFCQgLOnz+PyMhIjB49GkePHtVytKTPzMzMsGLFCpw/fx6bN2/Gzz//jBkzZlS7f2hoKJo0aYLTp08jJSUFs2bNgoWFBQDg6tWrGDhwIEaOHIm0tDTs2LEDSUlJmDRpkq4uh4iIiMik8Xm2Opg3bx769++v8f5lZWVYuHAhDh8+jO7duwMAWrRogaSkJKxduxZ9+vTRVqikR3766Se1x0oHDRqEb775RvXe29sb8+fPx7///W989tlnVZ4jOzsb06dPR9u2bQEArVq1Un0WFxeH0NBQTJ06VfXZihUr0KdPH6xZswbW1tZauCoiIiIieoLJVR0EBQXVav8rV66gtLS0UkJWXl6OTp06CRka6bF+/fphzZo1qve2trY4fPgw4uLicPHiRUilUlRUVODRo0coLS1F/fr1K50jKioK77zzDrZu3YqQkBD885//RMuWLQEoHxlMS0vDtm3bVPsrFArI5XJkZmbC19dX+xdJREREZMKYXNWBra2t2nszMzM8vRbz48ePVT8/GUOzd+9eeHp6qu1nZWWlpShJ39ja2sLHx0f1/vr163jllVcwfvx4LFiwAA0bNkRSUhLefvttlJeXV5lczZkzB2+99Rb27t2L/fv3IzY2Ftu3b8eIESNQXFyMd999F5MnT650XNOmTbV6bURERETE5EoQLi4u+PPPP9Xazpw5oxoL065dO1hZWSE7O5uPAJJKSkoK5HI5li1bBjMz5fDHnTt3PvO41q1bo3Xr1oiMjMSbb76JjRs3YsSIEejcuTMuXLiglsARERERke5wQgsBvPDCC/j999+xZcsWXL58GbGxsWrJlr29PaZNm4bIyEhs3rwZV69eRWpqKlauXInNmzeLGDmJycfHB48fP8bKlStx7do1bN26FQkJCdXu//DhQ0yaNAlHjhxBVlYWjh8/jtOnT6se95s5cyZOnDiBSZMm4cyZM7h8+TJ++OEHTmhBREREpCNMrgQwYMAAzJ49GzNmzECXLl1QVFSEsLAwtX0++ugjzJ49G3FxcfD19cXAgQOxd+9eNG/eXKSoSWz+/v5Yvnw5Fi9ejA4dOmDbtm2Ii4urdn9zc3Pcu3cPYWFhaN26NV5//XUMGjQIc+fOBQD4+fnh6NGjuHTpEnr37o1OnTohJiYGHh4eurokIiIiIpMmUTw9WIiIiIiIiIhqjZUrIiIiIiIiATC5IiIiIiIiEgCTKyIiIiIiIgEwuSIiIiIiIhIAkysiIiIiIiIBMLkiIiIiIiISAJMrAZSVlWHOnDkoKysTOxQyQry/iIiIiAwD17kSgFQqhaOjIwoLC+Hg4CB2OGRkeH8RERERGQZWroiIiIiIiATA5IqIiIiIiEgA9cQOwFDI5XLcunUL9vb2kEgkap9JpVK1LZGQnnV/KRQKFBUVwcPDA2Zm/L6EiIiISCwcc6WhGzduwMvLS+wwiKqVk5ODJk2aaLz/6tWrsXTpUuTm5sLf3x8rV65E165dq93/wYMH+OCDD7B7924UFBSgWbNmiI+Px+DBg4UIn4iIiMjgsXKlIXt7ewDKP2ArTSqQkwPExwNTpwJMwEjHpFIpvLy8VPeoJnbs2IGoqCgkJCQgODgY8fHxGDBgADIyMuDq6lpp//LycvTv3x+urq7YtWsXPD09kZWVBScnJwGvhIiIiMiwsXKloRpnbEtNBQIDgZQUoHNncQIkk1WX2QSDg4PRpUsXrFq1CoDysVcvLy+89957mDVrVqX9ExISsHTpUly8eBEWFhaCxk9ERERkLDhAoxplZWWQSqVqLyJ99vT9Wt26WOXl5UhJSUFISIiqzczMDCEhIUhOTq7ymB9//B
Hdu3fHxIkT4ebmhg4dOmDhwoWQyWRauRYiIiIiQ8TkqhpxcXFwdHRUvTjeivSdl5eX2j0bFxdX5X53796FTCaDm5u
"text/plain": [
"<Figure size 800x800 with 16 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"from skopt.plots import plot_objective\n",
"\n",
"# Pairwise partial-dependence plots of the objective over the search space;\n",
"# diagonals show each hyperparameter's marginal effect on time-per-token.\n",
"ax = plot_objective(res)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
" fun: 0.10675194263458251\n",
" x: [True, True, 6, 2048]\n",
" func_vals: [ 1.373e-01 1.390e-01 ... 1.127e-01 1.138e-01]\n",
" x_iters: [[True, True, 5, 1300], [False, True, 5, 990], [True, True, 7, 1800], [False, False, 10, 1692], [False, True, 6, 1075], [True, False, 3, 291], [False, True, 3, 514], [False, False, 11, 1569], [False, False, 7, 1915], [False, True, 10, 1514], [False, False, 11, 1527], [False, False, 12, 2033], [False, True, 9, 3], [False, True, 1, 2004], [True, True, 12, 1], [False, False, 6, 2048], [False, False, 4, 2048], [False, False, 10, 1], [False, True, 11, 2048], [False, True, 9, 2048], [False, False, 8, 2017], [False, False, 6, 1], [False, True, 4, 1], [False, False, 6, 1587], [False, False, 9, 1056], [True, True, 12, 1450], [False, True, 6, 2048], [False, False, 6, 2048], [False, False, 6, 2048], [False, True, 6, 2048], [False, True, 6, 2048], [False, True, 5, 2048], [False, True, 6, 1464], [False, True, 8, 1], [True, True, 12, 1798], [True, False, 3, 2048], [True, True, 11, 683], [False, True, 11, 1], [True, True, 2, 1], [False, True, 11, 1238], [True, True, 11, 1260], [True, False, 6, 1295], [True, True, 6, 1292], [False, False, 12, 1250], [False, False, 12, 1200], [True, False, 4, 1250], [False, False, 12, 1191], [False, False, 12, 1180], [True, False, 10, 906], [False, False, 12, 1192], [True, True, 10, 2044], [False, False, 6, 1310], [False, False, 8, 1122], [True, False, 5, 4], [False, False, 7, 322], [False, False, 12, 1246], [False, False, 12, 1247], [False, False, 12, 1252], [True, True, 12, 811], [True, False, 6, 2048], [True, True, 12, 998], [False, True, 12, 1021], [False, True, 12, 1021], [False, True, 12, 1019], [True, False, 6, 759], [True, False, 6, 1064], [False, True, 12, 991], [True, True, 9, 533], [False, False, 11, 956], [False, False, 1, 3], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [False, False, 7, 986], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], 
[True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048]]\n",
" models: [GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097)]\n",
" space: Space([Categorical(categories=(True, False), prior=None),\n",
" Categorical(categories=(True, False), prior=None),\n",
" Integer(low=1, high=12, prior='uniform', transform='normalize'),\n",
" Integer(low=1, high=2048, prior='uniform', transform='normalize')])\n",
" random_state: RandomState(MT19937)\n",
" specs: args: func: <function objective at 0x7f46cd4f8e50>\n",
" dimensions: Space([Categorical(categories=(True, False), prior=None),\n",
" Categorical(categories=(True, False), prior=None),\n",
" Integer(low=1, high=12, prior='uniform', transform='normalize'),\n",
" Integer(low=1, high=2048, prior='uniform', transform='normalize')])\n",
" base_estimator: GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5),\n",
" n_restarts_optimizer=2, noise='gaussian',\n",
" normalize_y=True, random_state=1248744097)\n",
" n_calls: 100\n",
" n_random_starts: None\n",
" n_initial_points: 10\n",
" initial_point_generator: random\n",
" acq_func: gp_hedge\n",
" acq_optimizer: auto\n",
" x0: None\n",
" y0: None\n",
" random_state: RandomState(MT19937)\n",
" verbose: False\n",
" callback: None\n",
" n_points: 10000\n",
" n_restarts_optimizer: 5\n",
" xi: 0.01\n",
" kappa: 1.96\n",
" n_jobs: 1\n",
" model_queue_size: None\n",
" function: base_minimize"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the full OptimizeResult: best score (fun), best parameters (x),\n",
"# and the complete search history (x_iters / func_vals).\n",
"res"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}