I was playing around with Ollama and getting streaming responses to work, made a custom interface using tailwind and alpinejs, served from node-red using a websocket, libraries are coming from cdn's but could be included locally too, but example flow became too large.
Simple flow, setup the ollama server address and model (make sure that ollama is accessible outside localhost), inject it once (will set a flow variable).
Open a browser window and point it to /ollama_chat
and hopefully you can chat with your ollama llm.
Type anything and click send or use ctrl+enter to submit it.
Currently the interaction is based on llama2, will need some modifications when interacting with other models.
The ollama chat
function will install the ollama npm package as dependency.
Using node-red for this does not directly make sense, but that's not the point :')
It types/streams responses
Flow:
[{"id":"60fdaddad83dd177","type":"tab","label":"ollama_chat","disabled":false,"info":"","env":[]},{"id":"0a050a4b1676969e","type":"function","z":"60fdaddad83dd177","name":"ollama chat","func":"const server = flow.get(\"ollama_server\")\nconst {res,req} = msg\nconst o = new ollama.Ollama({ host: `${server.host}:${server.port}` })\nconst message = { role: 'user', content: msg.payload }\nconst response = await o.chat({ model: server.model, messages: [message], stream: true })\n// @ts-ignore\nfor await (const part of response) {\n node.send({payload:part,req,res})\n}\n\nreturn null\n","outputs":1,"timeout":0,"noerr":0,"initialize":"","finalize":"","libs":[{"var":"ollama","module":"ollama"}],"x":350,"y":180,"wires":[["88b9ffdb65494f2c"]]},{"id":"d934559fa2d213d0","type":"inject","z":"60fdaddad83dd177","name":"","props":[{"p":"payload"},{"p":"topic","vt":"str"}],"repeat":"","crontab":"","once":true,"onceDelay":0.1,"topic":"","payload":"","payloadType":"date","x":125,"y":60,"wires":[["dd8ebd3976b2de10"]],"l":false},{"id":"dd8ebd3976b2de10","type":"template","z":"60fdaddad83dd177","name":"ollama server address","field":"ollama_server","fieldType":"flow","format":"yaml","syntax":"mustache","template":"host: 10.0.0.191\nport: 11434\nmodel: 'llama2'","output":"yaml","x":260,"y":60,"wires":[[]]},{"id":"d4c0b223cd7fdbc3","type":"websocket in","z":"60fdaddad83dd177","name":"","server":"356c7f041a66096b","client":"","x":180,"y":180,"wires":[["0a050a4b1676969e"]]},{"id":"88b9ffdb65494f2c","type":"websocket out","z":"60fdaddad83dd177","name":"","server":"356c7f041a66096b","client":"","x":520,"y":180,"wires":[]},{"id":"9f98aa5af9412608","type":"http in","z":"60fdaddad83dd177","name":"","url":"/ollama_chat","method":"get","upload":false,"swaggerDoc":"","x":190,"y":120,"wires":[["6f1ae63ef4fd8653"]]},{"id":"6f1ae63ef4fd8653","type":"template","z":"60fdaddad83dd177","name":"html","field":"payload","fieldType":"msg","format":"handlebars","syntax":"mustache","template":"<!DOCTYPE html>\n<html lang=\"en\">\n <head>\n <meta charset=\"UTF-8\" />\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />\n <title>Ollama chat</title>\n <script src=\"https://cdnjs.cloudflare.com/ajax/libs/alpinejs/3.14.0/cdn.min.js\" defer></script>\n <script src=\"https://unpkg.com/@vimesh/style\"></script>\n <script src=\"https://cdn.jsdelivr.net/npm/marked/marked.min.js\"></script>\n <style>\n pre {\n margin: 24px 0px !important;\n background-color: #475569;\n color: #fff;\n padding: 12px;\n border-radius: 3px;\n cursor: pointer;\n }\n\n ul,\n ol {\n list-style-type: decimal;\n list-style-position: inside;\n margin-left: 8px;\n margin-top: 8px;\n margin-bottom: 8px;\n }\n ol li {\n margin-top: 8px;\n }\n ul ul,\n ol ul {\n list-style-type: circle;\n list-style-position: inside;\n margin-left: 15px;\n }\n ol ol,\n ul ol {\n list-style-type: lower-latin;\n list-style-position: inside;\n margin-left: 15px;\n }\n p {\n margin-top: 8px;\n margin-bottom: 8px;\n }\n </style>\n </head>\n <body class=\"font-sans bg-zinc-300\">\n <div class=\"flex-1 justify-between flex flex-col h-screen\" x-data=\"load()\">\n <div id=\"messages\" class=\"flex flex-col p-4 overflow-y-auto scrollbar-thumb-blue scrollbar-thumb-rounded scrollbar-track-blue-lighter scrollbar-w-2 scrolling-touch\">\n <template x-for=\"r in responses\">\n <div class=\"space-y-4\">\n <template x-if=\"r.q\">\n <div class=\"chat-message mb-4 mt-4 cursor-pointer\" @click=\"document.querySelector('#chat_input').value = r.q\">\n <div class=\"flex items-end justify-end\">\n <div class=\"flex flex-col space-y-2 text-sm max-w-xs mx-2 order-1 items-end\">\n <div><span class=\"px-4 py-2 rounded-lg inline-block bg-indigo-500 text-white\" x-text=\"r.q\"></span></div>\n </div>\n </div>\n </div>\n </template>\n <div class=\"chat-message mt-2\">\n <div class=\"flex items-end\">\n <div class=\"flex flex-col text-sm max-w-3xl mx-2 order-2 items-start\">\n <div><span class=\"px-4 py-2 rounded-lg inline-block bg-gray-100 text-gray-700\" :class=\"r.response.length>0 ? '' : 'hidden'\" x-html=\"r.response.length>0 ? marked.parse(r.response) : '' \"></span></div>\n </div>\n </div>\n </div>\n </div>\n </template>\n <div class=\"chat-message\">\n <div class=\"flex items-end\">\n <div class=\"flex flex-col space-y-2 text-sm max-w-3xl mx-2 order-2 items-start\">\n <div>\n <template x-if=\"waiting && resp.length ==0\">\n <span class=\"px-4 py-2 rounded-lg inline-block bg-gray-100 text-gray-700 animate-pulse\" x-text=\"waiting && resp.length ==0 ? '...' : ''\"></span>\n </template>\n <span class=\"px-4 py-2 rounded-lg inline-block bg-gray-100 text-gray-700\" x-html=\"marked.parse(resp)\" :class=\"resp.length== 0 ? 'hidden':''\"></span>\n </div>\n </div>\n </div>\n </div>\n </div>\n <div class=\"bg-white border-t border-gray-300 p-4 mb-2 sm:mb-0\">\n <div class=\"relative flex items-start\">\n <textarea id=\"chat_input\" @keyup.ctrl.enter=\"chat()\" rows=\"4\" type=\"text\" placeholder=\"Ask a question\" class=\"max-h-[80px] w-full overflow-y-auto focus:outline-none focus:border-indigo-500 focus:placeholder-gray-500 text-gray-600 placeholder-gray-400 px-4 py-3 bg-gray-100 rounded-md border-2 border-gray-300\"></textarea>\n <div class=\"right-4 items-center inset-y-0 hidden sm:flex\">\n <button type=\"button\" class=\"inline-flex items-center justify-center rounded-lg px-3 py-2 ml-2 transition duration-500 ease-in-out text-white bg-indigo-500 hover:bg-indigo-400 focus:outline-none\" @click=\"chat()\">\n <span class=\"font-semibold text-sm\">Send</span>\n </button>\n </div>\n </div>\n </div>\n </div>\n\n <script>\n const el = document.getElementById(\"messages\")\n const data = []\n const responses = []\n\n let resp\n let code_elements\n let waiting = false\n function load() {\n return {\n init() {\n this.responses = responses\n\n this.resp = \"\"\n this.ws = new WebSocket(location.origin.replace(/^http/, 'ws') + '/ws/ollama')\n this.ws.onopen = (event) => {\n this.responses.push({ q: false, response: \"Ready for input...\" })\n }\n this.ws.onerror = (event) => {\n this.responses.push({ q: false, response: \"Error connecting to ollama.\" })\n }\n\n this.ws.onmessage = (event) => {\n const input = JSON.parse(event.data)\n el.scrollTop = el.scrollHeight\n this.waiting = true\n this.resp += input.message.content\n if (input.done) {\n const responseIndex = this.responses.length - 1\n this.responses[responseIndex].response = this.resp\n this.resp = \"\"\n this.waiting = false\n el.scrollTop = el.scrollHeight\n }\n }\n },\n chat() {\n const q = document.getElementById(\"chat_input\").value\n this.responses.push({ q, response: false })\n\n this.ws.send(q)\n this.waiting = true\n document.getElementById(\"chat_input\").value = \"\"\n setTimeout(() => {\n el.scrollTop = el.scrollHeight\n }, 100)\n },\n }\n }\n </script>\n </body>\n</html>\n","output":"str","x":370,"y":120,"wires":[["c08e76930daefda7"]]},{"id":"c08e76930daefda7","type":"http response","z":"60fdaddad83dd177","name":"","statusCode":"","headers":{},"x":510,"y":120,"wires":[]},{"id":"356c7f041a66096b","type":"websocket-listener","path":"/ws/ollama","wholemsg":"false"}]