From c1760e043ae157aea9653d040cb73c8e61c8f6dc Mon Sep 17 00:00:00 2001
From: Dustin <h1ddenpr0cess2085@gmail.com>
Date: Tue, 30 Jul 2024 21:07:47 -0400
Subject: [PATCH] replaced litellm with ollama api, other improvements

---
 README.md     |  8 +++---
 config.json   | 28 ++++++++++++---------
 ollamarama.py | 67 ++++++++++++++++++++++++++++-----------------------
 3 files changed, 58 insertions(+), 45 deletions(-)

diff --git a/README.md b/README.md
index 079fc94..f49deb5 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # ollamarama-matrix
-Ollamarama is an AI chatbot for the [Matrix](https://matrix.org/) chat protocol using LiteLLM and Ollama. It can roleplay as almost anything you can think of. You can set any default personality you would like. It can be changed at any time, and each user has their own separate chat history with their chosen personality setting. Users can interact with each others chat histories for collaboration if they would like, but otherwise, conversations are separated, per channel, per user.
+Ollamarama is an AI chatbot for the [Matrix](https://matrix.org/) chat protocol using Ollama. It can roleplay as almost anything you can think of. You can set any default personality you would like. It can be changed at any time, and each user has their own separate chat history with their chosen personality setting. Users can interact with each other's chat histories for collaboration if they would like, but otherwise, conversations are separated, per channel, per user.
 
 This is based on my earlier project, [infinigpt-matrix](https://github.com/h1ddenpr0cess20/infinigpt-matrix), which uses OpenAI and costs money to use.  (Now updated with OpenAI/Ollama model switching)
 
@@ -17,12 +17,12 @@ curl https://ollama.ai/install.sh | sh
 ```
 
 
-Once it's all set up, you'll need to [download the models](https://ollama.ai/library) you want to use.  You can play with the available ones and see what works best for you.  Add those to the config.json file.  If you want to use the ones I've included, just run ollama pull _modelname_ for each.  You can skip this part, and they should download when the model is switched, but the response will be delayed until it finishes downloading.
+Once it's all set up, you'll need to [download the models](https://ollama.ai/library) you want to use.  You can play with the available ones and see what works best for you.  Add those to the config.json file.  If you want to use the ones I've included, just run ollama pull _modelname_ for each.
 
 
-You'll also need to install matrix-nio and litellm
+You'll also need to install matrix-nio and requests
 ```
-pip3 install matrix-nio litellm
+pip3 install matrix-nio requests
 ```
 
 Set up a [Matrix account](https://app.element.io/) for your bot.  You'll need the server, username and password.
diff --git a/config.json b/config.json
index 1a1c7c0..c32e02c 100644
--- a/config.json
+++ b/config.json
@@ -2,19 +2,19 @@
     {
         "models":
         {
-            "llama3": "ollama/llama3",
-            "wizardlm2": "ollama/wizardlm2",
-            "phi3": "ollama/phi3",
-            "zephyr": "ollama/zephyr",
-            "solar": "ollama/solar",
-            "mistral": "ollama/mistral",
-            "codellama": "ollama/codellama",
-            "dolphin-mistral": "ollama/dolphin-mistral",
-            "dolphin-llama3": "ollama/dolphin-llama3"
+            "llama3": "llama3:8b-instruct-q5_K_M",
+            "wizardlm2": "wizardlm2:7b-q5_K_M",
+            "phi3": "phi3:14b-medium-4k-instruct-q5_K_M",
+            "zephyr": "zephyr:7b-beta-q5_K_M",
+            "solar": "solar:10.7b-instruct-v1-q5_K_M",
+            "mistral": "mistral:7b-instruct-v0.2-q5_K_M",
+            "dolphin-mistral": "dolphin-mistral:7b-v2.8-q5_K_M",
+            "dolphin-llama3": "dolphin-llama3:8b-v2.9-q5_K_M",
+            "llama3.1": "llama3.1:8b-instruct-q5_K_M"
             
         },
         
-        "default_model": "llama3"
+        "default_model": "llama3.1"
 
     },
     {
@@ -39,6 +39,12 @@
 
     },
     {
-        "api_base": "http://localhost:11434"
+        "api_base": "http://localhost:11434",
+        "options":
+        {
+            "temperature": 0.8,
+            "top_p": 0.7,
+            "repeat_penalty": 1.2
+        }
     }
 ]
\ No newline at end of file
diff --git a/ollamarama.py b/ollamarama.py
index 7695a3a..1ecfb14 100644
--- a/ollamarama.py
+++ b/ollamarama.py
@@ -6,10 +6,10 @@ Date: December 2023
 """
 
 from nio import AsyncClient, MatrixRoom, RoomMessageText
-from litellm import completion
 import json
 import datetime
 import asyncio
+import requests
 
 class ollamarama:
     def __init__(self):
@@ -20,7 +20,7 @@ class ollamarama:
             f.close()
 
         self.server, self.username, self.password, self.channels, self.default_personality, self.admins = config[1].values()
-        self.api_base = config[2]['api_base']
+        self.api_url = config[2]['api_base'] + "/api/chat"
         self.personality = self.default_personality
 
         self.client = AsyncClient(self.server, self.username)
@@ -40,9 +40,13 @@ class ollamarama:
         self.model = self.default_model
 
         #no idea if optimal, change if necessary
-        self.temperature = .9
-        self.top_p = .7
-        self.repeat_penalty = 1.5
+        self.temperature, self.top_p, self.repeat_penalty = config[2]['options'].values()
+        self.defaults = {
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "repeat_penalty": self.repeat_penalty
+        }
+        
 
         #load help menu
         with open("help.txt", "r") as f:
@@ -92,26 +96,27 @@ class ollamarama:
     #generate Ollama model response
     async def respond(self, channel, sender, message, sender2=None):
         try:
-            #Generate response
-            response = completion(
-                api_base=self.api_base,
-                model=self.model,
-                temperature=self.temperature,
-                top_p=self.top_p,
-                repeat_penalty=self.repeat_penalty,
-                messages=message,
-                timeout=60
-                ) 
+            # Generate response
+            data = {
+                "model": self.model, 
+                "messages": message, 
+                "stream": False,
+                "options": {
+                    "top_p": self.top_p,
+                    "temperature": self.temperature,
+                    "repeat_penalty": self.repeat_penalty
+                    }
+                }
+            response = requests.post(self.api_url, json=data, timeout=60)
+            response.raise_for_status()
+            data = response.json()
+            
         except Exception as e:
             await self.send_message(channel, "Something went wrong")
             print(e)
         else:
             #Extract response text
-            response_text = response.choices[0].message.content
-            
-            #check for unwanted quotation marks around response and remove them
-            if response_text.startswith('"') and response_text.endswith('"'):
-                response_text = response_text.strip('"')
+            response_text = data["message"]['content']
 
             #add to history
             await self.add_history("assistant", channel, sender, response_text)
@@ -129,7 +134,10 @@ class ollamarama:
                 print(e)
             #Shrink history list for token size management 
             if len(self.messages[channel][sender]) > 24:
-                del self.messages[channel][sender][1:3]  #delete the first set of question and answers 
+                if self.messages[channel][sender][0]['role'] == 'system':
+                    del self.messages[channel][sender][1:3]  #delete the first set of question and answers
+                else:
+                    del self.messages[channel][sender][0:2]
 
     # change the personality of the bot
     async def persona(self, channel, sender, persona):
@@ -224,17 +232,16 @@ class ollamarama:
                         if message == ".clear":
                             self.messages.clear()
                             self.model = self.default_model
-                            self.temperature = .9
-                            self.top_p = .7
-                            self.repeat_penalty = 1.5
+                            self.temperature, self.top_p, self.repeat_penalty = self.defaults.values()
+
                             await self.send_message(room_id, "Bot has been reset for everyone")
 
                         if message.startswith((".temperature ", ".top_p ", ".repeat_penalty ")):
                             attr_name = message.split()[0][1:]
                             min_val, max_val, default_val = {
-                                "temperature": (0, 1, 0.9),
-                                "top_p": (0, 1, 0.7),
-                                "repeat_penalty": (0, 2, 1.5)
+                                "temperature": (0, 1, self.defaults['temperature']),
+                                "top_p": (0, 1, self.defaults['top_p']),
+                                "repeat_penalty": (0, 2, self.defaults['repeat_penalty'])
                             }[attr_name]
 
                             if message.endswith(" reset"):
@@ -256,7 +263,7 @@ class ollamarama:
                     if message != ".ai reset":
                         m = message.split(" ", 1)
                         try:
-                            m = m[1]#  + " [your response must be one paragraph or less]"
+                            m = m[1]
                             await self.add_history("user", room_id, sender, m)
                             await self.respond(room_id, sender, self.messages[room_id][sender])
                         except:
@@ -268,7 +275,7 @@ class ollamarama:
                     if len(m) > 1:
                         disp_name = m[0]
                         name_id = ""
-                        m = m[1]# + " [your response must be one paragraph or less]"
+                        m = m[1]
                         if room_id in self.messages:
                             for user in self.messages[room_id]:
                                 try:
@@ -284,7 +291,7 @@ class ollamarama:
                 #change personality    
                 if message.startswith(".persona "):
                     m = message.split(" ", 1)
-                    m = m[1]# + " [your response must be one paragraph or less]"
+                    m = m[1]
                 
                     await self.persona(room_id, sender, m)
                     await self.respond(room_id, sender, self.messages[room_id][sender])