replaced litellm with ollama api, other improvements

This commit is contained in:
Dustin 2024-07-30 21:07:47 -04:00
parent a50e442de9
commit c1760e043a
3 changed files with 58 additions and 45 deletions

View File

@ -1,5 +1,5 @@
# ollamarama-matrix # ollamarama-matrix
Ollamarama is an AI chatbot for the [Matrix](https://matrix.org/) chat protocol using LiteLLM and Ollama. It can roleplay as almost anything you can think of. You can set any default personality you would like. It can be changed at any time, and each user has their own separate chat history with their chosen personality setting. Users can interact with each other's chat histories for collaboration if they would like, but otherwise, conversations are separated, per channel, per user. Ollamarama is an AI chatbot for the [Matrix](https://matrix.org/) chat protocol using Ollama. It can roleplay as almost anything you can think of. You can set any default personality you would like. It can be changed at any time, and each user has their own separate chat history with their chosen personality setting. Users can interact with each other's chat histories for collaboration if they would like, but otherwise, conversations are separated, per channel, per user.
This is based on my earlier project, [infinigpt-matrix](https://github.com/h1ddenpr0cess20/infinigpt-matrix), which uses OpenAI and costs money to use. (Now updated with OpenAI/Ollama model switching) This is based on my earlier project, [infinigpt-matrix](https://github.com/h1ddenpr0cess20/infinigpt-matrix), which uses OpenAI and costs money to use. (Now updated with OpenAI/Ollama model switching)
@ -17,12 +17,12 @@ curl https://ollama.ai/install.sh | sh
``` ```
Once it's all set up, you'll need to [download the models](https://ollama.ai/library) you want to use. You can play with the available ones and see what works best for you. Add those to the config.json file. If you want to use the ones I've included, just run ollama pull _modelname_ for each. You can skip this part, and they should download when the model is switched, but the response will be delayed until it finishes downloading. Once it's all set up, you'll need to [download the models](https://ollama.ai/library) you want to use. You can play with the available ones and see what works best for you. Add those to the config.json file. If you want to use the ones I've included, just run ollama pull _modelname_ for each.
You'll also need to install matrix-nio and litellm You'll also need to install matrix-nio
``` ```
pip3 install matrix-nio litellm pip3 install matrix-nio
``` ```
Set up a [Matrix account](https://app.element.io/) for your bot. You'll need the server, username and password. Set up a [Matrix account](https://app.element.io/) for your bot. You'll need the server, username and password.

View File

@ -2,19 +2,19 @@
{ {
"models": "models":
{ {
"llama3": "ollama/llama3", "llama3": "llama3:8b-instruct-q5_K_M",
"wizardlm2": "ollama/wizardlm2", "wizardlm2": "wizardlm2:7b-q5_K_M",
"phi3": "ollama/phi3", "phi3": "phi3:14b-medium-4k-instruct-q5_K_M",
"zephyr": "ollama/zephyr", "zephyr": "zephyr:7b-beta-q5_K_M",
"solar": "ollama/solar", "solar": "solar:10.7b-instruct-v1-q5_K_M",
"mistral": "ollama/mistral", "mistral": "mistral:7b-instruct-v0.2-q5_K_M",
"codellama": "ollama/codellama", "dolphin-mistral": "dolphin-mistral:7b-v2.8-q5_K_M",
"dolphin-mistral": "ollama/dolphin-mistral", "dolphin-llama3": "dolphin-llama3:8b-v2.9-q5_K_M",
"dolphin-llama3": "ollama/dolphin-llama3" "llama3.1": "llama3.1:8b-instruct-q5_K_M"
}, },
"default_model": "llama3" "default_model": "llama3.1"
}, },
{ {
@ -39,6 +39,12 @@
}, },
{ {
"api_base": "http://localhost:11434" "api_base": "http://localhost:11434",
"options":
{
"temperature": 0.8,
"top_p": 0.7,
"repeat_penalty": 1.2
}
} }
] ]

View File

@ -6,10 +6,10 @@ Date: December 2023
""" """
from nio import AsyncClient, MatrixRoom, RoomMessageText from nio import AsyncClient, MatrixRoom, RoomMessageText
from litellm import completion
import json import json
import datetime import datetime
import asyncio import asyncio
import requests
class ollamarama: class ollamarama:
def __init__(self): def __init__(self):
@ -20,7 +20,7 @@ class ollamarama:
f.close() f.close()
self.server, self.username, self.password, self.channels, self.default_personality, self.admins = config[1].values() self.server, self.username, self.password, self.channels, self.default_personality, self.admins = config[1].values()
self.api_base = config[2]['api_base'] self.api_url = config[2]['api_base'] + "/api/chat"
self.personality = self.default_personality self.personality = self.default_personality
self.client = AsyncClient(self.server, self.username) self.client = AsyncClient(self.server, self.username)
@ -40,9 +40,13 @@ class ollamarama:
self.model = self.default_model self.model = self.default_model
#no idea if optimal, change if necessary #no idea if optimal, change if necessary
self.temperature = .9 self.temperature, self.top_p, self.repeat_penalty = config[2]['options'].values()
self.top_p = .7 self.defaults = {
self.repeat_penalty = 1.5 "temperature": self.temperature,
"top_p": self.top_p,
"repeat_penalty": self.repeat_penalty
}
#load help menu #load help menu
with open("help.txt", "r") as f: with open("help.txt", "r") as f:
@ -92,26 +96,27 @@ class ollamarama:
#generate Ollama model response #generate Ollama model response
async def respond(self, channel, sender, message, sender2=None): async def respond(self, channel, sender, message, sender2=None):
try: try:
#Generate response # #Generate response
response = completion( data = {
api_base=self.api_base, "model": self.model,
model=self.model, "messages": message,
temperature=self.temperature, "stream": False,
top_p=self.top_p, "options": {
repeat_penalty=self.repeat_penalty, "top_p": self.top_p,
messages=message, "temperature": self.temperature,
timeout=60 "repeat_penalty": self.repeat_penalty
) }
}
response = requests.post(self.api_url, json=data)
response.raise_for_status()
data = response.json()
except Exception as e: except Exception as e:
await self.send_message(channel, "Something went wrong") await self.send_message(channel, "Something went wrong")
print(e) print(e)
else: else:
#Extract response text #Extract response text
response_text = response.choices[0].message.content response_text = data["message"]['content']
#check for unwanted quotation marks around response and remove them
if response_text.startswith('"') and response_text.endswith('"'):
response_text = response_text.strip('"')
#add to history #add to history
await self.add_history("assistant", channel, sender, response_text) await self.add_history("assistant", channel, sender, response_text)
@ -129,7 +134,10 @@ class ollamarama:
print(e) print(e)
#Shrink history list for token size management #Shrink history list for token size management
if len(self.messages[channel][sender]) > 24: if len(self.messages[channel][sender]) > 24:
del self.messages[channel][sender][1:3] #delete the first set of question and answers if self.messages[channel][sender][0]['role'] == 'system':
del self.messages[channel][sender][1:3] #delete the first set of question and answers
else:
del self.messages[channel][sender][0:2]
# change the personality of the bot # change the personality of the bot
async def persona(self, channel, sender, persona): async def persona(self, channel, sender, persona):
@ -224,17 +232,16 @@ class ollamarama:
if message == ".clear": if message == ".clear":
self.messages.clear() self.messages.clear()
self.model = self.default_model self.model = self.default_model
self.temperature = .9 self.temperature, self.top_p, self.repeat_penalty = self.defaults
self.top_p = .7
self.repeat_penalty = 1.5
await self.send_message(room_id, "Bot has been reset for everyone") await self.send_message(room_id, "Bot has been reset for everyone")
if message.startswith((".temperature ", ".top_p ", ".repeat_penalty ")): if message.startswith((".temperature ", ".top_p ", ".repeat_penalty ")):
attr_name = message.split()[0][1:] attr_name = message.split()[0][1:]
min_val, max_val, default_val = { min_val, max_val, default_val = {
"temperature": (0, 1, 0.9), "temperature": (0, 1, self.defaults['temperature']),
"top_p": (0, 1, 0.7), "top_p": (0, 1, self.defaults['top_p']),
"repeat_penalty": (0, 2, 1.5) "repeat_penalty": (0, 2, self.defaults['repeat_penalty'])
}[attr_name] }[attr_name]
if message.endswith(" reset"): if message.endswith(" reset"):
@ -256,7 +263,7 @@ class ollamarama:
if message != ".ai reset": if message != ".ai reset":
m = message.split(" ", 1) m = message.split(" ", 1)
try: try:
m = m[1]# + " [your response must be one paragraph or less]" m = m[1]
await self.add_history("user", room_id, sender, m) await self.add_history("user", room_id, sender, m)
await self.respond(room_id, sender, self.messages[room_id][sender]) await self.respond(room_id, sender, self.messages[room_id][sender])
except: except:
@ -268,7 +275,7 @@ class ollamarama:
if len(m) > 1: if len(m) > 1:
disp_name = m[0] disp_name = m[0]
name_id = "" name_id = ""
m = m[1]# + " [your response must be one paragraph or less]" m = m[1]
if room_id in self.messages: if room_id in self.messages:
for user in self.messages[room_id]: for user in self.messages[room_id]:
try: try:
@ -284,7 +291,7 @@ class ollamarama:
#change personality #change personality
if message.startswith(".persona "): if message.startswith(".persona "):
m = message.split(" ", 1) m = message.split(" ", 1)
m = m[1]# + " [your response must be one paragraph or less]" m = m[1]
await self.persona(room_id, sender, m) await self.persona(room_id, sender, m)
await self.respond(room_id, sender, self.messages[room_id][sender]) await self.respond(room_id, sender, self.messages[room_id][sender])