Compare commits


3 Commits

SHA1        Message                                                    Date

ab0531cf05  Migrating to using FastAPI for exposing API to openwebui  2025-05-12 00:42:34 +02:00
            Some checks failed:
            - Run Python tests (through Pytest) / Test (push): failing after 23s
            - Verify Python project can be installed, loaded and have version checked / Test (push): failing after 21s
20bfd588f6  OpenAI wip                                                 2025-05-12 00:28:21 +02:00
60ae842764  Testing memories                                           2025-05-11 20:15:42 +02:00
4 changed files with 222 additions and 64 deletions

requirements.txt (new file, +3)

@@ -0,0 +1,3 @@
+prompt_toolkit
+langchain[ollama]
+langgraph
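Note: the new modules in the diffs below also import fastapi and langmem, which this file does not declare; a fuller list would presumably read as follows (package names inferred from the imports, not confirmed by the commit):

    prompt_toolkit
    langchain[ollama]
    langgraph
    fastapi
    langmem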

@@ -1,65 +1,29 @@
-import logging
-
-import prompt_toolkit
-import prompt_toolkit.auto_suggest
-import prompt_toolkit.history
-from langchain_core.messages import HumanMessage, SystemMessage
-from langchain_ollama import ChatOllama
-from langgraph.prebuilt import create_react_agent
-
-logger = logging.getLogger(__name__)
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from fastapi.encoders import jsonable_encoder
 
 from . import tools
 
-cli_history = prompt_toolkit.history.FileHistory('output/cli_history.txt')
-
-# MODEL = "gemma3:27b"
-# MODEL = "qwen3:latest"
-MODEL = 'hf.co/unsloth/Qwen3-30B-A3B-GGUF:Q4_K_M'
-
-
-def create_model():
-    available_tools = tools.get_tools()
-    logger.info('Available tools:')
-    for tool in available_tools:
-        logger.info('- %s', tool.name)
-
-    llm = ChatOllama(model=MODEL)
-    llm.bind_tools(tools=available_tools)
-    return create_react_agent(llm, tools=available_tools)
-
-
-SYSTEM_MESSAGE = """
-You are a useful assistant with access to built in system tools.
-Format responses as markdown.
-Provide links when available.
-"""
-
-
-def main():
-    logging.basicConfig(level='INFO')
-    messages = [SystemMessage(SYSTEM_MESSAGE)]
-    llm = create_model()
-    prev_idx = 0
-    while True:
-        user_input = prompt_toolkit.prompt(
-            'Human: ',
-            history=cli_history,
-            auto_suggest=prompt_toolkit.auto_suggest.AutoSuggestFromHistory(),
-        )
-        messages.append(HumanMessage(user_input))
-        result = llm.invoke(
-            {
-                'messages': messages,
-            },
-        )
-        messages = result['messages']
-        for msg in messages[prev_idx:]:
-            print(msg.pretty_repr())
-            del msg
-        prev_idx = len(messages)
-
-
-if __name__ == '__main__':
-    main()
+app = FastAPI()
+
+origins = [
+    "http://localhost.tiangolo.com",
+    "https://localhost.tiangolo.com",
+    "http://localhost",
+    "http://localhost:8080",
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+for tool in tools.get_tools():
+    component, method = tool.__name__.split('.')
+    path = f'/{component}/{method}'
+    app.get(path, response_model=None)(tool)
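The closing loop above mounts every wrapped tool as its own GET route. A minimal self-contained sketch of the pattern follows; the stand-in tool and the 'Demo.lookup' name are hypothetical, while the 'Class.method' naming itself comes from wrap_method in tools.py (see the last file below):

    from fastapi import FastAPI

    app = FastAPI()

    def lookup(query: str) -> str:
        """Hypothetical stand-in for a wrapped tool."""
        return f'results for {query!r}'

    # wrap_method() renames wrappers to 'Class.method', which maps onto a URL path.
    lookup.__name__ = 'Demo.lookup'

    component, method = lookup.__name__.split('.')
    # app.get(...) returns a decorator; calling it directly registers the
    # function, so GET /Demo/lookup?query=... now invokes the tool.
    app.get(f'/{component}/{method}', response_model=None)(lookup)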


@@ -0,0 +1,173 @@
+import logging
+import json
+
+import prompt_toolkit
+import prompt_toolkit.auto_suggest
+import prompt_toolkit.history
+from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage
+from langchain_ollama import ChatOllama
+from langgraph.prebuilt import create_react_agent
+from langmem import create_memory_manager
+import dataclasses
+
+logger = logging.getLogger(__name__)
+
+from . import tools
+
+cli_history = prompt_toolkit.history.FileHistory('output/cli_history.txt')
+
+MODEL = 'hf.co/unsloth/Qwen3-30B-A3B-GGUF:Q4_K_M'
+
+
+def create_raw_model():
+    return ChatOllama(model=MODEL)
+
+
+def create_model():
+    available_tools = tools.get_tools()
+    logger.info('Available tools:')
+    for tool in available_tools:
+        logger.info('- %s', tool.name)
+
+    llm = create_raw_model()
+    llm.bind_tools(tools=available_tools)
+    return create_react_agent(llm, tools=available_tools)
+
+
+SYSTEM_MESSAGE = """
+You are a useful assistant with access to built in system tools.
+Format responses as markdown.
+Provide links when available.
+"""
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from fastapi.encoders import jsonable_encoder
+
+app = FastAPI()
+
+origins = [
+    "http://localhost.tiangolo.com",
+    "https://localhost.tiangolo.com",
+    "http://localhost",
+    "http://localhost:8080",
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIMessage:
+    role: str
+    content: str
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIRequest:
+    model: str
+    messages: list[OpenAIMessage]
+    stream: bool
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIUsage:
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIMessageSeq:
+    index: int
+    message: OpenAIMessage
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIResponse:
+    id: str
+    object: str
+    created: int
+    model: str
+    system_fingerprint: str
+    choices: list[OpenAIMessageSeq]
+    usage: OpenAIUsage
+
+
+memory_manager = create_memory_manager(
+    create_raw_model(),
+    instructions="Extract all noteworthy facts, events, and relationships. Indicate their importance.",
+    enable_inserts=True,
+)
+
+llm = create_model()
+
+
+def invoke_model(messages_input: list[OpenAIMessage]):
+    messages = [{'role': m.role, 'content': m.content} for m in messages_input]
+    return llm.invoke(
+        {
+            'messages': messages,
+        },
+    )
+
+
+@app.post('/v1/chat/completions')
+async def chat_completions(request: OpenAIRequest) -> OpenAIResponse:
+    print(request)
+
+    def fjerp():
+        derp = invoke_model(request.messages)['messages']
+        choices = [
+            OpenAIMessageSeq(idx, OpenAIMessage(m.type, m.content))
+            for idx, m in enumerate(derp)
+        ]
+        return OpenAIResponse(
+            id='test1',
+            object='chat.completion',
+            created=1746999397,
+            model=request.model,
+            system_fingerprint=request.model,
+            choices=choices,
+            usage=OpenAIUsage(0, 0, 0),
+        )
+
+    async def response_stream():
+        yield json.dumps(jsonable_encoder(fjerp()))
+
+    if request.stream:
+        return StreamingResponse(response_stream())
+    return fjerp()
+
+
+@app.get('/v1/models')
+async def models():
+    return {
+        'object': 'list',
+        'data': [
+            {'id': 'test_langgraph', 'object': 'model', 'created': 1746919302, 'owned_by': 'jmaa'},
+        ],
+    }
+
+
+def main_cli():
+    messages = [SystemMessage(SYSTEM_MESSAGE)]
+    prev_idx = 0
+    while True:
+        user_input = prompt_toolkit.prompt(
+            'Human: ',
+            history=cli_history,
+            auto_suggest=prompt_toolkit.auto_suggest.AutoSuggestFromHistory(),
+        )
+        if user_input == '/memories':
+            memories = memory_manager.invoke({"messages": messages})
+            print(memories)
+        else:
+            messages.append(HumanMessage(user_input))
+            result = invoke_model(messages)
+            messages = result['messages']
+        for msg in messages[prev_idx:]:
+            print(msg.pretty_repr())
+            del msg
+        prev_idx = len(messages)
+
+
+def main_server():
+    pass
+
+
+def main():
+    logging.basicConfig(level='INFO')
+    main_server()
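A sketch of exercising the new OpenAI-compatible endpoint; the host, port, and use of the requests library are assumptions, not part of this commit:

    import requests

    resp = requests.post(
        'http://localhost:8000/v1/chat/completions',
        json={
            'model': 'test_langgraph',
            'messages': [{'role': 'user', 'content': 'Hello'}],
            'stream': False,
        },
    )
    # chat_completions returns every agent message as a choice; the final
    # one is the assistant's reply.
    print(resp.json()['choices'][-1]['message']['content'])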

tools.py

@@ -12,6 +12,11 @@ try:
 except ImportError:
     pycountry = None
 
+try:
+    import fin_defs
+except ImportError:
+    fin_defs = None
+
 logger = logging.getLogger(__name__)
@@ -26,6 +31,10 @@ def search(query: str):
 def dataclasses_to_json(data):
     if pycountry and isinstance(data, pycountry.db.Country):
         return data.alpha_2
+    if fin_defs and isinstance(data, fin_defs.AssetAmount):
+        return str(data)
+    if fin_defs and isinstance(data, fin_defs.Asset):
+        return data.raw_short_name()
     if isinstance(data, list | tuple):
         return [dataclasses_to_json(d) for d in data]
     if isinstance(data, dict):
@@ -50,25 +59,29 @@ RETURN_FORMATS = {
 RETURN_FORMAT = 'json'
 
+MAX_TOOL_RESULT_LEN = 1000
+APPEND_RESULT_TYPE_DOCS = True
+
 
 def wrap_method(class_, method):
     logger.info('Wrapping %s.%s', class_.__name__, method.__name__)
-    is_iterator = str(method.__annotations__.get('return', '')).startswith(
+    return_type = method.__annotations__.get('return', '')
+    is_iterator = str(return_type).startswith(
         'collections.abc.Iterator',
     )
 
     def wrapper(input_value):
-        if isinstance(input_value, dict):
-            logger.warning('Silently converting from dict to plain value!')
-            input_value = next(input_value.values())
         logger.info(
             'AI called %s.%s(%s)', class_.__name__, method.__name__, repr(input_value),
         )
         try:
+            if isinstance(input_value, dict):
+                logger.warning('Silently converting from dict to plain value!')
+                input_value = next(input_value.values())
             result = method(input_value)
             if is_iterator:
                 result = list(result)
-            return RETURN_FORMATS[RETURN_FORMAT](result)
+            result_str: str = str(RETURN_FORMATS[RETURN_FORMAT](result))
+            del result
         except:
             logger.exception(
                 'AI invocation of %s.%s(%s) failed!',
@@ -77,11 +90,16 @@ def wrap_method(class_, method):
                 repr(input_value),
             )
             raise
+        if len(result_str) > MAX_TOOL_RESULT_LEN:
+            result_str = result_str[:MAX_TOOL_RESULT_LEN] + ' (remaining tool result elided...)'
+        if APPEND_RESULT_TYPE_DOCS and (return_docs := getattr(return_type, '__doc__', None)):
+            result_str = result_str + '\n' + return_docs
+        return result_str
 
     wrapper.__name__ = f'{class_.__name__}.{method.__name__}'
     wrapper.__doc__ = method.__doc__
     wrapper.__annotations__ = method.__annotations__
-    return tool(wrapper)
+    return wrapper
 
 
 def wrap_all_methods_on_client(obj):
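The net effect of the new MAX_TOOL_RESULT_LEN / APPEND_RESULT_TYPE_DOCS handling, sketched in isolation (constants copied from the diff; postprocess and the dict example are illustrative, not the repo's API):

    MAX_TOOL_RESULT_LEN = 1000
    APPEND_RESULT_TYPE_DOCS = True

    def postprocess(result_str: str, return_type) -> str:
        # Truncate oversized tool output so it cannot flood the model context.
        if len(result_str) > MAX_TOOL_RESULT_LEN:
            result_str = result_str[:MAX_TOOL_RESULT_LEN] + ' (remaining tool result elided...)'
        # Optionally append the return type's docstring so the model knows
        # how to interpret the (possibly truncated) payload.
        if APPEND_RESULT_TYPE_DOCS and (return_docs := getattr(return_type, '__doc__', None)):
            result_str = result_str + '\n' + return_docs
        return result_str

    # Prints 1000 'x' characters, the truncation marker, then dict's docstring:
    print(postprocess('x' * 5000, dict))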