Compare commits


3 Commits

SHA1        Message                                                    Date

ab0531cf05  Migrating to using FastAPI for exposing API to openwebui  2025-05-12 00:42:34 +02:00
            Some checks failed:
            - Run Python tests (through Pytest) / Test (push): failing after 23s
            - Verify Python project can be installed, loaded and have version checked / Test (push): failing after 21s
20bfd588f6  OpenAI wip                                                 2025-05-12 00:28:21 +02:00
60ae842764  Testing memories                                           2025-05-11 20:15:42 +02:00
4 changed files with 222 additions and 64 deletions

requirements.txt (new file, +3)

@@ -0,0 +1,3 @@
+prompt_toolkit
+langchain[ollama]
+langgraph
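Note: the new modules in the diffs below also import fastapi and langmem, which this file does not declare; a fuller list would presumably read as follows (package names inferred from the imports, not confirmed by the commit):

    prompt_toolkit
    langchain[ollama]
    langgraph
    fastapi
    langmem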

@@ -1,65 +1,29 @@
-import logging
-
-import prompt_toolkit
-import prompt_toolkit.auto_suggest
-import prompt_toolkit.history
-from langchain_core.messages import HumanMessage, SystemMessage
-from langchain_ollama import ChatOllama
-from langgraph.prebuilt import create_react_agent
-
-logger = logging.getLogger(__name__)
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from fastapi.encoders import jsonable_encoder
 
 from . import tools
 
-cli_history = prompt_toolkit.history.FileHistory('output/cli_history.txt')
-
-# MODEL = "gemma3:27b"
-# MODEL = "qwen3:latest"
-MODEL = 'hf.co/unsloth/Qwen3-30B-A3B-GGUF:Q4_K_M'
-
-
-def create_model():
-    available_tools = tools.get_tools()
-    logger.info('Available tools:')
-    for tool in available_tools:
-        logger.info('- %s', tool.name)
-
-    llm = ChatOllama(model=MODEL)
-    llm.bind_tools(tools=available_tools)
-    return create_react_agent(llm, tools=available_tools)
-
-
-SYSTEM_MESSAGE = """
-You are a useful assistant with access to built in system tools.
-Format responses as markdown.
-Provide links when available.
-"""
-
-
-def main():
-    logging.basicConfig(level='INFO')
-    messages = [SystemMessage(SYSTEM_MESSAGE)]
-    llm = create_model()
-    prev_idx = 0
-    while True:
-        user_input = prompt_toolkit.prompt(
-            'Human: ',
-            history=cli_history,
-            auto_suggest=prompt_toolkit.auto_suggest.AutoSuggestFromHistory(),
-        )
-        messages.append(HumanMessage(user_input))
-        result = llm.invoke(
-            {
-                'messages': messages,
-            },
-        )
-        messages = result['messages']
-        for msg in messages[prev_idx:]:
-            print(msg.pretty_repr())
-            del msg
-        prev_idx = len(messages)
-
-
-if __name__ == '__main__':
-    main()
+app = FastAPI()
+
+origins = [
+    "http://localhost.tiangolo.com",
+    "https://localhost.tiangolo.com",
+    "http://localhost",
+    "http://localhost:8080",
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+for tool in tools.get_tools():
+    component, method = tool.__name__.split('.')
+    path = f'/{component}/{method}'
+    app.get(path, response_model=None)(tool)
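The closing loop above mounts every wrapped tool as its own GET route. A minimal self-contained sketch of the pattern follows; the stand-in tool and the 'Demo.lookup' name are hypothetical, while the 'Class.method' naming itself comes from wrap_method in tools.py (see the last file below):

    from fastapi import FastAPI

    app = FastAPI()

    def lookup(query: str) -> str:
        """Hypothetical stand-in for a wrapped tool."""
        return f'results for {query!r}'

    # wrap_method() renames wrappers to 'Class.method', which maps onto a URL path.
    lookup.__name__ = 'Demo.lookup'

    component, method = lookup.__name__.split('.')
    # app.get(...) returns a decorator; calling it directly registers the
    # function, so GET /Demo/lookup?query=... now invokes the tool.
    app.get(f'/{component}/{method}', response_model=None)(lookup)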


@@ -0,0 +1,173 @@
+import logging
+import json
+
+import prompt_toolkit
+import prompt_toolkit.auto_suggest
+import prompt_toolkit.history
+from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage
+from langchain_ollama import ChatOllama
+from langgraph.prebuilt import create_react_agent
+from langmem import create_memory_manager
+import dataclasses
+
+logger = logging.getLogger(__name__)
+
+from . import tools
+
+cli_history = prompt_toolkit.history.FileHistory('output/cli_history.txt')
+
+MODEL = 'hf.co/unsloth/Qwen3-30B-A3B-GGUF:Q4_K_M'
+
+
+def create_raw_model():
+    return ChatOllama(model=MODEL)
+
+
+def create_model():
+    available_tools = tools.get_tools()
+    logger.info('Available tools:')
+    for tool in available_tools:
+        logger.info('- %s', tool.name)
+
+    llm = create_raw_model()
+    llm.bind_tools(tools=available_tools)
+    return create_react_agent(llm, tools=available_tools)
+
+
+SYSTEM_MESSAGE = """
+You are a useful assistant with access to built in system tools.
+Format responses as markdown.
+Provide links when available.
+"""
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from fastapi.encoders import jsonable_encoder
+
+app = FastAPI()
+
+origins = [
+    "http://localhost.tiangolo.com",
+    "https://localhost.tiangolo.com",
+    "http://localhost",
+    "http://localhost:8080",
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIMessage:
+    role: str
+    content: str
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIRequest:
+    model: str
+    messages: list[OpenAIMessage]
+    stream: bool
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIUsage:
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIMessageSeq:
+    index: int
+    message: OpenAIMessage
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenAIResponse:
+    id: str
+    object: str
+    created: int
+    model: str
+    system_fingerprint: str
+    choices: list[OpenAIMessageSeq]
+    usage: OpenAIUsage
+
+
+memory_manager = create_memory_manager(
+    create_raw_model(),
+    instructions="Extract all noteworthy facts, events, and relationships. Indicate their importance.",
+    enable_inserts=True,
+)
+
+llm = create_model()
+
+
+def invoke_model(messages_input: list[OpenAIMessage]):
+    messages = [{'role': m.role, 'content': m.content} for m in messages_input]
+    return llm.invoke(
+        {
+            'messages': messages,
+        },
+    )
+
+
+@app.post('/v1/chat/completions')
+async def chat_completions(request: OpenAIRequest) -> OpenAIResponse:
+    print(request)
+
+    def fjerp():
+        derp = invoke_model(request.messages)['messages']
+        choices = [
+            OpenAIMessageSeq(idx, OpenAIMessage(m.type, m.content))
+            for idx, m in enumerate(derp)
+        ]
+        return OpenAIResponse(
+            id='test1',
+            object='chat.completion',
+            created=1746999397,
+            model=request.model,
+            system_fingerprint=request.model,
+            choices=choices,
+            usage=OpenAIUsage(0, 0, 0),
+        )
+
+    async def response_stream():
+        yield json.dumps(jsonable_encoder(fjerp()))
+
+    if request.stream:
+        return StreamingResponse(response_stream())
+    return fjerp()
+
+
+@app.get('/v1/models')
+async def models():
+    return {
+        'object': 'list',
+        'data': [
+            {'id': 'test_langgraph', 'object': 'model', 'created': 1746919302, 'owned_by': 'jmaa'},
+        ],
+    }
+
+
+def main_cli():
+    messages = [SystemMessage(SYSTEM_MESSAGE)]
+    prev_idx = 0
+    while True:
+        user_input = prompt_toolkit.prompt(
+            'Human: ',
+            history=cli_history,
+            auto_suggest=prompt_toolkit.auto_suggest.AutoSuggestFromHistory(),
+        )
+        if user_input == '/memories':
+            memories = memory_manager.invoke({"messages": messages})
+            print(memories)
+        else:
+            messages.append(HumanMessage(user_input))
+            result = invoke_model(messages)
+            messages = result['messages']
+        for msg in messages[prev_idx:]:
+            print(msg.pretty_repr())
+            del msg
+        prev_idx = len(messages)
+
+
+def main_server():
+    pass
+
+
+def main():
+    logging.basicConfig(level='INFO')
+    main_server()
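A sketch of exercising the new OpenAI-compatible endpoint; the host, port, and use of the requests library are assumptions, not part of this commit:

    import requests

    resp = requests.post(
        'http://localhost:8000/v1/chat/completions',
        json={
            'model': 'test_langgraph',
            'messages': [{'role': 'user', 'content': 'Hello'}],
            'stream': False,
        },
    )
    # chat_completions returns every agent message as a choice; the final
    # one is the assistant's reply.
    print(resp.json()['choices'][-1]['message']['content'])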

tools.py

@@ -12,6 +12,11 @@ try:
 except ImportError:
     pycountry = None
 
+try:
+    import fin_defs
+except ImportError:
+    fin_defs = None
+
 logger = logging.getLogger(__name__)
@@ -26,6 +31,10 @@ def search(query: str):
 def dataclasses_to_json(data):
     if pycountry and isinstance(data, pycountry.db.Country):
         return data.alpha_2
+    if fin_defs and isinstance(data, fin_defs.AssetAmount):
+        return str(data)
+    if fin_defs and isinstance(data, fin_defs.Asset):
+        return data.raw_short_name()
     if isinstance(data, list | tuple):
         return [dataclasses_to_json(d) for d in data]
     if isinstance(data, dict):
@@ -50,25 +59,29 @@ RETURN_FORMATS = {
 RETURN_FORMAT = 'json'
 
+MAX_TOOL_RESULT_LEN = 1000
+APPEND_RESULT_TYPE_DOCS = True
+
 
 def wrap_method(class_, method):
     logger.info('Wrapping %s.%s', class_.__name__, method.__name__)
-    is_iterator = str(method.__annotations__.get('return', '')).startswith(
+    return_type = method.__annotations__.get('return', '')
+    is_iterator = str(return_type).startswith(
         'collections.abc.Iterator',
     )
 
     def wrapper(input_value):
-        if isinstance(input_value, dict):
-            logger.warning('Silently converting from dict to plain value!')
-            input_value = next(input_value.values())
         logger.info(
             'AI called %s.%s(%s)', class_.__name__, method.__name__, repr(input_value),
         )
         try:
+            if isinstance(input_value, dict):
+                logger.warning('Silently converting from dict to plain value!')
+                input_value = next(input_value.values())
             result = method(input_value)
             if is_iterator:
                 result = list(result)
-            return RETURN_FORMATS[RETURN_FORMAT](result)
+            result_str: str = str(RETURN_FORMATS[RETURN_FORMAT](result))
+            del result
         except:
             logger.exception(
                 'AI invocation of %s.%s(%s) failed!',
@@ -77,11 +90,16 @@ def wrap_method(class_, method):
                 repr(input_value),
             )
             raise
+        if len(result_str) > MAX_TOOL_RESULT_LEN:
+            result_str = result_str[:MAX_TOOL_RESULT_LEN] + ' (remaining tool result elided...)'
+        if APPEND_RESULT_TYPE_DOCS and (return_docs := getattr(return_type, '__doc__', None)):
+            result_str = result_str + '\n' + return_docs
+        return result_str
 
     wrapper.__name__ = f'{class_.__name__}.{method.__name__}'
     wrapper.__doc__ = method.__doc__
     wrapper.__annotations__ = method.__annotations__
-    return tool(wrapper)
+    return wrapper
 
 
 def wrap_all_methods_on_client(obj):
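The net effect of the new MAX_TOOL_RESULT_LEN / APPEND_RESULT_TYPE_DOCS handling, sketched in isolation (constants copied from the diff; postprocess and the dict example are illustrative, not the repo's API):

    MAX_TOOL_RESULT_LEN = 1000
    APPEND_RESULT_TYPE_DOCS = True

    def postprocess(result_str: str, return_type) -> str:
        # Truncate oversized tool output so it cannot flood the model context.
        if len(result_str) > MAX_TOOL_RESULT_LEN:
            result_str = result_str[:MAX_TOOL_RESULT_LEN] + ' (remaining tool result elided...)'
        # Optionally append the return type's docstring so the model knows
        # how to interpret the (possibly truncated) payload.
        if APPEND_RESULT_TYPE_DOCS and (return_docs := getattr(return_type, '__doc__', None)):
            result_str = result_str + '\n' + return_docs
        return result_str

    # Prints 1000 'x' characters, the truncation marker, then dict's docstring:
    print(postprocess('x' * 5000, dict))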