6 tests that instantiate ErasExpertNode directly (no HTTP, no pipeline). Assert SQL table selection, JOIN patterns, and response hygiene. 2 LLM calls per test vs 4+ for matrix — runs in ~22s total locally. Requires pymysql in venv and DB access (WireGuard or NodePort). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
129 lines
4.2 KiB
Python
129 lines
4.2 KiB
Python
"""Node-level tests for ErasExpertNode.
|
|
|
|
Tests the expert node directly — no HTTP, no pipeline, no session.
|
|
Instantiates ErasExpertNode, calls execute(), asserts on HUD events + ThoughtResult.
|
|
|
|
Two LLM calls per test (plan + response) vs 4+ for full matrix tests.
|
|
Runs against MariaDB directly (DB_HOST from .env — WireGuard on local, ClusterIP in K3s).
|
|
|
|
Usage:
|
|
python tests/run_tests.py node
|
|
python tests/run_tests.py node/umsatz_uses_artikelposition
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Load .env so DB_HOST, OPENROUTER_API_KEY etc. are set
|
|
from dotenv import load_dotenv
|
|
load_dotenv(Path(__file__).parent.parent / ".env")
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from agent.nodes.eras_expert import ErasExpertNode
|
|
|
|
|
|
def _run(job: str):
    """Run a single job through a fresh ErasExpertNode.

    A new node is created with a ``send_hud`` callback that appends every
    event it receives to a list. The node's ``execute`` coroutine is then
    awaited with ``language="de"`` inside ``asyncio.run``.

    Args:
        job: The job text handed to ``ErasExpertNode.execute``.

    Returns:
        A ``(result, hud_events)`` tuple: whatever ``execute`` returned and
        the list of events the HUD callback collected.
    """
    captured = []

    async def _collect(event):
        # HUD sink: just remember each event so the tests can inspect it.
        captured.append(event)

    async def _drive():
        expert = ErasExpertNode(send_hud=_collect)
        outcome = await expert.execute(job, language="de")
        return outcome, captured

    return asyncio.run(_drive())
|
|
|
|
|
|
def _tool_calls(events: list) -> list[dict]:
|
|
return [e for e in events if e.get("event") == "tool_call"]
|
|
|
|
|
|
def _query_db_calls(events: list) -> list[str]:
    """Collect the ``args["query"]`` value of every query_db tool_call event.

    Only tool_call events whose ``"tool"`` is ``"query_db"`` and that carry
    an ``"args"`` entry contribute; the order of the events is preserved.
    """
    sql_statements = []
    for call in _tool_calls(events):
        if call.get("tool") != "query_db":
            continue
        if "args" not in call:
            continue
        sql_statements.append(call["args"]["query"])
    return sql_statements
|
|
|
|
|
|
# --- Tests ---
|
|
|
|
def test_umsatz_uses_artikelposition():
    """Umsatz query must use artikelposition, not geraeteverbraeuche."""
    _, hud_events = _run("Zeig mir die 5 größten Kunden nach Umsatz")
    sql_list = _query_db_calls(hud_events)

    assert sql_list, "no query_db call made"

    # Case-insensitive search across every SQL statement that was issued.
    all_sql = " ".join(sql_list).lower()
    assert "artikelposition" in all_sql, (
        f"expected artikelposition in query, got: {sql_list[0][:300]}"
    )
|
|
|
|
|
|
def test_umsatz_not_geraeteverbraeuche():
    """Umsatz query must not touch geraeteverbraeuche (consumption table).

    The test first requires that at least one ``query_db`` call was made,
    matching the other query tests in this module; otherwise the negative
    check below would pass without any SQL having been inspected. On
    failure the message shows the query that actually references the wrong
    table rather than simply the first query issued.
    """
    _, events = _run("Zeig mir die 5 größten Kunden nach Umsatz")

    queries = _query_db_calls(events)
    # Guard against a vacuous pass when the expert never queried the DB.
    assert queries, "no query_db call made"

    # Case-insensitive match per statement so the report can name the culprit.
    offending = [q for q in queries if "geraeteverbraeuche" in q.lower()]
    assert not offending, \
        f"used wrong table geraeteverbraeuche: {offending[0][:300]}"
|
|
|
|
|
|
def test_umsatz_has_result():
    """Umsatz query returns non-empty result and completes without errors."""
    outcome, _ = _run("Zeig mir die 5 größten Kunden nach Umsatz")

    # Checked in this order: errors first, then tool output, then the text.
    assert not outcome.errors, (
        f"expert had errors: {outcome.errors}"
    )
    assert outcome.tool_output, "no tool output (query returned nothing)"
    assert outcome.response, "no response text generated"
|
|
|
|
|
|
def test_kunden_count_uses_kunden_table():
    """Simple count query uses the kunden table."""
    _, hud_events = _run("Wie viele Kunden gibt es?")
    sql_list = _query_db_calls(hud_events)

    assert sql_list, "no query_db call made"

    # Substring check over all issued SQL, lower-cased.
    all_sql = " ".join(sql_list).lower()
    assert "kunden" in all_sql, f"expected kunden table: {sql_list}"
|
|
|
|
|
|
def test_objekte_joins_objektkunde():
    """Objekte-per-Kunde query uses the objektkunde junction table."""
    _, hud_events = _run("Welcher Kunde hat die meisten Objekte?")
    queries = _query_db_calls(hud_events)

    assert queries, "no query_db call made"

    # Look for the junction table anywhere in the issued SQL, ignoring case.
    all_sql = " ".join(queries).lower()
    assert "objektkunde" in all_sql, (
        f"expected objektkunde junction: {queries[0][:300] if queries else '(none)'}"
    )
|
|
|
|
|
|
def test_no_sql_exposed_in_response():
    """Response text must not contain raw SQL (domain language only).

    A response must exist before it is inspected: an empty response would
    otherwise satisfy both "not in" checks without proving anything, and a
    missing one would abort with an AttributeError instead of a readable
    assertion failure.
    """
    result, _ = _run("Zeig mir die 5 größten Kunden nach Umsatz")

    # Same precondition and message as the response check in
    # test_umsatz_has_result.
    assert result.response, "no response text generated"

    text = result.response.lower()
    assert "select " not in text, f"SQL leaked into response: {result.response[:200]}"
    assert "from kunden" not in text, f"table name leaked: {result.response[:200]}"
|
|
|
|
|
|
# Registry of the node tests, keyed by the function name without its
# "test_" prefix (e.g. "umsatz_uses_artikelposition"). Insertion order
# follows the order of the tuple below.
TESTS = {
    fn.__name__.removeprefix("test_"): fn
    for fn in (
        test_umsatz_uses_artikelposition,
        test_umsatz_not_geraeteverbraeuche,
        test_umsatz_has_result,
        test_kunden_count_uses_kunden_table,
        test_objekte_joins_objektkunde,
        test_no_sql_exposed_in_response,
    )
}
|