Add node-level test suite for ErasExpertNode

6 tests that instantiate ErasExpertNode directly (no HTTP, no pipeline).
Assert SQL table selection, JOIN patterns, and response hygiene.
2 LLM calls per test vs 4+ for matrix — runs in ~22s total locally.
Requires pymysql in venv and DB access (WireGuard or NodePort).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Nico 2026-04-03 22:12:01 +02:00
parent 269c5ccf8c
commit b35610cf6f
2 changed files with 136 additions and 0 deletions

View File

@ -127,9 +127,17 @@ def get_testcase_tests() -> dict:
return get_testcase_tests() return get_testcase_tests()
def get_node_tests() -> dict:
    """Return the node-level test registry (direct node instantiation, real LLM + DB, no HTTP).

    The directory containing this file is placed at the front of ``sys.path``
    so that ``test_node_eras`` can be imported as a top-level module; its
    ``TESTS`` mapping is returned unchanged.
    """
    here = os.path.dirname(__file__)
    sys.path.insert(0, here)

    # Imported lazily so the module is only loaded when this suite is requested.
    import test_node_eras

    return test_node_eras.TESTS
SUITES = { SUITES = {
'engine': get_engine_tests, 'engine': get_engine_tests,
'api': get_api_tests, 'api': get_api_tests,
'node': get_node_tests,
'matrix': get_matrix_tests, 'matrix': get_matrix_tests,
'testcases': get_testcase_tests, 'testcases': get_testcase_tests,
'roundtrip': get_roundtrip_tests, 'roundtrip': get_roundtrip_tests,

128
tests/test_node_eras.py Normal file
View File

@ -0,0 +1,128 @@
"""Node-level tests for ErasExpertNode.
Tests the expert node directly — no HTTP, no pipeline, no session.
Instantiates ErasExpertNode, calls execute(), asserts on HUD events + ThoughtResult.
Two LLM calls per test (plan + response) vs 4+ for full matrix tests.
Runs against MariaDB directly (DB_HOST from .env — WireGuard on local, ClusterIP in K3s).
Usage:
python tests/run_tests.py node
python tests/run_tests.py node/umsatz_uses_artikelposition
"""
import asyncio
import os
import sys
from pathlib import Path
# Load .env so DB_HOST, OPENROUTER_API_KEY etc. are set
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent.parent / ".env")
sys.path.insert(0, str(Path(__file__).parent.parent))
from agent.nodes.eras_expert import ErasExpertNode
def _run(job: str):
    """Execute *job* on a fresh ErasExpertNode and return ``(result, hud_events)``.

    A new node is created per call with a HUD callback that appends every
    event it receives to a list. ``execute`` is awaited with
    ``language="de"`` inside ``asyncio.run``, so this helper is synchronous.
    """
    captured = []

    async def _collect(event):
        # HUD sink handed to the node: just record what was sent.
        captured.append(event)

    async def _drive():
        expert = ErasExpertNode(send_hud=_collect)
        outcome = await expert.execute(job, language="de")
        return outcome, captured

    return asyncio.run(_drive())
def _tool_calls(events: list) -> list[dict]:
    """Return, in original order, the events whose ``"event"`` field is ``"tool_call"``."""

    def _is_tool_call(event) -> bool:
        return event.get("event") == "tool_call"

    return list(filter(_is_tool_call, events))
def _query_db_calls(events: list) -> list[str]:
    """Collect the SQL text of every ``query_db`` tool call found in *events*.

    Only tool_call events whose ``"tool"`` is ``"query_db"`` and that carry an
    ``"args"`` entry are considered; the value under ``args["query"]`` is
    returned for each, in event order.
    """
    db_calls = (
        call for call in _tool_calls(events)
        if call.get("tool") == "query_db"
    )
    return [call["args"]["query"] for call in db_calls if "args" in call]
# --- Tests ---
def test_umsatz_uses_artikelposition():
    """The Umsatz question must produce SQL that references artikelposition.

    Fails when no query_db call was recorded, or when none of the recorded
    queries mentions ``artikelposition`` (case-insensitive).
    """
    _, hud_events = _run("Zeig mir die 5 größten Kunden nach Umsatz")
    queries = _query_db_calls(hud_events)
    assert queries, "no query_db call made"

    all_sql = " ".join(queries)
    assert "artikelposition" in all_sql.lower(), \
        f"expected artikelposition in query, got: {queries[0][:300]}"
def test_umsatz_not_geraeteverbraeuche():
    """Umsatz query must not touch geraeteverbraeuche (consumption table).

    Fails when no query_db call was recorded at all, or when any recorded
    query mentions ``geraeteverbraeuche`` (case-insensitive). The failure
    message quotes the first offending query.
    """
    _, events = _run("Zeig mir die 5 größten Kunden nach Umsatz")
    queries = _query_db_calls(events)

    # Without this guard an empty query list would satisfy the "not in"
    # check below and the test would pass without having verified anything.
    assert queries, "no query_db call made"

    # Check per query so the message can show the query that actually used
    # the forbidden table rather than whichever query happened to run first.
    offending = [q for q in queries if "geraeteverbraeuche" in q.lower()]
    assert not offending, \
        f"used wrong table geraeteverbraeuche: {offending[0][:300]}"
def test_umsatz_has_result():
    """The Umsatz question finishes cleanly with tool output and a response.

    Checks, in order: the result reports no errors, ``tool_output`` is
    truthy, and ``response`` is truthy.
    """
    result, _ = _run("Zeig mir die 5 größten Kunden nach Umsatz")

    assert not result.errors, f"expert had errors: {result.errors}"
    assert result.tool_output, "no tool output (query returned nothing)"
    assert result.response, "no response text generated"
def test_kunden_count_uses_kunden_table():
    """A plain customer-count question must produce SQL that mentions ``kunden``.

    Fails when no query_db call was recorded, or when the lowercased SQL of
    all recorded queries does not contain the substring ``kunden``.
    """
    _, hud_events = _run("Wie viele Kunden gibt es?")
    queries = _query_db_calls(hud_events)
    assert queries, "no query_db call made"

    all_sql = " ".join(queries)
    assert "kunden" in all_sql.lower(), f"expected kunden table: {queries}"
def test_objekte_joins_objektkunde():
    """The objects-per-customer question must reference the objektkunde junction table.

    Fails when no query_db call was recorded, or when none of the recorded
    queries mentions ``objektkunde`` (case-insensitive).
    """
    _, hud_events = _run("Welcher Kunde hat die meisten Objekte?")
    queries = _query_db_calls(hud_events)
    assert queries, "no query_db call made"

    all_sql = " ".join(queries)
    assert "objektkunde" in all_sql.lower(), \
        f"expected objektkunde junction: {queries[0][:300] if queries else '(none)'}"
def test_no_sql_exposed_in_response():
    """The user-facing response must not contain raw SQL fragments.

    The lowercased response text is checked for the substrings ``"select "``
    and ``"from kunden"``; either one fails the test.
    """
    result, _ = _run("Zeig mir die 5 größten Kunden nach Umsatz")
    lowered = result.response.lower()

    assert "select " not in lowered, f"SQL leaked into response: {result.response[:200]}"
    assert "from kunden" not in lowered, f"table name leaked: {result.response[:200]}"
# Registry of the node-level tests in this module. Each key is the test
# function's name with the leading ``test_`` removed; insertion order follows
# the tuple below.
TESTS = {
    fn.__name__.removeprefix("test_"): fn
    for fn in (
        test_umsatz_uses_artikelposition,
        test_umsatz_not_geraeteverbraeuche,
        test_umsatz_has_result,
        test_kunden_count_uses_kunden_table,
        test_objekte_joins_objektkunde,
        test_no_sql_exposed_in_response,
    )
}