Add node-level test suite for ErasExpertNode
6 tests that instantiate ErasExpertNode directly (no HTTP, no pipeline). Assert SQL table selection, JOIN patterns, and response hygiene. 2 LLM calls per test vs 4+ for matrix — runs in ~22s total locally. Requires pymysql in venv and DB access (WireGuard or NodePort). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
269c5ccf8c
commit
b35610cf6f
@ -127,9 +127,17 @@ def get_testcase_tests() -> dict:
|
||||
return get_testcase_tests()
|
||||
|
||||
|
||||
def get_node_tests() -> dict:
    """Return the node-level test registry exported by ``test_node_eras``.

    Node-level tests instantiate the node directly (real LLM + DB, no HTTP).
    The directory containing this file is pushed to the front of ``sys.path``
    so that ``test_node_eras`` resolves as a top-level module; the import is
    done lazily, at call time, rather than when this module is loaded.
    """
    suite_dir = os.path.dirname(__file__)
    sys.path.insert(0, suite_dir)

    from test_node_eras import TESTS as node_tests

    return node_tests
|
||||
|
||||
|
||||
SUITES = {
|
||||
'engine': get_engine_tests,
|
||||
'api': get_api_tests,
|
||||
'node': get_node_tests,
|
||||
'matrix': get_matrix_tests,
|
||||
'testcases': get_testcase_tests,
|
||||
'roundtrip': get_roundtrip_tests,
|
||||
|
||||
128
tests/test_node_eras.py
Normal file
128
tests/test_node_eras.py
Normal file
@ -0,0 +1,128 @@
|
||||
"""Node-level tests for ErasExpertNode.
|
||||
|
||||
Tests the expert node directly — no HTTP, no pipeline, no session.
|
||||
Instantiates ErasExpertNode, calls execute(), asserts on HUD events + ThoughtResult.
|
||||
|
||||
Two LLM calls per test (plan + response) vs 4+ for full matrix tests.
|
||||
Runs against MariaDB directly (DB_HOST from .env — WireGuard on local, ClusterIP in K3s).
|
||||
|
||||
Usage:
|
||||
python tests/run_tests.py node
|
||||
python tests/run_tests.py node/umsatz_uses_artikelposition
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Load .env so DB_HOST, OPENROUTER_API_KEY etc. are set
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path(__file__).parent.parent / ".env")
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from agent.nodes.eras_expert import ErasExpertNode
|
||||
|
||||
|
||||
def _run(job: str):
    """Execute *job* on a fresh ErasExpertNode and capture its HUD events.

    A new node is built for every call, with a ``send_hud`` callback that
    simply records each payload it receives. The coroutine is driven to
    completion with ``asyncio.run``.

    Returns:
        A ``(result, events)`` tuple: the value returned by
        ``node.execute(job, language="de")`` and the list of recorded HUD
        payloads, in the order they were sent.
    """
    collected = []

    async def _capture(payload):
        # HUD sink: keep every payload so tests can inspect tool calls later.
        collected.append(payload)

    async def _drive():
        expert = ErasExpertNode(send_hud=_capture)
        outcome = await expert.execute(job, language="de")
        return outcome, collected

    return asyncio.run(_drive())
|
||||
|
||||
|
||||
def _tool_calls(events: list) -> list[dict]:
|
||||
return [e for e in events if e.get("event") == "tool_call"]
|
||||
|
||||
|
||||
def _query_db_calls(events: list) -> list[str]:
    """Extract the SQL strings from all ``query_db`` tool_call events.

    Only events that ``_tool_calls`` selects and whose ``"tool"`` field is
    ``"query_db"`` are considered. Events with missing or non-dict ``"args"``,
    or without a string ``"query"`` entry, are skipped instead of raising, so
    a malformed HUD event surfaces as a normal assertion failure in the
    calling test ("no query_db call made") rather than a KeyError/TypeError.

    Returns:
        The query strings in the order their events were recorded.
    """
    queries = []
    for call in _tool_calls(events):
        if call.get("tool") != "query_db":
            continue
        args = call.get("args")
        if not isinstance(args, dict):
            continue
        query = args.get("query")
        # Callers join the result with " ".join(...), so only keep strings.
        if isinstance(query, str):
            queries.append(query)
    return queries
|
||||
|
||||
|
||||
# --- Tests ---
|
||||
|
||||
def test_umsatz_uses_artikelposition():
    """Revenue ("Umsatz") job must query artikelposition.

    Passes when at least one recorded ``query_db`` SQL string mentions
    ``artikelposition`` (case-insensitive); fails if no query was issued.
    """
    events = _run("Zeig mir die 5 größten Kunden nach Umsatz")[1]
    queries = _query_db_calls(events)

    assert queries, "no query_db call made"
    assert any("artikelposition" in sql.lower() for sql in queries), (
        f"expected artikelposition in query, got: {queries[0][:300]}"
    )
|
||||
|
||||
|
||||
def test_umsatz_not_geraeteverbraeuche():
    """Revenue ("Umsatz") job must not touch geraeteverbraeuche (consumption table).

    Fails when no ``query_db`` call was recorded at all, and when any recorded
    SQL string mentions ``geraeteverbraeuche`` (case-insensitive).
    """
    _result, events = _run("Zeig mir die 5 größten Kunden nach Umsatz")

    queries = _query_db_calls(events)
    # Without this guard the negative check below passes vacuously when the
    # node never queried the database.
    assert queries, "no query_db call made"

    # Report the query that actually references the wrong table, which is not
    # necessarily the first one issued.
    offenders = [sql for sql in queries if "geraeteverbraeuche" in sql.lower()]
    assert not offenders, \
        f"used wrong table geraeteverbraeuche: {offenders[0][:300]}"
|
||||
|
||||
|
||||
def test_umsatz_has_result():
    """Revenue ("Umsatz") job finishes cleanly and produces output.

    Checks, in order, that ``result.errors`` is falsy, ``result.tool_output``
    is truthy and ``result.response`` is truthy.
    """
    result = _run("Zeig mir die 5 größten Kunden nach Umsatz")[0]

    assert not result.errors, f"expert had errors: {result.errors}"
    assert result.tool_output, "no tool output (query returned nothing)"
    assert result.response, "no response text generated"
|
||||
|
||||
|
||||
def test_kunden_count_uses_kunden_table():
    """Customer-count job must reference kunden in its SQL.

    Passes when at least one recorded ``query_db`` SQL string contains
    ``kunden`` (case-insensitive substring); fails if no query was issued.
    """
    events = _run("Wie viele Kunden gibt es?")[1]
    queries = _query_db_calls(events)

    assert queries, "no query_db call made"
    assert any("kunden" in sql.lower() for sql in queries), (
        f"expected kunden table: {queries}"
    )
|
||||
|
||||
|
||||
def test_objekte_joins_objektkunde():
    """Objekte-per-Kunde job must use the objektkunde junction table.

    Passes when at least one recorded ``query_db`` SQL string mentions
    ``objektkunde`` (case-insensitive); fails if no query was issued.
    """
    _result, events = _run("Welcher Kunde hat die meisten Objekte?")

    queries = _query_db_calls(events)
    assert queries, "no query_db call made"

    combined = " ".join(queries).lower()
    # queries is guaranteed non-empty by the assert above, so the message can
    # index it directly (the former "(none)" fallback was unreachable).
    assert "objektkunde" in combined, \
        f"expected objektkunde junction: {queries[0][:300]}"
|
||||
|
||||
|
||||
def test_no_sql_exposed_in_response():
    """Response text must not contain raw SQL (domain language only).

    Fails when there is no response text at all, when the lower-cased response
    contains ``"select "``, or when it contains ``"from kunden"``.
    """
    result, _events = _run("Zeig mir die 5 größten Kunden nach Umsatz")

    # A missing response would crash on .lower() and an empty one would pass
    # the leak checks vacuously, so require actual text first.
    assert result.response, "no response text generated"

    text = result.response.lower()
    assert "select " not in text, f"SQL leaked into response: {result.response[:200]}"
    assert "from kunden" not in text, f"table name leaked: {result.response[:200]}"
|
||||
|
||||
|
||||
# Registry imported by the test runner's node suite loader. Each key is the
# selectable test name: the function name without its "test_" prefix.
TESTS = {
    test_fn.__name__.removeprefix("test_"): test_fn
    for test_fn in (
        test_umsatz_uses_artikelposition,
        test_umsatz_not_geraeteverbraeuche,
        test_umsatz_has_result,
        test_kunden_count_uses_kunden_table,
        test_objekte_joins_objektkunde,
        test_no_sql_exposed_in_response,
    )
}
|
||||
Reference in New Issue
Block a user