diff --git a/tests/run_tests.py b/tests/run_tests.py
index e7052f1..99c0c7b 100644
--- a/tests/run_tests.py
+++ b/tests/run_tests.py
@@ -127,9 +127,17 @@ def get_testcase_tests() -> dict:
     return get_testcase_tests()
 
 
+def get_node_tests() -> dict:
+    """Load node-level tests (direct node instantiation, real LLM + DB, no HTTP)."""
+    sys.path.insert(0, os.path.dirname(__file__))
+    from test_node_eras import TESTS
+    return TESTS
+
+
 SUITES = {
     'engine': get_engine_tests,
     'api': get_api_tests,
+    'node': get_node_tests,
     'matrix': get_matrix_tests,
     'testcases': get_testcase_tests,
     'roundtrip': get_roundtrip_tests,
diff --git a/tests/test_node_eras.py b/tests/test_node_eras.py
new file mode 100644
index 0000000..cfccde4
--- /dev/null
+++ b/tests/test_node_eras.py
@@ -0,0 +1,144 @@
+"""Node-level tests for ErasExpertNode.
+
+Tests the expert node directly — no HTTP, no pipeline, no session.
+Instantiates ErasExpertNode, calls execute(), asserts on HUD events + ThoughtResult.
+
+Two LLM calls per test (plan + response) vs 4+ for full matrix tests.
+Runs against MariaDB directly (DB_HOST from .env — WireGuard on local, ClusterIP in K3s).
+
+Usage:
+    python tests/run_tests.py node
+    python tests/run_tests.py node/umsatz_uses_artikelposition
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+# Load .env so DB_HOST, OPENROUTER_API_KEY etc. are set
+from dotenv import load_dotenv
+load_dotenv(Path(__file__).parent.parent / ".env")
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from agent.nodes.eras_expert import ErasExpertNode
+
+
+def _run(job: str):
+    """Instantiate expert, run job, return (result, hud_events)."""
+    events = []
+
+    async def hud(e):
+        events.append(e)
+
+    async def _exec():
+        node = ErasExpertNode(send_hud=hud)
+        return await node.execute(job, language="de"), events
+
+    return asyncio.run(_exec())
+
+
+def _tool_calls(events: list) -> list[dict]:
+    """Return only the HUD events whose event type is tool_call."""
+    return [e for e in events if e.get("event") == "tool_call"]
+
+
+def _query_db_calls(events: list) -> list[str]:
+    """Extract SQL strings from all query_db tool_call events.
+
+    Events without a dict args payload or without a query entry are
+    skipped, so a malformed event surfaces as a failed assertion in the
+    test rather than a KeyError/TypeError raised from this helper.
+    """
+    queries = []
+    for e in _tool_calls(events):
+        if e.get("tool") != "query_db":
+            continue
+        args = e.get("args")
+        if isinstance(args, dict) and "query" in args:
+            queries.append(args["query"])
+    return queries
+
+
+# --- Tests ---
+
+def test_umsatz_uses_artikelposition():
+    """Umsatz query must use artikelposition, not geraeteverbraeuche."""
+    _, events = _run("Zeig mir die 5 größten Kunden nach Umsatz")
+
+    queries = _query_db_calls(events)
+    assert queries, "no query_db call made"
+
+    combined = " ".join(queries).lower()
+    assert "artikelposition" in combined, \
+        f"expected artikelposition in query, got: {queries[0][:300]}"
+
+
+def test_umsatz_not_geraeteverbraeuche():
+    """Umsatz query must not touch geraeteverbraeuche (consumption table)."""
+    _, events = _run("Zeig mir die 5 größten Kunden nach Umsatz")
+
+    queries = _query_db_calls(events)
+    # Without this guard the test passes vacuously when no SQL ran at all.
+    assert queries, "no query_db call made"
+
+    offending = [q for q in queries if "geraeteverbraeuche" in q.lower()]
+    assert not offending, \
+        f"used wrong table geraeteverbraeuche: {offending[0][:300]}"
+
+
+def test_umsatz_has_result():
+    """Umsatz query returns non-empty result and completes without errors."""
+    result, _ = _run("Zeig mir die 5 größten Kunden nach Umsatz")
+
+    assert not result.errors, \
+        f"expert had errors: {result.errors}"
+    assert result.tool_output, "no tool output (query returned nothing)"
+    assert result.response, "no response text generated"
+
+
+def test_kunden_count_uses_kunden_table():
+    """Simple count query uses the kunden table."""
+    _, events = _run("Wie viele Kunden gibt es?")
+
+    queries = _query_db_calls(events)
+    assert queries, "no query_db call made"
+
+    combined = " ".join(queries).lower()
+    assert "kunden" in combined, f"expected kunden table: {queries}"
+
+
+def test_objekte_joins_objektkunde():
+    """Objekte-per-Kunde query uses the objektkunde junction table."""
+    _, events = _run("Welcher Kunde hat die meisten Objekte?")
+
+    queries = _query_db_calls(events)
+    assert queries, "no query_db call made"
+
+    combined = " ".join(queries).lower()
+    assert "objektkunde" in combined, \
+        f"expected objektkunde junction: {queries[0][:300]}"
+
+
+def test_no_sql_exposed_in_response():
+    """Response text must not contain raw SQL (domain language only)."""
+    result, _ = _run("Zeig mir die 5 größten Kunden nach Umsatz")
+
+    # Guard first: a missing response would otherwise raise AttributeError
+    # on .lower() (or pass vacuously when empty) instead of failing clearly.
+    assert result.response, "no response text generated"
+
+    text = result.response.lower()
+    assert "select " not in text, f"SQL leaked into response: {result.response[:200]}"
+    assert "from kunden" not in text, f"table name leaked: {result.response[:200]}"
+
+
+TESTS = {
+    "umsatz_uses_artikelposition": test_umsatz_uses_artikelposition,
+    "umsatz_not_geraeteverbraeuche": test_umsatz_not_geraeteverbraeuche,
+    "umsatz_has_result": test_umsatz_has_result,
+    "kunden_count_uses_kunden_table": test_kunden_count_uses_kunden_table,
+    "objekte_joins_objektkunde": test_objekte_joins_objektkunde,
+    "no_sql_exposed_in_response": test_no_sql_exposed_in_response,
+}