New 'engine' suite in run_tests.py with tests that verify frame engine mechanics without LLM calls. Covers graph loading, node instantiation, edge type completeness, reflex/tool_output conditions, and frame trace structure for reflex/expert/expert+interpreter pipelines. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
199 lines
6.3 KiB
Python
199 lines
6.3 KiB
Python
#!/usr/bin/env python3
"""
Test orchestrator — runs test suites and posts results to dev assay.

Usage:
    python tests/run_all_tests.py                                  # all suites
    python tests/run_all_tests.py api                              # one suite
    python tests/run_all_tests.py roundtrip                        # one suite
    python tests/run_all_tests.py api/health                       # single test
    python tests/run_all_tests.py roundtrip/full_eras              # single test
    python tests/run_all_tests.py api/health roundtrip/full_chat   # multiple tests

Test names: suite/name (without the suite prefix in the test registry).
    engine tests:    graph_load, node_instantiation, edge_types_complete,
                     condition_reflex, condition_tool_output,
                     frame_trace_reflex, frame_trace_expert,
                     frame_trace_expert_with_interpreter
    api tests:       health, eras_umsatz_api, eras_umsatz_artifact
    roundtrip tests: nyx_loads, inject_artifact, inject_message, full_chat,
                     full_eras
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
import urllib.request
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from dataclasses import dataclass, field, asdict
|
|
|
|
# URL that receives TestResult JSON via POST; empty string disables posting.
RESULTS_ENDPOINT = os.environ.get('RESULTS_ENDPOINT', '')
# Short identifier shared by every result of this invocation; random if unset.
RUN_ID = os.environ.get('RUN_ID', str(uuid.uuid4())[:8])
|
|
|
|
|
|
def _now_iso() -> str:
|
|
return datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
@dataclass
class TestResult:
    """One test outcome, serialized to JSON for stdout and the results endpoint."""

    run_id: str   # identifier shared by all results in one orchestrator run
    test: str     # test name within its suite (registry key)
    suite: str    # suite name, e.g. 'engine', 'api', 'roundtrip'
    status: str  # 'pass', 'fail', 'running', 'error'
    duration_ms: float = 0  # wall-clock duration; 0 until the test finishes
    error: str = ''         # assertion/exception message; empty on success
    ts: str = ''            # ISO-8601 UTC timestamp of the last status change
|
|
|
|
|
|
def post_result(result: TestResult) -> None:
    """Post a single test result to the dev assay endpoint.

    Always echoes the result as one JSON line on stdout. Additionally POSTs
    it to RESULTS_ENDPOINT when that is configured. Posting is best-effort:
    any failure is logged to stderr and never aborts the test run.
    """
    print(json.dumps(asdict(result)), flush=True)
    if not RESULTS_ENDPOINT:
        return
    try:
        payload = json.dumps(asdict(result)).encode()
        req = urllib.request.Request(
            RESULTS_ENDPOINT,
            data=payload,
            headers={'Content-Type': 'application/json'},
        )
        # Close the response explicitly — the bare urlopen() call leaked
        # the HTTP connection until garbage collection.
        with urllib.request.urlopen(req, timeout=5):
            pass
    except Exception as e:
        print(f' [warn] failed to post result: {e}', file=sys.stderr)
|
|
|
|
|
|
def run_test(name: str, suite: str, fn) -> TestResult:
    """Run a single test function and return the result.

    Posts a 'running' result before execution and the final result after.
    AssertionError maps to status 'fail'; any other exception to 'error'.
    """
    result = TestResult(run_id=RUN_ID, test=name, suite=suite, status='running', ts=_now_iso())
    post_result(result)

    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # (or go negative) if the wall clock is adjusted mid-test; time.time() can.
    start = time.perf_counter()
    try:
        fn()
        result.status = 'pass'
    except AssertionError as e:
        result.status = 'fail'
        result.error = str(e)
    except Exception as e:
        result.status = 'error'
        result.error = f'{type(e).__name__}: {e}'
    result.duration_ms = round((time.perf_counter() - start) * 1000)
    result.ts = _now_iso()

    post_result(result)
    return result
|
|
|
|
|
|
def get_api_tests() -> dict:
    """Load API tests from e2e_harness.py.

    Points the harness at the ASSAY_API base URL and filters out tests that
    need a browser. Returns a {test_name: callable} registry.
    """
    sys.path.insert(0, os.path.dirname(__file__))
    import e2e_harness
    # removesuffix drops exactly a trailing '/api'. The previous
    # rstrip('/api') treated '/api' as a character SET and would also strip
    # any trailing '/', 'a', 'p', 'i' characters from the host name.
    e2e_harness.ASSAY_BASE = os.environ.get('ASSAY_API', 'http://assay-runtime-test:8000').removesuffix('/api')
    # Skip browser-dependent tests
    return {k: v for k, v in e2e_harness.TESTS.items() if 'takeover' not in k and 'panes' not in k}
|
|
|
|
|
|
def get_roundtrip_tests() -> dict:
    """Load the Playwright roundtrip test registry from test_roundtrip.py."""
    tests_dir = os.path.dirname(__file__)
    sys.path.insert(0, tests_dir)
    import test_roundtrip
    return test_roundtrip.TESTS
|
|
|
|
|
|
def get_engine_tests() -> dict:
    """Load the engine-level test registry (no LLM, no network) from test_engine.py."""
    tests_dir = os.path.dirname(__file__)
    sys.path.insert(0, tests_dir)
    import test_engine
    return test_engine.TESTS
|
|
|
|
|
|
# Suite registry: suite name -> loader returning {test_name: callable}.
# Dict order is execution order when no suite filter is given.
SUITES = {
    'engine': get_engine_tests,
    'api': get_api_tests,
    'roundtrip': get_roundtrip_tests,
}
|
|
|
|
|
|
def parse_filters(args: list[str]) -> tuple[set[str] | None, set[str]]:
|
|
"""Parse CLI args into (suite_filter, test_filter).
|
|
|
|
Returns:
|
|
suite_filter: set of suite names, or None for all suites
|
|
test_filter: set of 'suite/test' names (empty = run all in suite)
|
|
"""
|
|
if not args:
|
|
return None, set()
|
|
|
|
suites = set()
|
|
tests = set()
|
|
for arg in args:
|
|
if '/' in arg:
|
|
tests.add(arg)
|
|
suites.add(arg.split('/')[0])
|
|
else:
|
|
suites.add(arg)
|
|
return suites, tests
|
|
|
|
|
|
def run_suite(suite_name: str, tests: dict, test_filter: set[str]) -> list[TestResult]:
    """Run tests from a suite, optionally filtered.

    A test runs when test_filter is empty, or when the filter contains either
    'suite/registry_name' or 'suite/short_name' (registry name minus the
    leading 'suite_' prefix).
    """
    results = []
    for name, fn in tests.items():
        # Apply test filter if specified
        full_name = f'{suite_name}/{name}'
        # Strip ONLY the leading suite prefix. str.replace would remove every
        # occurrence of 'suite_' anywhere in the name, mangling test names
        # that repeat the prefix internally.
        short_name = name.removeprefix(f'{suite_name}_')
        if test_filter and full_name not in test_filter and f'{suite_name}/{short_name}' not in test_filter:
            continue

        r = run_test(name, suite_name, fn)
        results.append(r)
        status = 'PASS' if r.status == 'pass' else 'FAIL'
        print(f' [{status}] {suite_name}/{name} ({r.duration_ms:.0f}ms)', flush=True)
        if r.error:
            print(f' {r.error[:200]}', flush=True)
    return results
|
|
|
|
|
|
def main():
    """Entry point: run the selected suites, print a summary, and exit
    non-zero when any test failed or errored."""
    suite_filter, test_filter = parse_filters(sys.argv[1:])

    print(f'=== Test Run {RUN_ID} ===', flush=True)
    if suite_filter:
        print(f'Filter: suites={suite_filter}, tests={test_filter or "all"}', flush=True)
    print(f'ASSAY_API: {os.environ.get("ASSAY_API", "not set")}', flush=True)
    print(f'NYX_URL: {os.environ.get("NYX_URL", "not set")}', flush=True)
    print(flush=True)

    all_results = []

    for suite_name, loader in SUITES.items():
        if suite_filter and suite_name not in suite_filter:
            continue
        print(f'--- {suite_name} ---', flush=True)
        tests = loader()
        all_results.extend(run_suite(suite_name, tests, test_filter))
        print(flush=True)

    # Summary
    passed = sum(1 for r in all_results if r.status == 'pass')
    failed = sum(1 for r in all_results if r.status in ('fail', 'error'))
    total_ms = sum(r.duration_ms for r in all_results)
    print(f'=== {passed} passed, {failed} failed, {len(all_results)} total ({total_ms:.0f}ms) ===', flush=True)

    if RESULTS_ENDPOINT:
        summary = TestResult(
            run_id=RUN_ID, test='__summary__', suite='summary',
            status='pass' if failed == 0 else 'fail',
            duration_ms=total_ms,
            error=f'{passed} passed, {failed} failed',
            # Every per-test result carries a timestamp; keep the summary
            # consistent (it was previously posted with ts='').
            ts=_now_iso(),
        )
        post_result(summary)

    sys.exit(1 if failed else 0)
|
|
|
|
|
|
# Script entry point: run the orchestrator only when executed directly,
# not when imported.
if __name__ == '__main__':
    main()
|