# ── State Schema + Graph Definition ──
from langgraph.graph import StateGraph, START, END
from typing import TypedDict, List, Optional, Annotated
from langgraph.checkpoint.redis import RedisSaver
class AgentLoopState(TypedDict):
    """State schema for the agent-loop graph.

    This class is passed to ``StateGraph`` as the graph's state type. Fields
    are grouped by the part of the loop they belong to; which node writes
    each field is not visible from this definition. ``Optional`` fields may
    hold ``None``.
    """

    # Input
    task: str
    spec: dict

    # Orchestrator
    plan: Optional[dict]
    active_prompt: str  # replaced by self_improve_node when it promotes a new prompt
    iteration: int  # compared against max_iterations by route_after_quality

    # Coding Agent
    generated_code: Optional[str]
    generated_tests: Optional[str]
    code_diff: Optional[str]

    # Review Agent
    review_score: Optional[float]  # pass gate in route_after_quality is >= 0.85
    review_issues: Optional[List[dict]]  # route_after_quality reads each issue's "type" key

    # Quality Agent
    test_passed: Optional[bool]
    coverage_pct: Optional[float]  # pass gate is >= 80, so presumably on a 0-100 scale
    quality_report: Optional[dict]

    # Memory
    memory_context: Optional[str]
    anti_patterns: Optional[List[str]]

    # Control
    max_iterations: int
    should_escalate: bool
    run_id: str
# ── Build the graph ──
builder = StateGraph(AgentLoopState)

# Register each node under the name the edges below refer to.
for _node_name, _node_fn in (
    ("orchestrate", orchestrator_node),
    ("memory_retrieve", memory_retrieve_node),
    ("code", coding_agent_node),
    ("review", review_agent_node),
    ("quality", quality_agent_node),
    ("memory_write", memory_write_node),
    ("hitl", hitl_node),  # interrupt
    ("self_improve", self_improve_node),
):
    builder.add_node(_node_name, _node_fn)

# Fixed path into the quality gate; each stage feeds the next one.
_entry_path = (START, "orchestrate", "memory_retrieve", "code", "review", "quality")
for _src, _dst in zip(_entry_path, _entry_path[1:]):
    builder.add_edge(_src, _dst)

# ── Conditional routing ──
# route_after_quality returns one of the keys below; the value is the node
# that runs next.
builder.add_conditional_edges(
    "quality",
    route_after_quality,
    {
        "retry": "orchestrate",  # loop back
        "escalate": "hitl",  # human gate
        "pass": "memory_write",  # success
    },
)

# Success path ends after self_improve; the human gate hands back to coding.
for _src, _dst in (
    ("memory_write", "self_improve"),
    ("self_improve", END),
    ("hitl", "code"),
):
    builder.add_edge(_src, _dst)

# Compiled with a Redis-backed checkpointer and "hitl" listed in
# interrupt_before (presumably so the run pauses for a human before that node
# executes; confirm against the LangGraph docs).
graph = builder.compile(
    checkpointer=RedisSaver(client),
    interrupt_before=["hitl"],
)
# ── Routing Function ──
def route_after_quality(state: "AgentLoopState") -> str:
    """Choose the branch to follow after the quality node.

    Checks run in this order and the first match wins:

    1. ``iteration >= max_iterations``                      -> ``"escalate"``
    2. any review issue whose ``"type"`` is ``"security"``  -> ``"escalate"``
    3. ``review_score >= 0.85``, tests passed and
       ``coverage_pct >= 80``                               -> ``"pass"``
    4. anything else                                        -> ``"retry"``

    Review and quality fields that are missing from ``state`` or set to
    ``None`` count as ``0.0`` / ``False`` / "no issues", so a partially
    populated state routes to ``"retry"`` instead of raising.

    Args:
        state: Current graph state. ``iteration`` and ``max_iterations``
            must be present.

    Returns:
        ``"escalate"``, ``"pass"`` or ``"retry"``.

    Raises:
        KeyError: If ``iteration`` or ``max_iterations`` is missing.
    """
    # Hard escalation: the iteration budget is spent, so stop looping even if
    # this attempt would otherwise pass.
    if state["iteration"] >= state["max_iterations"]:
        return "escalate"  # HITL takes over

    # Security flag → always escalate. Issues without a "type" key are
    # treated as non-security rather than crashing the router.
    issues = state.get("review_issues") or []
    if any(issue.get("type") == "security" for issue in issues):
        return "escalate"

    # Pass gate
    score = state.get("review_score") or 0.0
    passed = bool(state.get("test_passed"))
    coverage = state.get("coverage_pct") or 0.0
    if score >= 0.85 and passed and coverage >= 80:
        return "pass"

    return "retry"  # loop
# ── Self-Improve Node ──
async def self_improve_node(state):
    """Occasionally replace ``active_prompt`` with an evaluated improvement.

    When ``should_trigger_improve()`` is true, the last 10 run traces are
    fetched, those with ``avg_score`` below 0.75 are handed to ``meta_llm``
    together with the current prompt, and ``eval_harness`` picks the best
    returned candidate against ``EVAL_SUITE``. The winner is saved via
    ``memory.save_prompt_version`` and written to ``active_prompt``.

    Args:
        state: Current graph state; ``active_prompt`` is read from it.

    Returns:
        ``state`` unchanged when the trigger is off or no recent run scored
        below 0.75; otherwise a copy of ``state`` with ``active_prompt``
        replaced by the selected prompt.
    """
    # Original note: only triggers every N=10 runs (the trigger itself is
    # defined outside this block).
    if not should_trigger_improve():
        return state

    # Pull last 10 run traces
    traces = memory.get_recent_runs(n=10)
    failures = [t for t in traces if t["avg_score"] < 0.75]
    if not failures:
        # No low-scoring runs means there is nothing to tell the meta model
        # about, so skip the generation and evaluation round entirely.
        return state

    # Ask the meta LLM to mutate the prompt
    new_prompts = await meta_llm.generate(
        system=PROMPT_ENGINEER_SYSTEM,
        user=f"Failures: {failures}\n"
        f"Current prompt: {state['active_prompt']}\n"
        f"Generate 3 improved variants.",
    )

    # Eval on held-out test cases
    best = await eval_harness.pick_best(
        candidates=new_prompts,
        test_cases=EVAL_SUITE,
    )

    # Persist + promote
    memory.save_prompt_version(best)
    return {**state, "active_prompt": best}
# ── Model Router Config ──
# Model ids that appear under more than one agent role.
_CLAUDE_SONNET = "anthropic/claude-sonnet-4-6"
_GEMINI_FLASH = "gemini/gemini-2.5-flash"
_GROQ_LLAMA_70B = "groq/llama-3.3-70b-versatile"

# Per-role model routing table: each role maps to a "primary" model id and,
# where one is configured, a "fallback" id. "meta_improve" has no fallback.
AGENT_MODELS = {
    "orchestrator": {
        "primary": _CLAUDE_SONNET,
        "fallback": _GEMINI_FLASH,
    },
    "coding": {
        "primary": _GEMINI_FLASH,  # free
        "fallback": "anthropic/claude-haiku-4-5",
    },
    "review": {
        "primary": _GROQ_LLAMA_70B,  # free+fast
        "fallback": _GEMINI_FLASH,
    },
    "quality": {
        "primary": _GROQ_LLAMA_70B,  # free
        "fallback": "cerebras/llama-3.3-70b",
    },
    "meta_improve": {
        "primary": _CLAUDE_SONNET,  # best reasoning
    },
}