Given slice-level metrics for a baseline and candidate, return
continue, pause, or rollback with reasons.
Hidden answer: invariants, tests, and Python solution
Invariants: critical safety regressions override cost wins, every
slice is checked independently, lower-is-better and higher-is-better
metrics use opposite delta signs, and missing launch-critical
metrics (any budgeted metric, or an entire slice, absent from either
the baseline or the candidate) pause the rollout. Test clean wins, cost-only wins, safety
regressions, latency regressions, and missing slice metrics.
def _usable_metric(slice_metrics, metric):
    """Return the value of *metric*, or None when it cannot be compared.

    A metric is unusable when its key is absent, its value is None, or its
    value is a float NaN. NaN has to be caught here because every ordering
    comparison against NaN is False, which would let it pass any budget.
    """
    import math  # local import so the block does not depend on file-level imports

    if metric not in slice_metrics:
        return None
    value = slice_metrics[metric]
    if value is None:
        return None
    if isinstance(value, float) and math.isnan(value):
        return None
    return value


def decide_voice_agent_release(baseline, candidate, budgets):
    """Decide whether a rollout should continue, pause, or roll back.

    Every slice named in *budgets* is checked independently, and every
    budgeted metric inside it is compared between *baseline* and *candidate*.

    Args:
        baseline: Mapping of slice name -> mapping of metric name -> numeric
            value for the currently released version.
        candidate: Same shape as *baseline*, for the version being rolled out.
        budgets: Mapping of slice name -> mapping of metric name -> rule. Each
            rule is a dict with required keys ``"direction"``
            (``"lower_is_better"`` or ``"higher_is_better"``) and
            ``"allowed_delta"`` (largest tolerated move in the bad direction),
            plus optional ``"severity"`` (``"critical"`` turns a regression
            into a rollback; anything else, default ``"normal"``, pauses).

    Returns:
        A ``(decision, reasons)`` tuple. ``decision`` is ``"rollback"`` if any
        reason is a rollback, otherwise ``"pause"`` if there is any reason,
        otherwise ``"continue"`` (with an empty list). Each reason is one of:

        * ``("pause", slice_name, "missing slice")``
        * ``("pause", slice_name, "missing <metric>")`` -- the metric is
          absent, None, or NaN on either side
        * ``("pause", slice_name, "unknown direction for <metric>")``
        * ``(action, slice_name, metric, round(delta, 4))`` -- a regression,
          where ``delta`` is ``candidate - baseline``

    Raises:
        KeyError: If a rule lacks ``"direction"`` or ``"allowed_delta"``.
    """
    reasons = []
    for slice_name, metric_budgets in budgets.items():
        old = baseline.get(slice_name)
        new = candidate.get(slice_name)
        if old is None or new is None:
            reasons.append(("pause", slice_name, "missing slice"))
            continue

        for metric, rule in metric_budgets.items():
            old_value = _usable_metric(old, metric)
            new_value = _usable_metric(new, metric)
            if old_value is None or new_value is None:
                # Fail safe: without a real number on both sides there is
                # nothing to compare, so the gate must not pass silently.
                reasons.append(("pause", slice_name, f"missing {metric}"))
                continue

            direction = rule["direction"]
            allowed = rule["allowed_delta"]
            severity = rule.get("severity", "normal")
            delta = new_value - old_value

            # Normalise so that a positive `regression` always means "worse".
            if direction == "lower_is_better":
                regression = delta
            elif direction == "higher_is_better":
                regression = -delta
            else:
                # A misspelled direction used to disable the budget entirely;
                # surface it instead of letting the rollout continue.
                reasons.append(
                    ("pause", slice_name, f"unknown direction for {metric}")
                )
                continue

            if regression > allowed:
                action = "rollback" if severity == "critical" else "pause"
                reasons.append((action, slice_name, metric, round(delta, 4)))

    # A single critical regression overrides every other outcome.
    if any(reason[0] == "rollback" for reason in reasons):
        return "rollback", reasons
    if reasons:
        return "pause", reasons
    return "continue", []