Given canary metrics and budgets, return whether a speech model
rollout should continue, pause, or roll back.
Hidden answer: invariant, tests, and Python solution
Invariant: critical metrics override noncritical wins. Test a
latency win with entity error regression, missing critical metrics,
exactly-on-budget deltas, and a noncritical cost regression that
should pause instead of roll back.
def rollout_recommendation(metrics, budgets):
findings = []
for name, budget in budgets.items():
if name not in metrics:
findings.append(("pause", name, "missing"))
continue
value = metrics[name]
limit = budget["limit"]
direction = budget["direction"]
critical = budget.get("critical", False)
failed = (
direction == "lower_is_better" and value > limit
) or (
direction == "higher_is_better" and value < limit
)
if failed:
action = "rollback" if critical else "pause"
findings.append((action, name, value))
if any(item[0] == "rollback" for item in findings):
return "rollback", findings
if findings:
return "pause", findings
return "continue", []