#!/usr/bin/env python3
"""Compare the Go tracker's /live against the live Python service.

NOTE: only /live is fetched and compared by this script; /trails is not.

Run on the server (or anywhere with loopback access to both):

    python3 compare_live.py    # default loopback ports
    python3 compare_live.py --py http://127.0.0.1:8765 --go http://127.0.0.1:8770

Parity strategy for a live firehose
-----------------------------------
The two services rebuild their /live cache independently every 5s, so an
actively-updating character can legitimately show a newer telemetry row in one
than the other. We separate "is this a real divergence?" from "is this just
cache timing?" using the server-stamped received_at:

  * SAME ROW (py.received_at == go.received_at, compared as instants): both
    rendered the *same* telemetry_events row -> every field MUST match
    (numbers within epsilon, timestamps compared as instants). This is the
    rigorous render-parity proof.
  * DIFFERENT ROW: a newer row arrived between the two refreshes -> we only
    require identity + key-set + type/null-pattern parity, and report the
    volatile-field skew (which should be small and recent).

Exit code 0 if no real parity violations, 1 otherwise.
"""
import argparse
import json
import re
import sys
import urllib.request
from datetime import datetime, timezone

# Absolute tolerance used when at least one side of a numeric comparison is a
# float.
EPS = 1e-6

# Fields that identify the entity / join keys — must always match for a player
# present in both outputs.
IDENTITY = ("character_name", "char_tag", "session_id")

# Slowly-changing aggregates — informational when they differ on a same-row pair
# (a kill/rare recorded between refreshes can bump these even for the same
# telemetry row).
AGGREGATES = ("total_kills", "total_rares", "session_rares")

# Fields whose string values are compared as instants rather than as text.
TIMESTAMP_FIELDS = ("timestamp", "received_at")

# A player whose newest row is about ONLINE_WINDOW_S old can appear in one
# service's online set and not the other, because the two refreshes happen at
# slightly different moments. Ages within +/- BOUNDARY_SLACK_S of the window
# (i.e. 22s..38s) are treated as explained by that boundary.
ONLINE_WINDOW_S = 30
BOUNDARY_SLACK_S = 8

# First fractional-seconds group in an ISO-8601 string (dates contain no ".").
_FRACTION_RE = re.compile(r"\.(\d+)")


def fetch(base, path):
    """GET ``base + path`` and return the decoded JSON body.

    A trailing "/" on *base* is stripped before joining. The request uses an
    8 second timeout; network and JSON decoding errors propagate to the caller.
    """
    with urllib.request.urlopen(base.rstrip("/") + path, timeout=8) as r:
        return json.load(r)


def jtype(v):
    """Return a coarse JSON type tag for *v*.

    ints and floats both map to "num" so that 0 and 0.0 are not reported as a
    type difference. bool is tested before the numeric check because bool is a
    subclass of int. Anything else falls back to the Python type name (e.g.
    "list", "dict").
    """
    if v is None:
        return "null"
    if isinstance(v, bool):
        return "bool"
    if isinstance(v, (int, float)):
        return "num"
    if isinstance(v, str):
        return "str"
    return type(v).__name__


def parse_ts(s):
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    Returns None when *s* is None. Raises ValueError when the text is not a
    valid ISO-8601 timestamp.

    Normalisations applied before parsing:
      * a trailing "Z"/"z" becomes "+00:00";
      * fractional seconds are padded/truncated to exactly 6 digits, because
        ``datetime.fromisoformat`` before Python 3.11 rejects other widths
        (e.g. 1 or 9 digits).

    A timestamp without any offset is treated as UTC so the result can be
    compared with / subtracted from aware datetimes.
    NOTE(review): assumes offset-less timestamps from either service are UTC —
    confirm against the producers.
    """
    if s is None:
        return None
    text = s.strip()
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    text = _FRACTION_RE.sub(
        lambda m: "." + m.group(1)[:6].ljust(6, "0"), text, count=1
    )
    parsed = datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def _parse_ts_or_none(value):
    """Like parse_ts, but return None for non-strings and unparseable text."""
    if not isinstance(value, str):
        return None
    try:
        return parse_ts(value)
    except ValueError:
        return None


def _is_number(v):
    """True for int/float values, excluding bool (a subclass of int)."""
    return isinstance(v, (int, float)) and not isinstance(v, bool)


def _near_boundary(age_s):
    """True when *age_s* is known and within the online-window slack band."""
    if age_s is None:
        return False
    low = ONLINE_WINDOW_S - BOUNDARY_SLACK_S
    high = ONLINE_WINDOW_S + BOUNDARY_SLACK_S
    return low <= age_s <= high


def values_equal(key, a, b):
    """Semantic equality for a single field value.

    * None only equals None.
    * For keys in TIMESTAMP_FIELDS, two strings are compared as instants; if
      either string cannot be parsed, fall back to plain string equality
      instead of raising.
    * Two ints are compared exactly (converting to float would make distinct
      integers above 2**53 look equal).
    * Any other pair of numbers is compared within EPS.
    * Everything else uses ``==``.
    """
    if a is None or b is None:
        return a is b
    if key in TIMESTAMP_FIELDS and isinstance(a, str) and isinstance(b, str):
        try:
            return parse_ts(a) == parse_ts(b)
        except ValueError:
            return a == b
    if _is_number(a) and _is_number(b):
        if isinstance(a, int) and isinstance(b, int):
            return a == b
        return abs(float(a) - float(b)) <= EPS
    return a == b


def main(argv=None):
    """Fetch /live from both services, print a parity report, return exit code.

    *argv* is the argument list to parse (defaults to ``sys.argv[1:]``).
    Returns 0 when no parity violation was found, 1 otherwise.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--py", default="http://127.0.0.1:8765")
    ap.add_argument("--go", default="http://127.0.0.1:8770")
    args = ap.parse_args(argv)

    py = fetch(args.py, "/live")["players"]
    go = fetch(args.go, "/live")["players"]
    now = datetime.now(timezone.utc)

    pyi = {p["character_name"]: p for p in py}
    goi = {p["character_name"]: p for p in go}
    common = sorted(set(pyi) & set(goi))
    only_py = sorted(set(pyi) - set(goi))
    only_go = sorted(set(goi) - set(pyi))

    print("=" * 72)
    print("/live PARITY python(%s) vs go(%s)" % (args.py, args.go))
    print("=" * 72)
    print(f"python players : {len(py)}")
    print(f"go players : {len(go)}")
    print(f"common : {len(common)}")

    violations = 0

    # --- key-set parity (all players) ---
    py_keys = set().union(*py)
    go_keys = set().union(*go)
    # Sorted union: per-player checks below cover keys from BOTH sides and
    # print in a stable order from run to run.
    all_keys = sorted(py_keys | go_keys)
    if py_keys == go_keys:
        print(f"key set : IDENTICAL ({len(py_keys)} keys)")
    else:
        violations += 1
        print("key set : MISMATCH")
        print(" only in python:", sorted(py_keys - go_keys))
        print(" only in go :", sorted(go_keys - py_keys))

    # --- online-set parity (boundary-aware) ---
    def age(p):
        # Age in seconds of the player's newest row, or None when no usable
        # timestamp is present.
        ts = _parse_ts_or_none(p.get("received_at") or p.get("timestamp"))
        return None if ts is None else (now - ts).total_seconds()

    print("\n-- online set --")
    if not only_py and not only_go:
        print("online set : IDENTICAL")
    else:
        # Players near the 30s boundary can flap between the two refreshes.
        unexplained = []

        def explain(names, idx, label):
            for n in names:
                a = age(idx[n])
                if _near_boundary(a):
                    tag = "boundary-flap (age %.1fs)" % a
                else:
                    tag = "age %s" % (None if a is None else round(a, 1))
                    unexplained.append(n)
                print(f" only_{label}: {n:<20} {tag}")

        if only_py:
            print(f"only in python : {len(only_py)}")
            explain(only_py, pyi, "py")
        if only_go:
            print(f"only in go : {len(only_go)}")
            explain(only_go, goi, "go")
        if unexplained:
            violations += 1
            print(" UNEXPLAINED set difference (not near 30s boundary):", unexplained)
        else:
            print(" (all set differences explained by the 30s online boundary)")

    # --- per-player field parity ---
    same_row = []  # py.received_at == go.received_at -> must fully match
    diff_row = []  # newer row arrived between refreshes
    for n in common:
        ra, rb = pyi[n].get("received_at"), goi[n].get("received_at")
        # Compare as instants so "...Z" and "...+00:00" (or different
        # fractional precision) still count as the same row.
        if ra is not None and values_equal("received_at", ra, rb):
            same_row.append(n)
        else:
            diff_row.append(n)

    print("\n-- per-player parity --")
    print(f"same-row pairs (identical received_at, must fully match): {len(same_row)}")
    print(f"diff-row pairs (newer telemetry between refreshes) : {len(diff_row)}")

    # Identity + type/null-pattern parity across ALL common players.
    id_bad = type_bad = 0
    for n in common:
        a, b = pyi[n], goi[n]
        for k in IDENTITY:
            if a.get(k) != b.get(k):
                id_bad += 1
                print(f" IDENTITY mismatch {n}.{k}: py={a.get(k)!r} go={b.get(k)!r}")
        for k in all_keys:
            ta, tb = jtype(a.get(k)), jtype(b.get(k))
            if ta != tb:
                # null vs num/str is a real null-pattern divergence; num-vs-num
                # whole-float (0.0) vs int (0) is already unified under "num".
                type_bad += 1
                print(f" TYPE mismatch {n}.{k}: py={ta}({a.get(k)!r}) go={tb}({b.get(k)!r})")
    violations += id_bad + type_bad
    if not id_bad and not type_bad:
        print("identity+type : IDENTICAL for all common players")

    # Rigorous: same-row pairs must match on every field.
    sr_full_match = 0
    for n in same_row:
        a, b = pyi[n], goi[n]
        diffs = [
            (k, a.get(k), b.get(k))
            for k in all_keys
            if not values_equal(k, a.get(k), b.get(k))
        ]
        if not diffs:
            sr_full_match += 1
            continue
        # Aggregate-only diffs are timing-explainable even on a same row.
        non_agg = [d for d in diffs if d[0] not in AGGREGATES]
        if non_agg:
            violations += 1
            print(f" SAME-ROW FIELD divergence {n}: "
                  + ", ".join(f"{k}: py={pa!r} go={ga!r}" for k, pa, ga in non_agg))
        else:
            print(f" (same-row {n}: only aggregate fields differ — kill/rare between refreshes: "
                  + ", ".join(f"{k} py={pa} go={ga}" for k, pa, ga in diffs) + ")")
    print(f"same-row full-field matches: {sr_full_match}/{len(same_row)}")

    # Volatile-field skew on diff-row pairs (informational).
    if diff_row:
        ts_deltas = []
        for n in diff_row:
            da = _parse_ts_or_none(pyi[n].get("timestamp"))
            db = _parse_ts_or_none(goi[n].get("timestamp"))
            if da is not None and db is not None:
                ts_deltas.append(abs((da - db).total_seconds()))
        if ts_deltas:
            ts_deltas.sort()
            print(f"diff-row timestamp skew: min={ts_deltas[0]:.1f}s "
                  f"median={ts_deltas[len(ts_deltas)//2]:.1f}s max={ts_deltas[-1]:.1f}s "
                  "(bounded by the two 5s refresh cycles)")

    print("\n" + "=" * 72)
    if violations == 0:
        print("RESULT: PARITY OK — no structural or same-row divergences.")
    else:
        print(f"RESULT: {violations} PARITY VIOLATION(S) — see above.")
    print("=" * 72)
    return 1 if violations else 0


if __name__ == "__main__":
    sys.exit(main())