feat(go-services): tracker-go Phase 0/1 — /live + /trails read parity
Parallel Go reimplementation of the dereth-tracker read side, deployed loopback-only (:8770) and reading the dereth TimescaleDB read-only. The live Python stack is untouched (added via a compose override, not by editing the tracked docker-compose.yml). - Phase 0 scaffold: stdlib net/http server (Go 1.22+ method+path routing), /health + /api-version, multi-stage distroless Docker build, and go-services/docker-compose.go.yml override (loopback :8770). - Phase 1: pgx v5 pool forced into read-only transactions, a 5s /live + /trails cache loop using the exact main.py:837 SQL, and Python-isoformat timestamps so output matches FastAPI's jsonable_encoder. - compare/compare_live.py: parity harness vs the live Python service. Uses the server-stamped received_at to prove same-row full-field equality and to make the online-set diff boundary-aware. Verified on live traffic (73 players): identical online set + 23-key schema, identity/type parity for all, every same-row pair matches on every field, and diff-row pairs differ only by the ~6s two-cache refresh skew. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
b8fd449d62
commit
1af47520c0
7 changed files with 691 additions and 0 deletions
223
go-services/compare/compare_live.py
Normal file
223
go-services/compare/compare_live.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Compare the Go tracker's /live (and /trails) against the live Python service.
|
||||
|
||||
Run on the server (or anywhere with loopback access to both):
|
||||
python3 compare_live.py # default loopback ports
|
||||
python3 compare_live.py --py http://127.0.0.1:8765 --go http://127.0.0.1:8770
|
||||
|
||||
Parity strategy for a live firehose
|
||||
-----------------------------------
|
||||
The two services rebuild their /live cache independently every 5s, so an
|
||||
actively-updating character can legitimately show a newer telemetry row in one
|
||||
than the other. We separate "is this a real divergence?" from "is this just
|
||||
cache timing?" using the server-stamped received_at:
|
||||
|
||||
* SAME ROW (py.received_at == go.received_at): both rendered the *same*
|
||||
telemetry_events row -> every field MUST match (numbers within epsilon,
|
||||
timestamps compared as instants). This is the rigorous render-parity proof.
|
||||
* DIFFERENT ROW: a newer row arrived between the two refreshes -> we only
|
||||
require identity + key-set + type/null-pattern parity, and report the
|
||||
volatile-field skew (which should be small and recent).
|
||||
|
||||
Exit code 0 if no real parity violations, 1 otherwise.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# Absolute tolerance applied when two numeric field values are compared
# (see values_equal).
EPS = 1e-6

# Fields that identify the entity / join keys — must always match for a player
# present in both outputs.
IDENTITY = ("character_name", "char_tag", "session_id")
# Slowly-changing aggregates — informational when they differ on a same-row pair
# (a kill/rare recorded between refreshes can bump these even for the same
# telemetry row).
AGGREGATES = ("total_kills", "total_rares", "session_rares")
# Fields whose string values are parsed and compared as instants rather than
# as raw text (see values_equal).
TIMESTAMP_FIELDS = ("timestamp", "received_at")
|
||||
|
||||
|
||||
def fetch(base, path):
    """Request ``base + path`` and return the response body decoded as JSON.

    Trailing slashes on *base* are stripped before *path* is appended, so
    ``"http://host/"`` and ``"http://host"`` address the same URL.  The
    request uses an 8 second timeout.
    """
    url = base.rstrip("/") + path
    with urllib.request.urlopen(url, timeout=8) as response:
        payload = json.load(response)
    return payload
|
||||
|
||||
|
||||
def jtype(v):
    """Return a coarse JSON-level type label for a decoded value.

    ``None`` -> "null", booleans -> "bool", ints and floats -> "num",
    strings -> "str"; anything else falls back to the Python type name
    (e.g. "list", "dict").
    """
    if v is None:
        return "null"
    # Order matters: bool is a subclass of int, so it must be tested before
    # the numeric entry or True/False would be labelled "num".
    labelled_kinds = (
        ("bool", bool),
        ("num", (int, float)),
        ("str", str),
    )
    for label, kinds in labelled_kinds:
        if isinstance(v, kinds):
            return label
    return type(v).__name__
|
||||
|
||||
|
||||
def parse_ts(s):
    """Parse an ISO-8601 timestamp string into a ``datetime``.

    ``None`` passes through as ``None``.  Every "Z" in the text is rewritten
    to "+00:00" before the string is handed to ``datetime.fromisoformat``.
    """
    if s is not None:
        normalized = s.replace("Z", "+00:00")
        return datetime.fromisoformat(normalized)
    return None
|
||||
|
||||
|
||||
def values_equal(key, a, b):
    """Decide whether two field values are semantically equal.

    * If either side is ``None`` the values match only when both are.
    * For keys listed in ``TIMESTAMP_FIELDS`` with string values on both
      sides, the strings are parsed and compared as instants.
    * Two real numbers (bools excluded) match when they differ by at most
      ``EPS``.
    * Everything else uses plain ``==``.
    """
    def is_number(x):
        # bool is an int subclass; it must not be treated as a number here.
        return isinstance(x, (int, float)) and not isinstance(x, bool)

    if a is None or b is None:
        return a is b or a == b

    both_text = isinstance(a, str) and isinstance(b, str)
    if both_text and key in TIMESTAMP_FIELDS:
        return parse_ts(a) == parse_ts(b)

    if is_number(a) and is_number(b):
        return abs(float(a) - float(b)) <= EPS

    return a == b
|
||||
|
||||
|
||||
# Age window (seconds) around the 30s online cutoff.  A player whose newest row
# is this old can legitimately appear in only one of the two caches, because
# the services refresh independently.
_BOUNDARY_WINDOW = (22, 38)


def _near_boundary(age_seconds):
    """Return True when *age_seconds* is known and inside the boundary window."""
    low, high = _BOUNDARY_WINDOW
    return age_seconds is not None and low <= age_seconds <= high


def _same_instant(x, y):
    """Return True when two received_at values denote the same instant.

    ``None`` on either side never matches.  Raw equality is accepted first;
    otherwise both values are parsed with ``parse_ts`` and compared, so a pure
    rendering difference (e.g. "Z" vs "+00:00") still counts as the same row.
    Values that cannot be parsed simply do not match.
    """
    if x is None or y is None:
        return False
    if x == y:
        return True
    try:
        return parse_ts(x) == parse_ts(y)
    except (AttributeError, TypeError, ValueError):
        return False


def main():
    """Fetch /live from both services, print a parity report, return 0 or 1.

    Only the ``/live`` endpoint is fetched and compared here.  The report has
    four parts: key-set parity, online-set parity (tolerating players near the
    30s online boundary), identity/type parity for every common player, and a
    full-field comparison for pairs that rendered the same telemetry row.
    Diff-row timestamp skew is printed for information only.

    Returns:
        0 when no parity violation was counted, 1 otherwise.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--py", default="http://127.0.0.1:8765")
    ap.add_argument("--go", default="http://127.0.0.1:8770")
    args = ap.parse_args()

    py = fetch(args.py, "/live")["players"]
    go = fetch(args.go, "/live")["players"]
    # Sampled after both fetches; only used to compute row ages below.
    now = datetime.now(timezone.utc)

    pyi = {p["character_name"]: p for p in py}
    goi = {p["character_name"]: p for p in go}
    common = sorted(set(pyi) & set(goi))
    only_py = sorted(set(pyi) - set(goi))
    only_go = sorted(set(goi) - set(pyi))

    print("=" * 72)
    print("/live PARITY python(%s) vs go(%s)" % (args.py, args.go))
    print("=" * 72)
    print(f"python players : {len(py)}")
    print(f"go players : {len(go)}")
    print(f"common : {len(common)}")

    violations = 0

    # --- key-set parity (all players) ---
    py_keys = set().union(*[set(p) for p in py]) if py else set()
    go_keys = set().union(*[set(p) for p in go]) if go else set()
    if py_keys == go_keys:
        print(f"key set : IDENTICAL ({len(py_keys)} keys)")
    else:
        violations += 1
        print("key set : MISMATCH")
        print(" only in python:", sorted(py_keys - go_keys))
        print(" only in go :", sorted(go_keys - py_keys))

    # Sorted so per-key reports come out in the same order on every run
    # (iterating the raw set would vary with string hash randomisation).
    ordered_keys = sorted(py_keys)

    # --- online-set parity (boundary-aware) ---
    def age(p):
        ts = parse_ts(p.get("received_at") or p.get("timestamp"))
        return (now - ts).total_seconds() if ts else None

    print("\n-- online set --")
    if not only_py and not only_go:
        print("online set : IDENTICAL")
    else:
        # Players near the 30s boundary can flap between the two refreshes.
        def explain(names, idx):
            side = "py" if idx is pyi else "go"
            for n in names:
                a = age(idx[n])
                if _near_boundary(a):
                    tag = "boundary-flap (age %.1fs)" % a
                else:
                    tag = "age %s" % (None if a is None else round(a, 1))
                print(f" only_{side}: {n:<20} {tag}")

        if only_py:
            print(f"only in python : {len(only_py)}")
            explain(only_py, pyi)
        if only_go:
            print(f"only in go : {len(only_go)}")
            explain(only_go, goi)
        unexplained = [
            n for n in (only_py + only_go)
            if not _near_boundary(age(pyi.get(n) or goi.get(n)))
        ]
        if unexplained:
            violations += 1
            print(" UNEXPLAINED set difference (not near 30s boundary):", unexplained)
        else:
            print(" (all set differences explained by the 30s online boundary)")

    # --- per-player field parity ---
    same_row = []  # same received_at instant on both sides -> must fully match
    diff_row = []  # newer row arrived between refreshes
    for n in common:
        if _same_instant(pyi[n].get("received_at"), goi[n].get("received_at")):
            same_row.append(n)
        else:
            diff_row.append(n)

    print("\n-- per-player parity --")
    print(f"same-row pairs (identical received_at, must fully match): {len(same_row)}")
    print(f"diff-row pairs (newer telemetry between refreshes) : {len(diff_row)}")

    # Identity + type/null-pattern parity across ALL common players.
    id_bad = type_bad = 0
    for n in common:
        a, b = pyi[n], goi[n]
        for k in IDENTITY:
            if a.get(k) != b.get(k):
                id_bad += 1
                print(f" IDENTITY mismatch {n}.{k}: py={a.get(k)!r} go={b.get(k)!r}")
        for k in ordered_keys:
            ta, tb = jtype(a.get(k)), jtype(b.get(k))
            if ta != tb:
                # null vs num/str is a real null-pattern divergence; num-vs-num
                # whole-float (0.0) vs int (0) is already unified under "num".
                type_bad += 1
                print(f" TYPE mismatch {n}.{k}: py={ta}({a.get(k)!r}) go={tb}({b.get(k)!r})")
    violations += id_bad + type_bad
    if not id_bad and not type_bad:
        print("identity+type : IDENTICAL for all common players")

    # Rigorous: same-row pairs must match on every field.
    sr_full_match = 0
    for n in same_row:
        a, b = pyi[n], goi[n]
        diffs = [
            (k, a.get(k), b.get(k))
            for k in ordered_keys
            if not values_equal(k, a.get(k), b.get(k))
        ]
        if not diffs:
            sr_full_match += 1
            continue
        # Aggregate-only diffs are timing-explainable even on a same row.
        non_agg = [d for d in diffs if d[0] not in AGGREGATES]
        if non_agg:
            violations += 1
            print(f" SAME-ROW FIELD divergence {n}: " +
                  ", ".join(f"{k}: py={pa!r} go={ga!r}" for k, pa, ga in non_agg))
        else:
            print(f" (same-row {n}: only aggregate fields differ — kill/rare between refreshes: "
                  + ", ".join(f"{k} py={pa} go={ga}" for k, pa, ga in diffs) + ")")
    print(f"same-row full-field matches: {sr_full_match}/{len(same_row)}")

    # Volatile-field skew on diff-row pairs (informational).
    if diff_row:
        ts_deltas = []
        for n in diff_row:
            da, db = parse_ts(pyi[n].get("timestamp")), parse_ts(goi[n].get("timestamp"))
            if da and db:
                ts_deltas.append(abs((da - db).total_seconds()))
        if ts_deltas:
            ts_deltas.sort()
            print(f"diff-row timestamp skew: min={ts_deltas[0]:.1f}s "
                  f"median={ts_deltas[len(ts_deltas)//2]:.1f}s max={ts_deltas[-1]:.1f}s "
                  "(bounded by the two 5s refresh cycles)")

    print("\n" + "=" * 72)
    if violations == 0:
        print("RESULT: PARITY OK — no structural or same-row divergences.")
    else:
        print(f"RESULT: {violations} PARITY VIOLATION(S) — see above.")
    print("=" * 72)
    return 1 if violations else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Hand main()'s 0/1 result to the interpreter as the process exit status.
    raise SystemExit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue