acdream/tools/cdb/decode_retail_hex.py

#!/usr/bin/env python3
"""decode_retail_hex.py — A6.P1 retail-log hex→float decoder.

cdb's `.printf %f` doesn't reliably format floats from `dwo()` reads
(see history of a6-probe.cdb v2→v3→v4). The v4 probe prints all float
fields as 32-bit hex bits (`_h=0xHHHHHHHH`), and this script decodes
them via IEEE 754 single-precision reinterpretation.

Usage:
    py tools/cdb/decode_retail_hex.py <retail.log path>

Output:
    Writes a sibling file whose final suffix is replaced by
    `.decoded.log` (e.g. `retail.log` → `retail.decoded.log`), with all
    `name_h=0xHHHHHHHH` fields rewritten as `name=<float-value>`.

Example:
    [BP6] check_walkable hit#1 threshold_h=0x3F2A0751
    →
    [BP6] check_walkable hit#1 threshold=0.6642
"""

import re
import struct
import sys
from pathlib import Path


# Matches `name_h=0xHHHHHHHH`: group 1 is the field name (without the
# `_h` marker), group 2 is exactly eight hex digits (one 32-bit dword).
# The trailing negative lookahead rejects longer hex literals (e.g. a
# 64-bit value), which would otherwise be matched on their first eight
# digits and leave the remaining digits glued to the decoded float.
HEX_FIELD_RE = re.compile(r'(\w+)_h=0x([0-9A-Fa-f]{8})(?![0-9A-Fa-f])')


def decode_hex_float(hex_str: str) -> float:
    """Reinterpret a 32-bit hex value as an IEEE 754 single-precision float.

    `hex_str` is the numeric value of the dword as text (e.g. `3F800000`),
    not a dump of its bytes in memory, so no byte swapping is involved.
    The integer is serialized to four bytes and those same bytes are read
    back as a float; any byte order works as long as both steps use the
    same one.

    Raises:
        ValueError: if `hex_str` is not valid base-16 text.
        struct.error: if the value does not fit in an unsigned 32-bit int.
    """
    bits = int(hex_str, 16)
    # Big-endian on both sides: the choice is arbitrary, only the
    # agreement between pack and unpack matters.
    raw = struct.pack('>I', bits)
    (value,) = struct.unpack('>f', raw)
    return value


def decode_line(line: str) -> str:
    """Return `line` with each `name_h=0xHHHHHHHH` rewritten as `name=<float>`.

    Floats are rendered with four decimal places. Text that does not
    match the hex-field pattern is passed through untouched.
    """
    def _substitute(match):
        field, bits = match.group(1, 2)
        try:
            decoded = decode_hex_float(bits)
        except (ValueError, struct.error):
            # Undecodable field: leave the original text in place.
            return match.group(0)
        # Four decimals, matching the acdream-side formatting.
        return f'{field}={decoded:.4f}'

    return HEX_FIELD_RE.sub(_substitute, line)


def main() -> None:
    """Decode the log named on the command line into a sibling file.

    Expects exactly one argument: the path of the retail log. The output
    path is the input path with its final suffix replaced by
    `.decoded.log`. Every line is passed through `decode_line` and
    written out; a summary of how many lines and fields were rewritten
    is printed to stdout.

    Exits with status 1 (message on stderr) when the argument count is
    wrong, the input path does not exist, or the input path is a
    directory.
    """
    if len(sys.argv) != 2:
        print('Usage: py tools/cdb/decode_retail_hex.py <retail.log>',
              file=sys.stderr)
        sys.exit(1)

    in_path = Path(sys.argv[1])
    if not in_path.exists():
        print(f'Error: {in_path} not found', file=sys.stderr)
        sys.exit(1)
    if in_path.is_dir():
        # A directory passes exists() but cannot be opened as a text
        # file; report it cleanly instead of crashing with a traceback.
        print(f'Error: {in_path} is a directory, not a log file',
              file=sys.stderr)
        sys.exit(1)

    out_path = in_path.with_suffix('.decoded.log')

    lines_decoded = 0
    fields_decoded = 0

    # Input is read as ASCII with undecodable bytes replaced, so stray
    # non-ASCII bytes in the log cannot abort the run.
    with in_path.open('r', encoding='ascii', errors='replace') as fin, \
            out_path.open('w', encoding='utf-8') as fout:
        for line in fin:
            decoded = decode_line(line)
            if decoded != line:
                lines_decoded += 1
                # Count on the original line: the decoded line no longer
                # contains any `_h=0x...` fields.
                fields_decoded += len(HEX_FIELD_RE.findall(line))
            fout.write(decoded)

    print(f'Decoded {lines_decoded} lines, {fields_decoded} float fields')
    print(f'Output: {out_path}')


if __name__ == '__main__':
    main()