# Source code for hs_py.encoding.json

"""Haystack JSON encoding and decoding.

Supports both Haystack 4 (v4) and Haystack 3 (v3) JSON formats, with an
optional pythonic decode mode that converts Haystack types to native Python
equivalents where possible.

See: https://project-haystack.org/doc/docHaystack/Json
"""

from __future__ import annotations

import datetime
import math
import re
from enum import Enum
from typing import Any

import orjson

from hs_py.encoding.scanner import city_to_tz, format_num, tz_name
from hs_py.grid import Col, Grid
from hs_py.kinds import (
    MARKER,
    NA,
    REMOVE,
    Coord,
    Marker,
    Na,
    Number,
    Ref,
    Remove,
    Symbol,
    Uri,
    XStr,
)

__all__ = [
    "JsonVersion",
    "decode_grid",
    "decode_grid_dict",
    "decode_val",
    "encode_grid",
    "encode_grid_dict",
    "encode_val",
]


class JsonVersion(Enum):
    """Selects which Haystack JSON dialect is used for encoding/decoding."""

    V3 = "v3"
    """Haystack 3 JSON — type-prefixed strings (e.g. ``"n:42 °F"``)."""

    V4 = "v4"
    """Haystack 4 JSON — ``_kind`` object wrappers."""
# Maximum recursion depth for JSON decoding to prevent stack overflow. _MAX_DECODE_DEPTH = 64 # Maximum number of rows/columns in a decoded grid to prevent memory exhaustion. _MAX_GRID_ROWS = 100_000 _MAX_GRID_COLS = 10_000 # --------------------------------------------------------------------------- # Public API # ---------------------------------------------------------------------------
def encode_val(val: Any, *, version: JsonVersion = JsonVersion.V4) -> Any:
    """Convert one Haystack value into a JSON-compatible Python object.

    :param val: Haystack value to encode.
    :param version: JSON dialect to emit; anything other than
        :attr:`JsonVersion.V3` selects the v4 encoder.
    :returns: JSON-serializable Python object.
    """
    encoder = _encode_val_v3 if version is JsonVersion.V3 else _encode_val_v4
    return encoder(val)
def decode_val(
    obj: Any,
    *,
    version: JsonVersion = JsonVersion.V4,
    pythonic: bool = False,
) -> Any:
    """Turn a JSON-deserialized value back into a Haystack kind.

    :param obj: JSON-deserialized value.
    :param version: JSON dialect the value was written in.
    :param pythonic: When ``True``, map the result onto native Python types
        where possible: :class:`~hs_py.kinds.Marker` becomes ``True``, a
        unitless :class:`~hs_py.kinds.Number` becomes its numeric value, and
        :class:`~hs_py.kinds.Symbol` / :class:`~hs_py.kinds.Uri` become ``str``.
    :returns: Decoded Haystack value.
    """
    if version is JsonVersion.V3:
        decoded = _decode_val_v3(obj)
    else:
        decoded = _decode_val_v4(obj)
    return _to_pythonic(decoded) if pythonic else decoded
def encode_grid(grid: Grid, *, version: JsonVersion = JsonVersion.V4) -> bytes:
    """Serialize a :class:`~hs_py.grid.Grid` to Haystack JSON bytes.

    :param grid: Grid to encode.
    :param version: JSON dialect to emit.
    :returns: JSON-encoded bytes produced by :mod:`orjson`.
    """
    if version is not JsonVersion.V3:
        # v4: hand orjson the raw value tree and let the ``default`` hook
        # translate Haystack kinds (dataclasses and datetimes are passed
        # through to that hook instead of being serialized natively).
        passthrough = orjson.OPT_PASSTHROUGH_DATACLASS | orjson.OPT_PASSTHROUGH_DATETIME
        return orjson.dumps(
            _encode_grid_v4_raw(grid),
            default=_orjson_default_v4,
            option=passthrough,
        )
    # v3: every value is pre-converted to a plain JSON type first.
    return orjson.dumps(_encode_grid_v3(grid))
def encode_grid_dict(grid: Grid, *, version: JsonVersion = JsonVersion.V4) -> dict[str, Any]:
    """Build the JSON-compatible dict for a :class:`~hs_py.grid.Grid` without serializing it.

    Handy when the grid is embedded in a larger JSON document, since it skips
    a round-trip through bytes.

    :param grid: Grid to encode.
    :param version: JSON dialect to emit.
    :returns: JSON-serializable dict.
    """
    builder = _encode_grid_v3 if version is JsonVersion.V3 else _encode_grid_v4
    return builder(grid)
def decode_grid_dict(
    obj: dict[str, Any],
    *,
    version: JsonVersion = JsonVersion.V4,
    pythonic: bool = False,
) -> Grid:
    """Build a :class:`~hs_py.grid.Grid` from an already-parsed JSON dict.

    Intended for callers that hold deserialized JSON (e.g. from a WebSocket
    message) and want to skip an ``orjson.dumps`` / ``orjson.loads`` round-trip.

    :param obj: JSON-deserialized dict representing a grid.
    :param version: JSON dialect the dict was written in.
    :param pythonic: When ``True``, convert values to native Python types.
    :returns: Decoded :class:`~hs_py.grid.Grid`.
    """
    if version is JsonVersion.V3:
        decoded = _decode_grid_v3(obj)
    else:
        decoded = _decode_grid_v4(obj)
    return _pythonic_grid(decoded) if pythonic else decoded
def decode_grid(
    data: bytes,
    *,
    version: JsonVersion = JsonVersion.V4,
    pythonic: bool = False,
) -> Grid:
    """Parse Haystack JSON bytes into a :class:`~hs_py.grid.Grid`.

    :param data: JSON bytes.
    :param version: JSON dialect the payload was written in.
    :param pythonic: When ``True``, convert values to native Python types.
    :returns: Decoded :class:`~hs_py.grid.Grid`.
    """
    # Parse once, then share the dict-based decoding path.
    parsed = orjson.loads(data)
    return decode_grid_dict(parsed, version=version, pythonic=pythonic)
# ---------------------------------------------------------------------------
# V4 Encoding (Haystack 4 — _kind objects)
# ---------------------------------------------------------------------------


def _encode_temporal_v4(val: Any) -> dict[str, Any] | None:
    """Encode a date/time-like value for v4, or return ``None`` if it is not one.

    ``datetime.datetime`` is tested before ``datetime.date`` because the
    former is a subclass of the latter.
    """
    if isinstance(val, datetime.datetime):
        return _encode_datetime_v4(val)
    if isinstance(val, datetime.date):
        return {"_kind": "date", "val": val.isoformat()}
    if isinstance(val, datetime.time):
        return {"_kind": "time", "val": val.isoformat()}
    return None


def _encode_val_v4(val: Any) -> Any:
    """Encode a value using Haystack 4 JSON format.

    Exact-type checks handle the common JSON-native scalars first, then the
    singleton kinds, then a type-keyed dispatch table; ``isinstance`` checks
    run last so subclasses of the date/time and numeric types still encode.

    :raises TypeError: If the value's type has no v4 encoding.
    """
    if val is None:
        return None

    # JSON-native scalars pass through untouched.
    exact = type(val)
    if exact is str or exact is int or exact is float or exact is bool:
        return val

    # Singleton kinds are recognised by identity.
    if val is MARKER:
        return _MARKER_V4
    if val is NA:
        return _NA_V4
    if val is REMOVE:
        return _REMOVE_V4

    # Exact-type dispatch for the remaining kinds and containers.
    if (handler := _V4_TYPE_ENCODERS.get(exact)) is not None:
        return handler(val)

    # Subclass fallbacks: temporal types first, then numeric subclasses.
    temporal = _encode_temporal_v4(val)
    if temporal is not None:
        return temporal
    if isinstance(val, (int, float)):
        return val

    msg = f"Cannot encode {type(val).__name__} as Haystack JSON"
    raise TypeError(msg)


# Shared, pre-built encodings of the singleton kinds (no allocation per call).
_MARKER_V4: dict[str, str] = {"_kind": "marker"}
_NA_V4: dict[str, str] = {"_kind": "na"}
_REMOVE_V4: dict[str, str] = {"_kind": "remove"}


def _encode_number_v4_dispatch(val: Any) -> Any:
    """Dispatch-table entry that forwards to :func:`_encode_number_v4`."""
    return _encode_number_v4(val)


def _encode_ref_v4(val: Any) -> dict[str, Any]:
    """Encode a Ref; ``dis`` is emitted only when it is not ``None``."""
    encoded: dict[str, Any] = {"_kind": "ref", "val": val.val}
    if val.dis is not None:
        encoded["dis"] = val.dis
    return encoded


def _encode_symbol_v4(val: Any) -> dict[str, str]:
    """Encode a Symbol as a ``symbol`` kind object."""
    return {"_kind": "symbol", "val": val.val}


def _encode_uri_v4(val: Any) -> dict[str, str]:
    """Encode a Uri as a ``uri`` kind object."""
    return {"_kind": "uri", "val": val.val}


def _encode_coord_v4(val: Any) -> dict[str, Any]:
    """Encode a Coord as a ``coord`` kind object with ``lat`` / ``lng``."""
    return {"_kind": "coord", "lat": val.lat, "lng": val.lng}


def _encode_xstr_v4(val: Any) -> dict[str, str]:
    """Encode an XStr as an ``xstr`` kind object with ``type`` and ``val``."""
    return {"_kind": "xstr", "type": val.type_name, "val": val.val}


def _encode_grid_v4_dispatch(val: Any) -> dict[str, Any]:
    """Dispatch-table entry that forwards to :func:`_encode_grid_v4`."""
    return _encode_grid_v4(val)


def _encode_list_v4(val: Any) -> list[Any]:
    """Encode every element of a list."""
    return [_encode_val_v4(item) for item in val]


def _encode_dict_v4(val: Any) -> dict[str, Any]:
    """Encode every value of a dict, keeping the keys as they are."""
    return {key: _encode_val_v4(item) for key, item in val.items()}


# Exact type → encoder. Lookup is by ``type(val)``, so subclasses never match
# here; date/time types are handled by isinstance fallbacks instead because
# their subclass ordering matters.
_V4_TYPE_ENCODERS: dict[type, Any] = {
    Number: _encode_number_v4_dispatch,
    Ref: _encode_ref_v4,
    Symbol: _encode_symbol_v4,
    Uri: _encode_uri_v4,
    Coord: _encode_coord_v4,
    XStr: _encode_xstr_v4,
    Grid: _encode_grid_v4_dispatch,
    list: _encode_list_v4,
    dict: _encode_dict_v4,
}


def _orjson_default_v4(val: Any) -> Any:
    """Serialize a Haystack type for orjson's ``default`` hook.

    orjson invokes this for any object it cannot serialize natively. Checks
    are ordered by how often each kind is expected: Marker, then Ref, then
    everything else.

    :raises TypeError: If the value's type has no v4 encoding.
    """
    # Most frequent non-native value.
    if val is MARKER:
        return _MARKER_V4

    exact = type(val)

    # Ref is encoded inline (second most frequent) to skip the table lookup
    # and an extra function call.
    if exact is Ref:
        ref_obj: dict[str, Any] = {"_kind": "ref", "val": val.val}
        if val.dis is not None:
            ref_obj["dis"] = val.dis
        return ref_obj

    # Less frequent singletons.
    if val is NA:
        return _NA_V4
    if val is REMOVE:
        return _REMOVE_V4

    if (handler := _V4_TYPE_ENCODERS.get(exact)) is not None:
        return handler(val)

    temporal = _encode_temporal_v4(val)
    if temporal is not None:
        return temporal

    msg = f"Cannot encode {type(val).__name__} as Haystack JSON"
    raise TypeError(msg)


def _encode_number_v4(n: Number) -> Any:
    """Encode a Number for v4, using a plain JSON number when possible.

    A finite, unitless number is returned as its bare value. Otherwise a
    ``number`` kind object is produced, with NaN and the infinities spelled
    as the strings ``"NaN"``, ``"INF"`` and ``"-INF"``.
    """
    value = n.val
    unit = n.unit
    finite = math.isfinite(value)
    if unit is None and finite:
        return value

    if finite:
        encoded_val: Any = value
    elif math.isnan(value):
        encoded_val = "NaN"
    else:
        encoded_val = "INF" if value > 0 else "-INF"

    encoded: dict[str, Any] = {"_kind": "number", "val": encoded_val}
    if unit is not None:
        encoded["unit"] = unit
    return encoded


# Cache for datetime encoding — few unique datetimes appear many times.
# Encoded-datetime cache. Keyed on ``(datetime, tzinfo, fold)`` rather than the
# datetime alone: aware datetimes for the same instant compare and hash equal
# across time zones (and ``fold`` is ignored by ``==``), so a datetime-only key
# could hand back the encoding of a different zone's wall-clock time.
_DT_CACHE: dict[tuple[datetime.datetime, datetime.tzinfo | None, int], dict[str, Any]] = {}
_DT_CACHE_MAX = 256


def _encode_datetime_v4(dt: datetime.datetime) -> dict[str, Any]:
    """Encode a datetime for v4, with caching for repeated values.

    :param dt: Datetime to encode.
    :returns: ``dateTime`` kind object; ``tz`` is included only when
        ``tz_name(dt)`` is truthy. The returned dict may be shared with the
        cache, so callers should treat it as read-only.
    """
    key: tuple[datetime.datetime, datetime.tzinfo | None, int] | None
    key = (dt, dt.tzinfo, dt.fold)
    try:
        cached = _DT_CACHE.get(key)
    except TypeError:
        # tzinfo implementation is unhashable: encode without caching.
        key = None
        cached = None
    if cached is not None:
        return cached

    d: dict[str, Any] = {"_kind": "dateTime", "val": dt.isoformat()}
    tz = tz_name(dt)
    if tz:
        d["tz"] = tz
    # The cache is bounded; once full, new values are simply not stored.
    if key is not None and len(_DT_CACHE) < _DT_CACHE_MAX:
        _DT_CACHE[key] = d
    return d


def _encode_grid_v4(grid: Grid) -> dict[str, Any]:
    """Encode a Grid as a v4 JSON dict (fully pre-converted).

    Column ``meta`` is emitted only for columns whose meta is non-empty.
    """
    meta = {k: _encode_val_v4(v) for k, v in grid.meta.items()}
    cols = []
    for c in grid.cols:
        col_d: dict[str, Any] = {"name": c.name}
        if c.meta:
            col_d["meta"] = {k: _encode_val_v4(v) for k, v in c.meta.items()}
        cols.append(col_d)
    rows = [{k: _encode_val_v4(v) for k, v in row.items()} for row in grid.rows]
    return {"_kind": "grid", "meta": meta, "cols": cols, "rows": rows}


def _encode_grid_v4_raw(grid: Grid) -> dict[str, Any]:
    """Encode a Grid as a v4 dict with raw Haystack values.

    Values are NOT pre-converted — the caller must use ``orjson.dumps`` with
    ``default=_orjson_default_v4`` to serialize them. This avoids a full
    pre-walk of the value tree, letting orjson handle native types (str, int,
    float, bool, None, list, dict) in C and only calling back into Python for
    Haystack kind objects.

    Row and meta dicts are passed by reference (no copy) since orjson reads
    them read-only during serialization.
    """
    cols = []
    for c in grid.cols:
        col_d: dict[str, Any] = {"name": c.name}
        if c.meta:
            col_d["meta"] = c.meta
        cols.append(col_d)
    return {"_kind": "grid", "meta": grid.meta, "cols": cols, "rows": grid.rows}


# ---------------------------------------------------------------------------
# V4 Decoding
# ---------------------------------------------------------------------------


def _decode_val_v4(obj: Any, _depth: int = 0) -> Any:
    """Decode a value from Haystack 4 JSON format.

    :param obj: JSON-deserialized value.
    :param _depth: Current nesting depth (internal recursion guard).
    :raises ValueError: If nesting exceeds ``_MAX_DECODE_DEPTH`` or a dict
        carries an unknown ``_kind``.
    :raises TypeError: If ``obj`` is not a JSON-native type.
    """
    if _depth > _MAX_DECODE_DEPTH:
        msg = "Maximum decoding depth exceeded"
        raise ValueError(msg)
    # Fast type dispatch — avoids isinstance chain for common JSON types.
    t = type(obj)
    if t is str or t is bool or t is int or t is float or obj is None:
        return obj
    if t is dict:
        kind = obj.get("_kind")
        if kind is not None:
            # Inline _decode_kind_v4 to avoid function call overhead.
            decoder = _V4_KIND_DECODERS.get(kind)
            if decoder is not None:
                return decoder(obj, _depth)
            msg = f"Unknown _kind: {kind!r}"
            raise ValueError(msg)
        # A dict without ``_kind`` is a plain dict of values.
        return {k: _decode_val_v4(v, _depth + 1) for k, v in obj.items()}
    if t is list:
        return [_decode_val_v4(v, _depth + 1) for v in obj]
    msg = f"Cannot decode {type(obj).__name__} as Haystack value"
    raise TypeError(msg)


def _decode_kind_v4(kind: str, obj: dict[str, Any], _depth: int = 0) -> Any:
    """Decode a typed JSON object by its ``_kind`` field.

    :raises ValueError: If ``kind`` has no registered decoder.
    """
    decoder = _V4_KIND_DECODERS.get(kind)
    if decoder is not None:
        return decoder(obj, _depth)
    msg = f"Unknown _kind: {kind!r}"
    raise ValueError(msg)


def _decode_marker_v4(_obj: dict[str, Any], _depth: int = 0) -> Marker:
    """Return the ``MARKER`` singleton."""
    return MARKER


def _decode_na_v4(_obj: dict[str, Any], _depth: int = 0) -> Na:
    """Return the ``NA`` singleton."""
    return NA


def _decode_remove_v4(_obj: dict[str, Any], _depth: int = 0) -> Remove:
    """Return the ``REMOVE`` singleton."""
    return REMOVE


def _decode_number_v4(obj: dict[str, Any], _depth: int = 0) -> Number:
    """Decode a ``number`` kind object.

    The strings ``"NaN"``, ``"INF"`` and ``"-INF"`` map to the matching float
    specials; any other ``val`` is passed through ``float()``.
    """
    val = obj["val"]
    unit = obj.get("unit")
    if isinstance(val, str):
        if val == "NaN":
            return Number(float("nan"), unit)
        if val == "INF":
            return Number(float("inf"), unit)
        if val == "-INF":
            return Number(float("-inf"), unit)
    return Number(float(val), unit)


_REF_VAL_RE_MATCH = re.compile(r"^[a-zA-Z0-9_:\-.~]+$").match


def _decode_ref_v4(obj: dict[str, Any], _depth: int = 0) -> Ref:
    """Decode a ``ref`` kind object.

    Values that pass the local regex check are built without running the
    normal constructor; anything else goes through ``Ref(...)``.
    """
    # Fast path: bypass __post_init__ for trusted data, but still validate val
    val = obj["val"]
    if not isinstance(val, str) or not _REF_VAL_RE_MATCH(val):
        return Ref(val, obj.get("dis"))  # let __post_init__ raise
    ref = Ref.__new__(Ref)
    # object.__setattr__ sets the fields directly on the new instance.
    object.__setattr__(ref, "val", val)
    object.__setattr__(ref, "dis", obj.get("dis"))
    return ref


def _decode_symbol_v4(obj: dict[str, Any], _depth: int = 0) -> Symbol:
    """Decode a ``symbol`` kind object."""
    return Symbol(obj["val"])


def _decode_uri_v4(obj: dict[str, Any], _depth: int = 0) -> Uri:
    """Decode a ``uri`` kind object."""
    return Uri(obj["val"])


def _decode_coord_v4(obj: dict[str, Any], _depth: int = 0) -> Coord:
    """Decode a ``coord`` kind object from its ``lat`` / ``lng`` fields."""
    return Coord(obj["lat"], obj["lng"])


def _decode_xstr_v4(obj: dict[str, Any], _depth: int = 0) -> XStr:
    """Decode an ``xstr`` kind object from its ``type`` / ``val`` fields."""
    return XStr(obj["type"], obj["val"])


def _decode_date_v4(obj: dict[str, Any], _depth: int = 0) -> datetime.date:
    """Decode a ``date`` kind object from its ISO 8601 ``val``."""
    return datetime.date.fromisoformat(obj["val"])


def _decode_time_v4(obj: dict[str, Any], _depth: int = 0) -> datetime.time:
    """Decode a ``time`` kind object from its ISO 8601 ``val``."""
    return datetime.time.fromisoformat(obj["val"])


# Decoded-datetime cache keyed on the raw ``(val, tz)`` pair from the JSON.
_DECODE_DT_CACHE: dict[tuple[str, str | None], datetime.datetime] = {}
_DECODE_DT_CACHE_MAX = 512


def _decode_datetime_v4(obj: dict[str, Any], _depth: int = 0) -> datetime.datetime:
    """Decode a ``dateTime`` kind object, caching by ``(val, tz)``.

    When ``tz`` is present it is resolved with ``city_to_tz``; a naive parsed
    value gets that zone attached, an aware one is converted into it.
    """
    val_str: str = obj["val"]
    # Named ``tz_city`` so the imported ``tz_name`` helper is not shadowed.
    tz_city: str | None = obj.get("tz")
    key = (val_str, tz_city)
    cached = _DECODE_DT_CACHE.get(key)
    if cached is not None:
        return cached
    dt = datetime.datetime.fromisoformat(val_str)
    if tz_city:
        tz = city_to_tz(tz_city)
        dt = dt.replace(tzinfo=tz) if dt.tzinfo is None else dt.astimezone(tz)
    # Bounded cache: once full, new values are simply not stored.
    if len(_DECODE_DT_CACHE) < _DECODE_DT_CACHE_MAX:
        _DECODE_DT_CACHE[key] = dt
    return dt


def _decode_dict_v4(obj: dict[str, Any], _depth: int = 0) -> dict[str, Any]:
    """Decode a dict with explicit ``_kind: 'dict'``."""
    return {k: _decode_val_v4(v, _depth + 1) for k, v in obj.items() if k != "_kind"}


def _decode_grid_v4(obj: dict[str, Any], _depth: int = 0) -> Grid:
    """Decode a v4 grid JSON object.

    Missing ``meta`` / ``cols`` / ``rows`` are treated as empty.

    :raises ValueError: If the column or row count exceeds the module limits.
    """
    meta_raw = obj.get("meta", {})
    meta = {k: _decode_val_v4(v, _depth + 1) for k, v in meta_raw.items()}

    cols_raw = obj.get("cols", [])
    if len(cols_raw) > _MAX_GRID_COLS:
        msg = f"Grid exceeds maximum column count ({len(cols_raw)} > {_MAX_GRID_COLS})"
        raise ValueError(msg)
    cols = tuple(
        Col(
            name=c["name"],
            meta={k: _decode_val_v4(v, _depth + 1) for k, v in c.get("meta", {}).items()},
        )
        for c in cols_raw
    )

    rows_raw = obj.get("rows", [])
    if len(rows_raw) > _MAX_GRID_ROWS:
        msg = f"Grid exceeds maximum row count ({len(rows_raw)} > {_MAX_GRID_ROWS})"
        raise ValueError(msg)
    rows = tuple({k: _decode_val_v4(v, _depth + 1) for k, v in row.items()} for row in rows_raw)

    return Grid(meta=meta, cols=cols, rows=rows)


# ``_kind`` string → decoder taking ``(obj, _depth)``.
_V4_KIND_DECODERS: dict[str, Any] = {
    "marker": _decode_marker_v4,
    "na": _decode_na_v4,
    "remove": _decode_remove_v4,
    "number": _decode_number_v4,
    "ref": _decode_ref_v4,
    "symbol": _decode_symbol_v4,
    "uri": _decode_uri_v4,
    "coord": _decode_coord_v4,
    "xstr": _decode_xstr_v4,
    "date": _decode_date_v4,
    "time": _decode_time_v4,
    "dateTime": _decode_datetime_v4,
    "grid": _decode_grid_v4,
    "dict": _decode_dict_v4,
}


# ---------------------------------------------------------------------------
# V3 Encoding (Haystack 3 — string prefixes)
# ---------------------------------------------------------------------------

# Characters that, when followed by ':', form a v3 type prefix.
_V3_TYPE_PREFIXES = frozenset("cdhmnrstuxyz-")


def _encode_val_v3(val: Any) -> Any:
    """Encode a value using Haystack 3 JSON format.

    Uses fast-path checks for common types and dispatch table for kinds.

    :raises TypeError: If the value's type has no v3 encoding.
    """
    if val is None:
        return None
    typ = type(val)
    if typ is str:
        return _encode_str_v3(val)
    if typ is int or typ is float:
        return _encode_number_v3(Number(float(val)))
    if typ is bool:
        return val
    # Singleton identity
    if val is MARKER:
        return "m:"
    if val is NA:
        return "z:"
    if val is REMOVE:
        return "-:"
    # Dispatch table
    encoder = _V3_TYPE_ENCODERS.get(typ)
    if encoder is not None:
        return encoder(val)
    # Fallback: subclass checks. datetime must precede date (it subclasses it).
    if isinstance(val, datetime.datetime):
        tz = tz_name(val) or "UTC"
        return f"t:{val.isoformat()} {tz}"
    if isinstance(val, datetime.date):
        return f"d:{val.isoformat()}"
    if isinstance(val, datetime.time):
        return f"h:{val.isoformat()}"
    if isinstance(val, int | float):
        return _encode_number_v3(Number(float(val)))
    msg = f"Cannot encode {type(val).__name__} as Haystack JSON"
    raise TypeError(msg)


def _encode_str_v3(s: str) -> str:
    """Encode a string for v3, adding ``s:`` prefix if ambiguous."""
    if len(s) >= 2 and s[1] == ":" and s[0] in _V3_TYPE_PREFIXES:
        return f"s:{s}"
    return s


def _encode_number_v3(n: Number) -> str:
    """Encode a Number for v3 as a string with ``n:`` prefix."""
    if math.isnan(n.val):
        num_str = "NaN"
    elif math.isinf(n.val):
        num_str = "INF" if n.val > 0 else "-INF"
    else:
        num_str = format_num(n.val)
    if n.unit is not None:
        return f"n:{num_str} {n.unit}"
    return f"n:{num_str}"


def _encode_grid_v3(grid: Grid) -> dict[str, Any]:
    """Encode a Grid as a v3 JSON dict (no ``_kind``, flat col meta)."""
    meta = {k: _encode_val_v3(v) for k, v in grid.meta.items()}
    cols = []
    for c in grid.cols:
        col_d: dict[str, Any] = {"name": c.name}
        # Column meta tags sit directly beside ``name`` in v3.
        for k, v in c.meta.items():
            col_d[k] = _encode_val_v3(v)
        cols.append(col_d)
    rows = [{k: _encode_val_v3(v) for k, v in row.items()} for row in grid.rows]
    return {"meta": meta, "cols": cols, "rows": rows}


def _encode_number_v3_dispatch(val: Any) -> str:
    """Dispatch-table entry that forwards to :func:`_encode_number_v3`."""
    return _encode_number_v3(val)


def _encode_ref_v3(val: Any) -> str:
    """Encode a Ref as ``r:<val>`` or ``r:<val> <dis>``."""
    if val.dis is not None:
        return f"r:{val.val} {val.dis}"
    return f"r:{val.val}"


def _encode_symbol_v3(val: Any) -> str:
    """Encode a Symbol as ``y:<val>``."""
    return f"y:{val.val}"


def _encode_uri_v3(val: Any) -> str:
    """Encode a Uri as ``u:<val>``."""
    return f"u:{val.val}"


def _encode_coord_v3(val: Any) -> str:
    """Encode a Coord as ``c:<lat>,<lng>``."""
    return f"c:{val.lat},{val.lng}"


def _encode_xstr_v3(val: Any) -> str:
    """Encode an XStr as ``x:<type>:<val>``."""
    return f"x:{val.type_name}:{val.val}"


def _encode_grid_v3_dispatch(val: Any) -> dict[str, Any]:
    """Dispatch-table entry that forwards to :func:`_encode_grid_v3`."""
    return _encode_grid_v3(val)


def _encode_list_v3(val: Any) -> list[Any]:
    """Encode every element of a list."""
    return [_encode_val_v3(v) for v in val]


def _encode_dict_v3(val: Any) -> dict[str, Any]:
    """Encode every value of a dict, keeping the keys as they are."""
    return {k: _encode_val_v3(v) for k, v in val.items()}


# Exact type → v3 encoder (lookup is by ``type(val)``, so subclasses miss).
_V3_TYPE_ENCODERS: dict[type, Any] = {
    Number: _encode_number_v3_dispatch,
    Ref: _encode_ref_v3,
    Symbol: _encode_symbol_v3,
    Uri: _encode_uri_v3,
    Coord: _encode_coord_v3,
    XStr: _encode_xstr_v3,
    Grid: _encode_grid_v3_dispatch,
    list: _encode_list_v3,
    dict: _encode_dict_v3,
}


# ---------------------------------------------------------------------------
# V3 Decoding (Haystack 3 — string prefixes)
# ---------------------------------------------------------------------------


def _decode_val_v3(obj: Any, _depth: int = 0) -> Any:
    """Decode a value from Haystack 3 JSON format.

    :param obj: JSON-deserialized value.
    :param _depth: Current nesting depth (internal recursion guard).
    :raises ValueError: If nesting exceeds ``_MAX_DECODE_DEPTH``.
    :raises TypeError: If ``obj`` is not a JSON-native type.
    """
    if _depth > _MAX_DECODE_DEPTH:
        msg = "Maximum decoding depth exceeded"
        raise ValueError(msg)
    if obj is None:
        return None
    # bool is checked before int because bool is an int subclass.
    if isinstance(obj, bool):
        return obj
    if isinstance(obj, int | float):
        return obj
    if isinstance(obj, str):
        return _decode_str_v3(obj)
    if isinstance(obj, list):
        return [_decode_val_v3(v, _depth + 1) for v in obj]
    if isinstance(obj, dict):
        return {k: _decode_val_v3(v, _depth + 1) for k, v in obj.items()}
    msg = f"Cannot decode {type(obj).__name__} as Haystack value"
    raise TypeError(msg)


def _decode_str_v3(s: str) -> Any:
    """Decode a v3 type-prefixed string.

    Strings without a recognised ``<char>:`` prefix are returned unchanged.
    """
    if len(s) >= 2 and s[1] == ":":
        decoder = _V3_STR_DECODERS.get(s[0])
        if decoder is not None:
            return decoder(s[2:])
    return s


def _v3_marker(_rest: str) -> Marker:
    """Return the ``MARKER`` singleton."""
    return MARKER


def _v3_na(_rest: str) -> Na:
    """Return the ``NA`` singleton."""
    return NA


def _v3_remove(_rest: str) -> Remove:
    """Return the ``REMOVE`` singleton."""
    return REMOVE


def _v3_str(rest: str) -> str:
    """Return the text after an explicit ``s:`` prefix."""
    return rest


def _v3_number(rest: str) -> Number:
    """Parse ``"45.5"`` or ``"45.5 °F"``."""
    parts = rest.split(" ", 1)
    num_str = parts[0]
    unit = parts[1] if len(parts) > 1 else None
    if num_str == "NaN":
        return Number(float("nan"), unit)
    if num_str == "INF":
        return Number(float("inf"), unit)
    if num_str == "-INF":
        return Number(float("-inf"), unit)
    return Number(float(num_str), unit)


def _v3_ref(rest: str) -> Ref:
    """Parse ``"abc-123"`` or ``"abc-123 Display Name"``."""
    parts = rest.split(" ", 1)
    return Ref(parts[0], parts[1] if len(parts) > 1 else None)


def _v3_symbol(rest: str) -> Symbol:
    """Build a Symbol from the text after ``y:``."""
    return Symbol(rest)


def _v3_date(rest: str) -> datetime.date:
    """Parse an ISO 8601 date."""
    return datetime.date.fromisoformat(rest)


def _v3_time(rest: str) -> datetime.time:
    """Parse an ISO 8601 time."""
    return datetime.time.fromisoformat(rest)


def _v3_datetime(rest: str) -> datetime.datetime:
    """Parse ``"ISO8601 Timezone"``.

    Without a trailing timezone name the ISO string is parsed as-is. With
    one, the zone is resolved with ``city_to_tz``; a naive parsed value gets
    that zone attached and an aware one is converted into it, mirroring
    ``_decode_datetime_v4``.
    """
    if " " in rest:
        # Named ``tz_city`` so the imported ``tz_name`` helper is not shadowed.
        iso_part, tz_city = rest.rsplit(" ", 1)
        dt = datetime.datetime.fromisoformat(iso_part)
        tz = city_to_tz(tz_city)
        # astimezone() on a naive value would assume the host's local zone,
        # so a naive wall-clock time is tagged with the named zone instead.
        return dt.replace(tzinfo=tz) if dt.tzinfo is None else dt.astimezone(tz)
    return datetime.datetime.fromisoformat(rest)


def _v3_uri(rest: str) -> Uri:
    """Build a Uri from the text after ``u:``."""
    return Uri(rest)


def _v3_coord(rest: str) -> Coord:
    """Parse ``"lat,lng"``."""
    lat_s, lng_s = rest.split(",")
    return Coord(float(lat_s), float(lng_s))


def _v3_xstr(rest: str) -> XStr:
    """Parse ``"Type:value"``.

    :raises ValueError: If there is no ``:`` separating type and value.
    """
    idx = rest.find(":")
    if idx < 0:
        msg = f"Invalid XStr format (missing ':' separator): {rest!r}"
        raise ValueError(msg)
    return XStr(rest[:idx], rest[idx + 1 :])


# v3 prefix character → decoder taking the text after ``<char>:``.
_V3_STR_DECODERS: dict[str, Any] = {
    "m": _v3_marker,
    "z": _v3_na,
    "-": _v3_remove,
    "s": _v3_str,
    "n": _v3_number,
    "r": _v3_ref,
    "y": _v3_symbol,
    "d": _v3_date,
    "h": _v3_time,
    "t": _v3_datetime,
    "u": _v3_uri,
    "c": _v3_coord,
    "x": _v3_xstr,
}


def _decode_grid_v3(obj: dict[str, Any], _depth: int = 0) -> Grid:
    """Decode a v3 grid JSON object (flat col meta, no ``_kind``).

    Missing ``meta`` / ``cols`` / ``rows`` are treated as empty.

    :raises ValueError: If the column or row count exceeds the module limits.
    """
    meta_raw = obj.get("meta", {})
    meta = {k: _decode_val_v3(v, _depth + 1) for k, v in meta_raw.items()}

    cols_raw = obj.get("cols", [])
    if len(cols_raw) > _MAX_GRID_COLS:
        msg = f"Grid exceeds maximum column count ({len(cols_raw)} > {_MAX_GRID_COLS})"
        raise ValueError(msg)
    cols = tuple(
        Col(
            name=c["name"],
            # Every key other than ``name`` is a column meta tag in v3.
            meta={k: _decode_val_v3(v, _depth + 1) for k, v in c.items() if k != "name"},
        )
        for c in cols_raw
    )

    rows_raw = obj.get("rows", [])
    if len(rows_raw) > _MAX_GRID_ROWS:
        msg = f"Grid exceeds maximum row count ({len(rows_raw)} > {_MAX_GRID_ROWS})"
        raise ValueError(msg)
    rows = tuple({k: _decode_val_v3(v, _depth + 1) for k, v in row.items()} for row in rows_raw)

    return Grid(meta=meta, cols=cols, rows=rows)


# ---------------------------------------------------------------------------
# Pythonic transform (decode-only)
# ---------------------------------------------------------------------------

# Only instances of these types are changed by the pythonic transform.
_PYTHONIC_TYPES = (Marker, Number, Symbol, Uri, list, dict, Grid)


def _to_pythonic(val: Any) -> Any:
    """Convert Haystack types to native Python equivalents where possible.

    - Marker → True
    - Number (unitless) → its ``val``; a Number with a unit is kept as-is
    - Symbol → str
    - Uri → str

    Lists, dicts and Grids are converted recursively; every other value is
    returned unchanged.
    """
    if not isinstance(val, _PYTHONIC_TYPES):
        return val
    if isinstance(val, Marker):
        return True
    if isinstance(val, Number):
        return val.val if val.unit is None else val
    if isinstance(val, Symbol):
        return val.val
    if isinstance(val, Uri):
        return val.val
    if isinstance(val, list):
        return [_to_pythonic(v) for v in val]
    if isinstance(val, dict):
        return {k: _to_pythonic(v) for k, v in val.items()}
    # Grid
    return _pythonic_grid(val)


def _pythonic_grid(grid: Grid) -> Grid:
    """Apply pythonic transform to all values in a Grid.

    A new Grid is built from the converted meta, column meta and rows.
    """
    meta = {k: _to_pythonic(v) for k, v in grid.meta.items()}
    cols = tuple(
        Col(name=c.name, meta={k: _to_pythonic(v) for k, v in c.meta.items()}) for c in grid.cols
    )
    rows = tuple({k: _to_pythonic(v) for k, v in row.items()} for row in grid.rows)
    return Grid(meta=meta, cols=cols, rows=rows)