"""Haystack JSON encoding and decoding.
Supports both Haystack 4 (v4) and Haystack 3 (v3) JSON formats, with an
optional pythonic decode mode that converts Haystack types to native Python
equivalents where possible.
See: https://project-haystack.org/doc/docHaystack/Json
"""
from __future__ import annotations
import datetime
import math
import re
from enum import Enum
from typing import Any
import orjson
from hs_py.encoding.scanner import city_to_tz, format_num, tz_name
from hs_py.grid import Col, Grid
from hs_py.kinds import (
MARKER,
NA,
REMOVE,
Coord,
Marker,
Na,
Number,
Ref,
Remove,
Symbol,
Uri,
XStr,
)
# Names exported by a star-import of this module: the version enum plus the
# value-level and grid-level encode/decode entry points.
__all__ = [
    "JsonVersion",
    "decode_grid",
    "decode_grid_dict",
    "decode_val",
    "encode_grid",
    "encode_grid_dict",
    "encode_val",
]
[docs]
class JsonVersion(Enum):
    """Selects which Haystack JSON wire format is read or written."""

    #: Haystack 3 JSON — type-prefixed strings (e.g. ``"n:42 °F"``).
    V3 = "v3"
    #: Haystack 4 JSON — ``_kind`` object wrappers.
    V4 = "v4"
# Maximum recursion depth for JSON decoding to prevent stack overflow.
# The value decoders raise ValueError once their depth counter exceeds this.
_MAX_DECODE_DEPTH = 64
# Maximum number of rows/columns in a decoded grid to prevent memory exhaustion.
# The grid decoders raise ValueError when a payload goes over either limit.
_MAX_GRID_ROWS = 100_000
_MAX_GRID_COLS = 10_000
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
[docs]
def encode_val(val: Any, *, version: JsonVersion = JsonVersion.V4) -> Any:
    """Convert one Haystack value into a JSON-serializable Python object.

    :param val: Haystack value to encode.
    :param version: JSON encoding version to produce (Haystack 4 by default).
    :returns: JSON-serializable Python object.
    """
    encoder = _encode_val_v3 if version is JsonVersion.V3 else _encode_val_v4
    return encoder(val)
[docs]
def decode_val(
    obj: Any,
    *,
    version: JsonVersion = JsonVersion.V4,
    pythonic: bool = False,
) -> Any:
    """Turn a JSON-deserialized value back into a Haystack kind.

    :param obj: JSON-deserialized value.
    :param version: JSON encoding version the value was written in.
    :param pythonic: If ``True``, convert to native Python types where possible.
        :class:`~hs_py.kinds.Marker` becomes ``True``, unitless
        :class:`~hs_py.kinds.Number` becomes ``float``,
        :class:`~hs_py.kinds.Symbol` and :class:`~hs_py.kinds.Uri` become ``str``.
    :returns: Decoded Haystack value.
    """
    if version is JsonVersion.V3:
        decoded = _decode_val_v3(obj)
    else:
        decoded = _decode_val_v4(obj)
    return _to_pythonic(decoded) if pythonic else decoded
[docs]
def encode_grid(grid: Grid, *, version: JsonVersion = JsonVersion.V4) -> bytes:
    """Serialize a :class:`~hs_py.grid.Grid` to Haystack JSON bytes.

    :param grid: Grid to encode.
    :param version: JSON encoding version to produce.
    :returns: JSON-encoded bytes via :mod:`orjson`.
    """
    if version is JsonVersion.V3:
        v3_payload = _encode_grid_v3(grid)
        return orjson.dumps(v3_payload)
    # v4: hand orjson the raw values and let it call back for Haystack kinds.
    # Dataclasses and datetimes are passed through to the ``default`` hook
    # instead of being serialized natively by orjson.
    v4_payload = _encode_grid_v4_raw(grid)
    passthrough = orjson.OPT_PASSTHROUGH_DATACLASS | orjson.OPT_PASSTHROUGH_DATETIME
    return orjson.dumps(v4_payload, default=_orjson_default_v4, option=passthrough)
[docs]
def encode_grid_dict(grid: Grid, *, version: JsonVersion = JsonVersion.V4) -> dict[str, Any]:
    """Build the JSON-compatible dict for a :class:`~hs_py.grid.Grid` without serializing it.

    Useful when the grid is embedded inside a larger JSON structure, since it
    avoids serializing to bytes and parsing back.

    :param grid: Grid to encode.
    :param version: JSON encoding version to produce.
    :returns: JSON-serializable dict.
    """
    to_dict = _encode_grid_v3 if version is JsonVersion.V3 else _encode_grid_v4
    return to_dict(grid)
[docs]
def decode_grid_dict(
    obj: dict[str, Any],
    *,
    version: JsonVersion = JsonVersion.V4,
    pythonic: bool = False,
) -> Grid:
    """Build a :class:`~hs_py.grid.Grid` from an already-parsed JSON dict.

    Use this when the JSON has already been deserialized (e.g. from a
    WebSocket message) to avoid an unnecessary ``orjson.dumps`` /
    ``orjson.loads`` round-trip.

    :param obj: JSON-deserialized dict representing a grid.
    :param version: JSON encoding version to decode.
    :param pythonic: If ``True``, convert values to native Python types.
    :returns: Decoded :class:`~hs_py.grid.Grid`.
    """
    if version is JsonVersion.V3:
        decoded = _decode_grid_v3(obj)
    else:
        decoded = _decode_grid_v4(obj)
    if not pythonic:
        return decoded
    return _pythonic_grid(decoded)
[docs]
def decode_grid(
    data: bytes,
    *,
    version: JsonVersion = JsonVersion.V4,
    pythonic: bool = False,
) -> Grid:
    """Parse Haystack JSON bytes into a :class:`~hs_py.grid.Grid`.

    :param data: JSON bytes.
    :param version: JSON encoding version to decode.
    :param pythonic: If ``True``, convert values to native Python types.
    :returns: Decoded :class:`~hs_py.grid.Grid`.
    """
    parsed = orjson.loads(data)
    decoder = _decode_grid_v3 if version is JsonVersion.V3 else _decode_grid_v4
    decoded = decoder(parsed)
    return _pythonic_grid(decoded) if pythonic else decoded
# ---------------------------------------------------------------------------
# V4 Encoding (Haystack 4 — _kind objects)
# ---------------------------------------------------------------------------
def _encode_val_v4(val: Any) -> Any:
    """Encode one value in Haystack 4 JSON form.

    Checks run cheapest-first: ``None`` and exact JSON primitives, the
    MARKER/NA/REMOVE singletons (by identity), the exact-type dispatch table,
    and finally ``isinstance`` fallbacks for subclasses and date/time values.

    :param val: Value to encode.
    :returns: JSON-compatible representation of *val*.
    :raises TypeError: If *val* has no Haystack JSON representation.
    """
    if val is None:
        return None
    kind = type(val)
    # Exact-type JSON primitives are already valid v4 JSON.
    if kind is str or kind is int or kind is float or kind is bool:
        return val
    for singleton, encoded in ((MARKER, _MARKER_V4), (NA, _NA_V4), (REMOVE, _REMOVE_V4)):
        if val is singleton:
            return encoded
    handler = _V4_TYPE_ENCODERS.get(kind)
    if handler is not None:
        return handler(val)
    # Nothing matched by exact type: fall back to subclass checks.  datetime
    # must be tested before date because datetime is a subclass of date.
    if isinstance(val, bool):
        return val
    if isinstance(val, datetime.datetime):
        return _encode_datetime_v4(val)
    if isinstance(val, datetime.date):
        return {"_kind": "date", "val": val.isoformat()}
    if isinstance(val, datetime.time):
        return {"_kind": "time", "val": val.isoformat()}
    if isinstance(val, (int, float)):
        return val
    raise TypeError(f"Cannot encode {type(val).__name__} as Haystack JSON")
# Pre-built singleton dicts (avoid allocation per call).
# The v4 encoders return these same objects every time a MARKER, NA or REMOVE
# is encoded, so they are shared between results.
_MARKER_V4: dict[str, str] = {"_kind": "marker"}
_NA_V4: dict[str, str] = {"_kind": "na"}
_REMOVE_V4: dict[str, str] = {"_kind": "remove"}
def _encode_number_v4_dispatch(val: Any) -> Any:
    """Dispatch-table entry for Number; forwards to :func:`_encode_number_v4`."""
    return _encode_number_v4(val)
def _encode_ref_v4(val: Any) -> dict[str, Any]:
    """Encode a Ref as a ``_kind: "ref"`` object; ``dis`` is added only when set."""
    encoded: dict[str, Any] = {"_kind": "ref", "val": val.val}
    display = val.dis
    if display is None:
        return encoded
    encoded["dis"] = display
    return encoded
def _encode_symbol_v4(val: Any) -> dict[str, str]:
    """Wrap a Symbol's ``val`` in a ``_kind: "symbol"`` object."""
    name = val.val
    return {"_kind": "symbol", "val": name}
def _encode_uri_v4(val: Any) -> dict[str, str]:
    """Wrap a Uri's ``val`` in a ``_kind: "uri"`` object."""
    target = val.val
    return {"_kind": "uri", "val": target}
def _encode_coord_v4(val: Any) -> dict[str, Any]:
    """Encode a Coord as a ``_kind: "coord"`` object with ``lat`` and ``lng`` keys."""
    latitude = val.lat
    longitude = val.lng
    return {"_kind": "coord", "lat": latitude, "lng": longitude}
def _encode_xstr_v4(val: Any) -> dict[str, str]:
    """Encode an XStr as a ``_kind: "xstr"`` object with ``type`` and ``val`` keys."""
    type_name = val.type_name
    payload = val.val
    return {"_kind": "xstr", "type": type_name, "val": payload}
def _encode_grid_v4_dispatch(val: Any) -> dict[str, Any]:
    """Dispatch-table entry for Grid; forwards to :func:`_encode_grid_v4`."""
    return _encode_grid_v4(val)
def _encode_list_v4(val: Any) -> list[Any]:
    """Encode each item with :func:`_encode_val_v4`, preserving order."""
    encoded: list[Any] = []
    for item in val:
        encoded.append(_encode_val_v4(item))
    return encoded
def _encode_dict_v4(val: Any) -> dict[str, Any]:
    """Encode each value with :func:`_encode_val_v4`; keys are kept as-is."""
    encoded: dict[str, Any] = {}
    for key, item in val.items():
        encoded[key] = _encode_val_v4(item)
    return encoded
# Type → encoder dispatch table (exact type match, no inheritance).
# Looked up with ``type(val)`` by ``_encode_val_v4`` and ``_orjson_default_v4``;
# a subclass of a listed type misses the table and falls through to the
# callers' ``isinstance`` checks.
_V4_TYPE_ENCODERS: dict[type, Any] = {
    Number: _encode_number_v4_dispatch,
    Ref: _encode_ref_v4,
    Symbol: _encode_symbol_v4,
    Uri: _encode_uri_v4,
    Coord: _encode_coord_v4,
    XStr: _encode_xstr_v4,
    Grid: _encode_grid_v4_dispatch,
    list: _encode_list_v4,
    dict: _encode_dict_v4,
    # datetime types handled in isinstance fallback (subclass ordering matters)
}
def _orjson_default_v4(val: Any) -> Any:
    """``default`` hook for :func:`orjson.dumps`: encode one Haystack value as v4 JSON.

    Called by orjson for any object it cannot natively serialize.  Checks are
    ordered by the frequency noted in the original profiling comments:
    Marker first, then Ref, then the remaining kinds.

    :param val: Object handed over by orjson.
    :returns: JSON-compatible replacement for *val*.
    :raises TypeError: If *val* is not a supported Haystack type.
    """
    # Most frequent non-native value: the marker singleton.
    if val is MARKER:
        return _MARKER_V4
    kind = type(val)
    # Ref is built inline to skip the dispatch lookup and extra call.
    if kind is Ref:
        encoded: dict[str, Any] = {"_kind": "ref", "val": val.val}
        display = val.dis
        if display is not None:
            encoded["dis"] = display
        return encoded
    # Rare singletons come after Ref.
    if val is NA:
        return _NA_V4
    if val is REMOVE:
        return _REMOVE_V4
    handler = _V4_TYPE_ENCODERS.get(kind)
    if handler is not None:
        return handler(val)
    # datetime is tested before date because datetime subclasses date.
    if isinstance(val, datetime.datetime):
        return _encode_datetime_v4(val)
    for base, tag in ((datetime.date, "date"), (datetime.time, "time")):
        if isinstance(val, base):
            return {"_kind": tag, "val": val.isoformat()}
    raise TypeError(f"Cannot encode {type(val).__name__} as Haystack JSON")
def _encode_number_v4(n: Number) -> Any:
    """Encode a Number for v4.

    A finite, unitless number becomes a plain JSON number.  Anything else
    becomes a ``_kind: "number"`` object, with NaN and the infinities spelled
    ``"NaN"``, ``"INF"`` and ``"-INF"``.

    :param n: Number to encode.
    :returns: A bare numeric value or a ``_kind: "number"`` dict.
    """
    value = n.val
    unit = n.unit
    if math.isnan(value):
        encoded_val: Any = "NaN"
    elif math.isinf(value):
        encoded_val = "INF" if value > 0 else "-INF"
    elif unit is None:
        return value
    else:
        encoded_val = value
    encoded: dict[str, Any] = {"_kind": "number", "val": encoded_val}
    if unit is not None:
        encoded["unit"] = unit
    return encoded
# Cache for datetime encoding — few unique datetimes appear many times.
# Keys are ``(dt, id(dt.tzinfo), dt.fold)`` rather than ``dt`` alone: aware
# datetimes for the same instant compare (and hash) equal even when their
# zone or ``fold`` differs, yet they encode to different ``val``/``tz`` text.
# The key holds ``dt``, which keeps its tzinfo alive, so the id cannot be
# reused by another tzinfo object while the entry exists.
_DT_CACHE: dict[tuple[datetime.datetime, int, int], dict[str, Any]] = {}
_DT_CACHE_MAX = 256


def _encode_datetime_v4(dt: datetime.datetime) -> dict[str, Any]:
    """Encode a datetime as a v4 ``_kind: "dateTime"`` object, with caching.

    ``val`` is ``dt.isoformat()``; ``tz`` is added when :func:`tz_name`
    returns a non-empty name.  Results are memoized for up to
    ``_DT_CACHE_MAX`` distinct inputs; once the cache is full, further values
    are encoded without being stored.

    :param dt: Datetime to encode.
    :returns: A fresh dict on every call, so callers may mutate it without
        affecting later results.
    """
    key = (dt, id(dt.tzinfo), dt.fold)
    cached = _DT_CACHE.get(key)
    if cached is not None:
        # Hand out a copy so the cached entry cannot be altered by the caller.
        return dict(cached)
    d: dict[str, Any] = {"_kind": "dateTime", "val": dt.isoformat()}
    tz = tz_name(dt)
    if tz:
        d["tz"] = tz
    if len(_DT_CACHE) < _DT_CACHE_MAX:
        _DT_CACHE[key] = dict(d)
    return d
def _encode_grid_v4(grid: Grid) -> dict[str, Any]:
    """Encode a Grid as a fully pre-converted v4 JSON dict.

    Every meta, column-meta and row value is passed through
    :func:`_encode_val_v4`.  A column's ``meta`` key is emitted only when the
    column has non-empty meta.

    :param grid: Grid to encode.
    :returns: Dict with ``_kind``, ``meta``, ``cols`` and ``rows`` keys.
    """

    def encode_tags(tags: Any) -> dict[str, Any]:
        return {name: _encode_val_v4(value) for name, value in tags.items()}

    grid_meta = encode_tags(grid.meta)
    columns: list[dict[str, Any]] = []
    for col in grid.cols:
        entry: dict[str, Any] = {"name": col.name}
        if col.meta:
            entry["meta"] = encode_tags(col.meta)
        columns.append(entry)
    records = [encode_tags(row) for row in grid.rows]
    return {"_kind": "grid", "meta": grid_meta, "cols": columns, "rows": records}
def _encode_grid_v4_raw(grid: Grid) -> dict[str, Any]:
    """Build the v4 grid dict while leaving Haystack values unconverted.

    Values are NOT pre-converted — the caller must serialize the result with
    ``orjson.dumps(..., default=_orjson_default_v4)``.  That skips a full
    pre-walk of the value tree: orjson handles native types itself and only
    calls back into Python for Haystack kind objects.

    Grid meta, column meta and rows are placed in the result by reference
    (no copy).

    :param grid: Grid to encode.
    :returns: Dict with ``_kind``, ``meta``, ``cols`` and ``rows`` keys.
    """
    columns: list[dict[str, Any]] = [
        {"name": col.name, "meta": col.meta} if col.meta else {"name": col.name}
        for col in grid.cols
    ]
    return {"_kind": "grid", "meta": grid.meta, "cols": columns, "rows": grid.rows}
# ---------------------------------------------------------------------------
# V4 Decoding
# ---------------------------------------------------------------------------
def _decode_val_v4(obj: Any, _depth: int = 0) -> Any:
    """Decode one value from Haystack 4 JSON form.

    :param obj: JSON-deserialized value.
    :param _depth: Current nesting depth (internal recursion counter).
    :returns: Decoded value; primitives are returned unchanged.
    :raises ValueError: If nesting exceeds ``_MAX_DECODE_DEPTH`` or a dict
        carries an unrecognised ``_kind``.
    :raises TypeError: If *obj* is not a JSON-compatible type.
    """
    if _depth > _MAX_DECODE_DEPTH:
        raise ValueError("Maximum decoding depth exceeded")
    # Exact-type comparisons avoid an isinstance chain for common JSON types.
    json_type = type(obj)
    if (
        obj is None
        or json_type is str
        or json_type is bool
        or json_type is int
        or json_type is float
    ):
        return obj
    if json_type is list:
        return [_decode_val_v4(item, _depth + 1) for item in obj]
    if json_type is not dict:
        raise TypeError(f"Cannot decode {type(obj).__name__} as Haystack value")
    tag = obj.get("_kind")
    if tag is None:
        # Untagged dict: decode the values, keep the keys.
        return {key: _decode_val_v4(item, _depth + 1) for key, item in obj.items()}
    # Kind lookup is done inline to avoid an extra function call.
    handler = _V4_KIND_DECODERS.get(tag)
    if handler is None:
        raise ValueError(f"Unknown _kind: {tag!r}")
    return handler(obj, _depth)
def _decode_kind_v4(kind: str, obj: dict[str, Any], _depth: int = 0) -> Any:
    """Decode a ``_kind``-tagged JSON object with the decoder registered for *kind*.

    :param kind: Value of the object's ``_kind`` field.
    :param obj: The tagged JSON object.
    :param _depth: Current nesting depth, forwarded to the decoder.
    :raises ValueError: If no decoder is registered for *kind*.
    """
    handler = _V4_KIND_DECODERS.get(kind)
    if handler is None:
        raise ValueError(f"Unknown _kind: {kind!r}")
    return handler(obj, _depth)
def _decode_marker_v4(_obj: dict[str, Any], _depth: int = 0) -> Marker:
    """Decode a ``_kind: "marker"`` object; both arguments are ignored."""
    return MARKER
def _decode_na_v4(_obj: dict[str, Any], _depth: int = 0) -> Na:
    """Decode a ``_kind: "na"`` object; both arguments are ignored."""
    return NA
def _decode_remove_v4(_obj: dict[str, Any], _depth: int = 0) -> Remove:
    """Decode a ``_kind: "remove"`` object; both arguments are ignored."""
    return REMOVE
def _decode_number_v4(obj: dict[str, Any], _depth: int = 0) -> Number:
    """Decode a ``_kind: "number"`` object.

    The strings ``"NaN"``, ``"INF"`` and ``"-INF"`` map to the matching
    special floats; any other ``val`` is passed to :class:`float`.

    :param obj: Tagged JSON object with ``val`` and optional ``unit``.
    :param _depth: Unused; kept for the shared decoder signature.
    """
    raw = obj["val"]
    unit = obj.get("unit")
    if isinstance(raw, str):
        special = {"NaN": "nan", "INF": "inf", "-INF": "-inf"}.get(raw)
        if special is not None:
            return Number(float(special), unit)
    return Number(float(raw), unit)
# Matches ref ids made only of the listed characters.  ``fullmatch`` is used
# instead of ``^...$`` with ``match`` because ``$`` also matches just before a
# trailing newline, which would let ``"abc\n"`` through the fast path below.
_REF_VAL_RE_MATCH = re.compile(r"[a-zA-Z0-9_:\-.~]+").fullmatch


def _decode_ref_v4(obj: dict[str, Any], _depth: int = 0) -> Ref:
    """Decode a ``_kind: "ref"`` object.

    A ``val`` that is a string of allowed characters takes a fast path that
    builds the Ref without running its constructor.  Anything else is passed
    to the normal ``Ref(...)`` constructor, which is expected to reject it.

    :param obj: Tagged JSON object with ``val`` and optional ``dis``.
    :param _depth: Unused; kept for the shared decoder signature.
    :raises KeyError: If ``val`` is missing.
    """
    val = obj["val"]
    if not isinstance(val, str) or not _REF_VAL_RE_MATCH(val):
        return Ref(val, obj.get("dis"))  # let __post_init__ raise
    # Fast path: skip __init__/__post_init__ and set the fields directly.
    # NOTE(review): ``dis`` is not validated here — confirm Ref has no check
    # on it that this path would skip.
    ref = Ref.__new__(Ref)
    object.__setattr__(ref, "val", val)
    object.__setattr__(ref, "dis", obj.get("dis"))
    return ref
def _decode_symbol_v4(obj: dict[str, Any], _depth: int = 0) -> Symbol:
    """Decode a ``_kind: "symbol"`` object from its ``val`` field."""
    return Symbol(obj["val"])
def _decode_uri_v4(obj: dict[str, Any], _depth: int = 0) -> Uri:
    """Decode a ``_kind: "uri"`` object from its ``val`` field."""
    return Uri(obj["val"])
def _decode_coord_v4(obj: dict[str, Any], _depth: int = 0) -> Coord:
    """Decode a ``_kind: "coord"`` object from its ``lat`` and ``lng`` fields."""
    return Coord(obj["lat"], obj["lng"])
def _decode_xstr_v4(obj: dict[str, Any], _depth: int = 0) -> XStr:
    """Decode a ``_kind: "xstr"`` object from its ``type`` and ``val`` fields."""
    return XStr(obj["type"], obj["val"])
def _decode_date_v4(obj: dict[str, Any], _depth: int = 0) -> datetime.date:
    """Decode a ``_kind: "date"`` object by parsing its ISO 8601 ``val``."""
    iso_text = obj["val"]
    return datetime.date.fromisoformat(iso_text)
def _decode_time_v4(obj: dict[str, Any], _depth: int = 0) -> datetime.time:
    """Decode a ``_kind: "time"`` object by parsing its ISO 8601 ``val``."""
    iso_text = obj["val"]
    return datetime.time.fromisoformat(iso_text)
# Memo of decoded datetimes keyed by the raw ``(val, tz)`` pair from the JSON.
_DECODE_DT_CACHE: dict[tuple[str, str | None], datetime.datetime] = {}
_DECODE_DT_CACHE_MAX = 512


def _decode_datetime_v4(obj: dict[str, Any], _depth: int = 0) -> datetime.datetime:
    """Decode a ``_kind: "dateTime"`` object, memoizing repeated values.

    When ``tz`` is present it is resolved with :func:`city_to_tz`: a ``val``
    without an offset gets that zone attached, a ``val`` with an offset is
    converted to it.  Up to ``_DECODE_DT_CACHE_MAX`` results are cached.

    :param obj: Tagged JSON object with ``val`` and optional ``tz``.
    :param _depth: Unused; kept for the shared decoder signature.
    """
    iso_text: str = obj["val"]
    zone_name: str | None = obj.get("tz")
    cache_key = (iso_text, zone_name)
    hit = _DECODE_DT_CACHE.get(cache_key)
    if hit is not None:
        return hit
    parsed = datetime.datetime.fromisoformat(iso_text)
    if zone_name:
        zone = city_to_tz(zone_name)
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=zone)
        else:
            parsed = parsed.astimezone(zone)
    if len(_DECODE_DT_CACHE) < _DECODE_DT_CACHE_MAX:
        _DECODE_DT_CACHE[cache_key] = parsed
    return parsed
def _decode_dict_v4(obj: dict[str, Any], _depth: int = 0) -> dict[str, Any]:
    """Decode a dict tagged with an explicit ``_kind: 'dict'``.

    The ``_kind`` entry is dropped; every other value is decoded one level
    deeper with :func:`_decode_val_v4`.
    """
    child_depth = _depth + 1
    decoded: dict[str, Any] = {}
    for key, raw in obj.items():
        if key == "_kind":
            continue
        decoded[key] = _decode_val_v4(raw, child_depth)
    return decoded
def _decode_grid_v4(obj: dict[str, Any], _depth: int = 0) -> Grid:
    """Decode a v4 grid JSON object (column meta nested under ``meta``).

    :param obj: JSON object with optional ``meta``, ``cols`` and ``rows``.
    :param _depth: Current nesting depth; contained values decode one deeper.
    :returns: Decoded :class:`Grid` with tuple ``cols`` and ``rows``.
    :raises ValueError: If the column or row count exceeds the module limits.
    """
    child_depth = _depth + 1

    def decode_tags(tags: dict[str, Any]) -> dict[str, Any]:
        return {name: _decode_val_v4(value, child_depth) for name, value in tags.items()}

    grid_meta = decode_tags(obj.get("meta", {}))

    raw_cols = obj.get("cols", [])
    col_count = len(raw_cols)
    if col_count > _MAX_GRID_COLS:
        raise ValueError(f"Grid exceeds maximum column count ({col_count} > {_MAX_GRID_COLS})")
    columns = tuple(
        Col(name=raw_col["name"], meta=decode_tags(raw_col.get("meta", {})))
        for raw_col in raw_cols
    )

    raw_rows = obj.get("rows", [])
    row_count = len(raw_rows)
    if row_count > _MAX_GRID_ROWS:
        raise ValueError(f"Grid exceeds maximum row count ({row_count} > {_MAX_GRID_ROWS})")
    records = tuple(decode_tags(raw_row) for raw_row in raw_rows)

    return Grid(meta=grid_meta, cols=columns, rows=records)
# ``_kind`` string → decoder.  Each decoder is called as
# ``decoder(obj, _depth)`` with the tagged JSON object; a ``_kind`` that is
# not listed here makes the v4 decoders raise ValueError.
_V4_KIND_DECODERS: dict[str, Any] = {
    "marker": _decode_marker_v4,
    "na": _decode_na_v4,
    "remove": _decode_remove_v4,
    "number": _decode_number_v4,
    "ref": _decode_ref_v4,
    "symbol": _decode_symbol_v4,
    "uri": _decode_uri_v4,
    "coord": _decode_coord_v4,
    "xstr": _decode_xstr_v4,
    "date": _decode_date_v4,
    "time": _decode_time_v4,
    "dateTime": _decode_datetime_v4,
    "grid": _decode_grid_v4,
    "dict": _decode_dict_v4,
}
# ---------------------------------------------------------------------------
# V3 Encoding (Haystack 3 — string prefixes)
# ---------------------------------------------------------------------------
# Characters that, when followed by ':', form a v3 type prefix.
# ``_encode_str_v3`` prepends ``s:`` to any plain string that starts with one
# of these plus ':' so it is not mistaken for a typed value.
_V3_TYPE_PREFIXES = frozenset("cdhmnrstuxyz-")
def _encode_val_v3(val: Any) -> Any:
    """Encode one value in Haystack 3 JSON form (type-prefixed strings).

    Exact-type checks come first, then the MARKER/NA/REMOVE singletons, the
    dispatch table, and finally ``isinstance`` fallbacks for subclasses and
    date/time values.

    :param val: Value to encode.
    :returns: JSON-compatible representation of *val*.
    :raises TypeError: If *val* has no Haystack JSON representation.
    """
    if val is None:
        return None
    kind = type(val)
    if kind is str:
        return _encode_str_v3(val)
    if kind is int or kind is float:
        return _encode_number_v3(Number(float(val)))
    if kind is bool:
        return val
    for singleton, text in ((MARKER, "m:"), (NA, "z:"), (REMOVE, "-:")):
        if val is singleton:
            return text
    handler = _V3_TYPE_ENCODERS.get(kind)
    if handler is not None:
        return handler(val)
    # Nothing matched by exact type: fall back to subclass checks.  datetime
    # must be tested before date because datetime is a subclass of date.
    if isinstance(val, bool):
        return val
    if isinstance(val, datetime.datetime):
        zone = tz_name(val) or "UTC"
        return f"t:{val.isoformat()} {zone}"
    if isinstance(val, datetime.date):
        return f"d:{val.isoformat()}"
    if isinstance(val, datetime.time):
        return f"h:{val.isoformat()}"
    if isinstance(val, (int, float)):
        return _encode_number_v3(Number(float(val)))
    raise TypeError(f"Cannot encode {type(val).__name__} as Haystack JSON")
def _encode_str_v3(s: str) -> str:
    """Encode a plain string for v3, escaping it with ``s:`` when ambiguous.

    A string is ambiguous when its second character is ``:`` and its first
    character is one of ``_V3_TYPE_PREFIXES``.
    """
    if len(s) < 2 or s[1] != ":":
        return s
    if s[0] not in _V3_TYPE_PREFIXES:
        return s
    return f"s:{s}"
def _encode_number_v3(n: Number) -> str:
    """Encode a Number for v3 as ``n:<value>`` or ``n:<value> <unit>``.

    NaN and the infinities are spelled ``NaN``, ``INF`` and ``-INF``; finite
    values are formatted with :func:`format_num`.
    """
    value = n.val
    if math.isnan(value):
        text = "NaN"
    elif math.isinf(value):
        text = "INF" if value > 0 else "-INF"
    else:
        text = format_num(value)
    unit = n.unit
    return f"n:{text}" if unit is None else f"n:{text} {unit}"
def _encode_grid_v3(grid: Grid) -> dict[str, Any]:
    """Encode a Grid as a v3 JSON dict.

    Unlike v4 there is no ``_kind`` key, and each column's meta tags are
    written directly into the column dict next to ``name``.

    :param grid: Grid to encode.
    :returns: Dict with ``meta``, ``cols`` and ``rows`` keys.
    """

    def encode_tags(tags: Any) -> dict[str, Any]:
        return {name: _encode_val_v3(value) for name, value in tags.items()}

    grid_meta = encode_tags(grid.meta)
    columns: list[dict[str, Any]] = []
    for col in grid.cols:
        entry: dict[str, Any] = {"name": col.name}
        entry.update(encode_tags(col.meta))
        columns.append(entry)
    records = [encode_tags(row) for row in grid.rows]
    return {"meta": grid_meta, "cols": columns, "rows": records}
def _encode_number_v3_dispatch(val: Any) -> str:
    """Dispatch-table entry for Number; forwards to :func:`_encode_number_v3`."""
    return _encode_number_v3(val)
def _encode_ref_v3(val: Any) -> str:
    """Encode a Ref as ``r:<id>``, or ``r:<id> <dis>`` when a display name is set."""
    display = val.dis
    return f"r:{val.val}" if display is None else f"r:{val.val} {display}"
def _encode_symbol_v3(val: Any) -> str:
    """Encode a Symbol as ``y:<val>``."""
    name = val.val
    return f"y:{name}"
def _encode_uri_v3(val: Any) -> str:
    """Encode a Uri as ``u:<val>``."""
    target = val.val
    return f"u:{target}"
def _encode_coord_v3(val: Any) -> str:
    """Encode a Coord as ``c:<lat>,<lng>``."""
    latitude = val.lat
    longitude = val.lng
    return f"c:{latitude},{longitude}"
def _encode_xstr_v3(val: Any) -> str:
    """Encode an XStr as ``x:<type>:<val>``."""
    type_name = val.type_name
    payload = val.val
    return f"x:{type_name}:{payload}"
def _encode_grid_v3_dispatch(val: Any) -> dict[str, Any]:
    """Dispatch-table entry for Grid; forwards to :func:`_encode_grid_v3`."""
    return _encode_grid_v3(val)
def _encode_list_v3(val: Any) -> list[Any]:
    """Encode each item with :func:`_encode_val_v3`, preserving order."""
    encoded: list[Any] = []
    for item in val:
        encoded.append(_encode_val_v3(item))
    return encoded
def _encode_dict_v3(val: Any) -> dict[str, Any]:
    """Encode each value with :func:`_encode_val_v3`; keys are kept as-is."""
    encoded: dict[str, Any] = {}
    for key, item in val.items():
        encoded[key] = _encode_val_v3(item)
    return encoded
# Type → v3 encoder dispatch table, looked up with ``type(val)`` by
# ``_encode_val_v3``.  The match is on the exact type, so subclasses fall
# through to that function's ``isinstance`` checks.
_V3_TYPE_ENCODERS: dict[type, Any] = {
    Number: _encode_number_v3_dispatch,
    Ref: _encode_ref_v3,
    Symbol: _encode_symbol_v3,
    Uri: _encode_uri_v3,
    Coord: _encode_coord_v3,
    XStr: _encode_xstr_v3,
    Grid: _encode_grid_v3_dispatch,
    list: _encode_list_v3,
    dict: _encode_dict_v3,
}
# ---------------------------------------------------------------------------
# V3 Decoding (Haystack 3 — string prefixes)
# ---------------------------------------------------------------------------
def _decode_val_v3(obj: Any, _depth: int = 0) -> Any:
    """Decode one value from Haystack 3 JSON form.

    :param obj: JSON-deserialized value.
    :param _depth: Current nesting depth (internal recursion counter).
    :returns: Decoded value; ``None``, bools and numbers are returned unchanged.
    :raises ValueError: If nesting exceeds ``_MAX_DECODE_DEPTH``.
    :raises TypeError: If *obj* is not a JSON-compatible type.
    """
    if _depth > _MAX_DECODE_DEPTH:
        raise ValueError("Maximum decoding depth exceeded")
    if obj is None or isinstance(obj, (bool, int, float)):
        return obj
    if isinstance(obj, str):
        return _decode_str_v3(obj)
    child_depth = _depth + 1
    if isinstance(obj, list):
        return [_decode_val_v3(item, child_depth) for item in obj]
    if isinstance(obj, dict):
        return {key: _decode_val_v3(item, child_depth) for key, item in obj.items()}
    raise TypeError(f"Cannot decode {type(obj).__name__} as Haystack value")
def _decode_str_v3(s: str) -> Any:
    """Decode a v3 string, interpreting a leading ``<char>:`` type prefix.

    Strings without a recognised prefix are returned unchanged.
    """
    if len(s) < 2 or s[1] != ":":
        return s
    handler = _V3_STR_DECODERS.get(s[0])
    return s if handler is None else handler(s[2:])
def _v3_marker(_rest: str) -> Marker:
    """Decode an ``m:`` string; the text after the prefix is ignored."""
    return MARKER
def _v3_na(_rest: str) -> Na:
    """Decode a ``z:`` string; the text after the prefix is ignored."""
    return NA
def _v3_remove(_rest: str) -> Remove:
    """Decode a ``-:`` string; the text after the prefix is ignored."""
    return REMOVE
def _v3_str(rest: str) -> str:
    """Decode an ``s:`` escaped string: the text after the prefix is the value."""
    return rest
def _v3_number(rest: str) -> Number:
    """Parse an ``n:`` payload such as ``"45.5"`` or ``"45.5 °F"``.

    Everything after the first space is the unit.  ``NaN``, ``INF`` and
    ``-INF`` map to the matching special floats.
    """
    num_text, sep, unit_text = rest.partition(" ")
    unit = unit_text if sep else None
    special = {"NaN": "nan", "INF": "inf", "-INF": "-inf"}.get(num_text)
    if special is not None:
        return Number(float(special), unit)
    return Number(float(num_text), unit)
def _v3_ref(rest: str) -> Ref:
    """Parse an ``r:`` payload such as ``"abc-123"`` or ``"abc-123 Display Name"``.

    Everything after the first space is the display name.
    """
    ident, sep, display = rest.partition(" ")
    return Ref(ident, display if sep else None)
def _v3_symbol(rest: str) -> Symbol:
    """Decode a ``y:`` payload into a Symbol."""
    return Symbol(rest)
def _v3_date(rest: str) -> datetime.date:
    """Decode a ``d:`` payload by parsing it as an ISO 8601 date."""
    return datetime.date.fromisoformat(rest)
def _v3_time(rest: str) -> datetime.time:
    """Decode an ``h:`` payload by parsing it as an ISO 8601 time."""
    return datetime.time.fromisoformat(rest)
def _v3_datetime(rest: str) -> datetime.datetime:
    """Parse a ``t:`` payload of the form ``"<ISO 8601> <timezone>"``.

    The timezone name is the text after the last space and is resolved with
    :func:`city_to_tz`.  An ISO part that carries a UTC offset is converted to
    that zone.  An ISO part without an offset is taken as wall-clock time in
    that zone — as the v4 decoder does — instead of being interpreted in the
    machine's local zone, which is what ``astimezone`` does with a naive
    datetime.

    :param rest: Text following the ``t:`` prefix.
    :returns: Parsed datetime; it is naive only when *rest* has no timezone
        name and no offset.
    :raises ValueError: If the ISO part cannot be parsed.
    """
    iso_part, sep, zone_name = rest.rpartition(" ")
    if not sep:
        return datetime.datetime.fromisoformat(rest)
    dt = datetime.datetime.fromisoformat(iso_part)
    zone = city_to_tz(zone_name)
    if dt.tzinfo is None:
        return dt.replace(tzinfo=zone)
    return dt.astimezone(zone)
def _v3_uri(rest: str) -> Uri:
    """Decode a ``u:`` payload into a Uri."""
    return Uri(rest)
def _v3_coord(rest: str) -> Coord:
    """Parse a ``c:`` payload of the form ``"lat,lng"``.

    :raises ValueError: If *rest* does not split into exactly two
        comma-separated floats.
    """
    pieces = rest.split(",")
    lat_text, lng_text = pieces
    latitude = float(lat_text)
    longitude = float(lng_text)
    return Coord(latitude, longitude)
def _v3_xstr(rest: str) -> XStr:
    """Parse an ``x:`` payload of the form ``"Type:value"``.

    The split is on the first ``:``, so the value part may contain colons.

    :raises ValueError: If *rest* contains no ``:`` separator.
    """
    type_name, sep, payload = rest.partition(":")
    if not sep:
        raise ValueError(f"Invalid XStr format (missing ':' separator): {rest!r}")
    return XStr(type_name, payload)
# Prefix character → decoder.  ``_decode_str_v3`` looks up the first character
# of a ``<char>:...`` string here and calls the decoder with the text after
# the colon; strings with an unlisted prefix are left as plain strings.
_V3_STR_DECODERS: dict[str, Any] = {
    "m": _v3_marker,
    "z": _v3_na,
    "-": _v3_remove,
    "s": _v3_str,
    "n": _v3_number,
    "r": _v3_ref,
    "y": _v3_symbol,
    "d": _v3_date,
    "h": _v3_time,
    "t": _v3_datetime,
    "u": _v3_uri,
    "c": _v3_coord,
    "x": _v3_xstr,
}
def _decode_grid_v3(obj: dict[str, Any], _depth: int = 0) -> Grid:
    """Decode a v3 grid JSON object.

    v3 grids have no ``_kind`` key, and every key of a column dict other than
    ``name`` is treated as column meta.

    :param obj: JSON object with optional ``meta``, ``cols`` and ``rows``.
    :param _depth: Current nesting depth; contained values decode one deeper.
    :returns: Decoded :class:`Grid` with tuple ``cols`` and ``rows``.
    :raises ValueError: If the column or row count exceeds the module limits.
    """
    child_depth = _depth + 1

    def decode_tags(tags: dict[str, Any], skip: str | None = None) -> dict[str, Any]:
        return {
            name: _decode_val_v3(value, child_depth)
            for name, value in tags.items()
            if name != skip
        }

    grid_meta = decode_tags(obj.get("meta", {}))

    raw_cols = obj.get("cols", [])
    col_count = len(raw_cols)
    if col_count > _MAX_GRID_COLS:
        raise ValueError(f"Grid exceeds maximum column count ({col_count} > {_MAX_GRID_COLS})")
    columns = tuple(
        Col(name=raw_col["name"], meta=decode_tags(raw_col, skip="name"))
        for raw_col in raw_cols
    )

    raw_rows = obj.get("rows", [])
    row_count = len(raw_rows)
    if row_count > _MAX_GRID_ROWS:
        raise ValueError(f"Grid exceeds maximum row count ({row_count} > {_MAX_GRID_ROWS})")
    records = tuple(decode_tags(raw_row) for raw_row in raw_rows)

    return Grid(meta=grid_meta, cols=columns, rows=records)
# ---------------------------------------------------------------------------
# Pythonic transform (decode-only)
# ---------------------------------------------------------------------------
# Types that _to_pythonic rewrites or recurses into; any other value is
# returned unchanged after a single isinstance test.
_PYTHONIC_TYPES = (Marker, Number, Symbol, Uri, list, dict, Grid)


def _to_pythonic(val: Any) -> Any:
    """Replace Haystack kinds with native Python values where one exists.

    - Marker → True
    - Number (unitless) → float
    - Symbol → str
    - Uri → str

    Numbers with a unit are kept as-is.  Lists, dicts and Grids are rebuilt
    with the transform applied to each contained value.
    """
    if not isinstance(val, _PYTHONIC_TYPES):
        return val
    if isinstance(val, Marker):
        return True
    if isinstance(val, Number):
        if val.unit is None:
            return val.val
        return val
    if isinstance(val, (Symbol, Uri)):
        return val.val
    if isinstance(val, list):
        return [_to_pythonic(item) for item in val]
    if isinstance(val, dict):
        return {key: _to_pythonic(item) for key, item in val.items()}
    # Only Grid remains of the types admitted above.
    return _pythonic_grid(val)
def _pythonic_grid(grid: Grid) -> Grid:
    """Return a new Grid with :func:`_to_pythonic` applied to every value.

    Grid meta, each column's meta and every row are rebuilt; column names are
    kept as-is.
    """

    def convert(tags: Any) -> dict[str, Any]:
        return {name: _to_pythonic(value) for name, value in tags.items()}

    grid_meta = convert(grid.meta)
    columns = tuple(Col(name=col.name, meta=convert(col.meta)) for col in grid.cols)
    records = tuple(convert(row) for row in grid.rows)
    return Grid(meta=grid_meta, cols=columns, rows=records)