# Source code for hs_py.encoding.csv
"""Haystack CSV encoding.
CSV is a lossy text format for grids — metadata, column meta, and type
information are discarded. It is useful for exporting grid data to
spreadsheets and other tools that consume RFC 4180 CSV.
See: https://project-haystack.org/doc/docHaystack/Csv
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from hs_py.encoding.scanner import format_number, format_ref
from hs_py.kinds import Marker, Na, Number, Ref, Remove, Uri
if TYPE_CHECKING:
from hs_py.grid import Grid
# Public API: the only name a star-import of this module exports.
__all__ = [
    "encode_grid",
]
# [docs] (Sphinx viewcode link marker left over from the rendered page)
def encode_grid(grid: Grid) -> str:
    """Serialize *grid* to CSV text.

    The first line holds one header per column: the column's ``dis``
    meta value if it has one, otherwise the column name. Every following
    line holds one row, with its cells in column order. Grid-level and
    column-level metadata are not written, and cell values are reduced
    to the simplified text forms of the Haystack CSV spec.

    :param grid: Grid to encode.
    :returns: CSV text in which every line, including the last, is
        terminated by a newline character.
    """
    # Header line: display name when available, column name otherwise.
    header_line = ",".join(
        _escape_cell(str(column.meta.get("dis", column.name)))
        for column in grid.cols
    )

    # One line per row; a column missing from the row yields None from
    # ``row.get`` and is handed to ``_encode_val`` as such.
    row_lines = [
        ",".join(
            _escape_cell(_encode_val(record.get(column.name)))
            for column in grid.cols
        )
        for record in grid.rows
    ]

    return "".join(f"{line}\n" for line in [header_line, *row_lines])
# ---------------------------------------------------------------------------
# Value encoding
# ---------------------------------------------------------------------------
def _encode_val(val: Any) -> str:
    """Render one Haystack value as (unescaped) CSV cell text.

    The checks run in this order and the first match wins:

    * ``None`` -> empty string
    * ``Marker`` -> check mark (U+2713)
    * ``Na`` or ``Remove`` -> empty string
    * ``bool`` -> ``"true"`` or ``"false"``
    * ``str`` -> returned unchanged
    * ``Uri`` -> its ``val`` attribute
    * ``Ref`` -> result of ``format_ref``
    * ``Number`` -> result of ``format_number``
    * anything else -> result of the Zinc value encoder

    :param val: Value to encode; may be ``None``.
    :returns: Text for the cell. No CSV quoting is applied here.
    """
    if val is None:
        return ""

    if isinstance(val, Marker):
        text = "\u2713"
    elif isinstance(val, (Na, Remove)):
        text = ""
    elif isinstance(val, bool):
        text = "true" if val else "false"
    elif isinstance(val, str):
        text = val
    elif isinstance(val, Uri):
        text = val.val
    elif isinstance(val, Ref):
        text = format_ref(val)
    elif isinstance(val, Number):
        text = format_number(val)
    else:
        # Every remaining type is delegated to the Zinc encoder.
        # NOTE(review): the import is deferred to call time, presumably
        # to avoid an import cycle with hs_py.encoding.zinc -- confirm.
        from hs_py.encoding.zinc import encode_val as zinc_encode

        text = zinc_encode(val)
    return text
# ---------------------------------------------------------------------------
# RFC 4180 cell escaping
# ---------------------------------------------------------------------------
# Characters whose presence forces a cell to be quoted: comma, double
# quote, line feed and carriage return.
_CSV_ESCAPE_CHARS = frozenset(',"\n\r')


def _escape_cell(val: str) -> str:
    """Quote a single CSV cell when RFC 4180 requires it.

    A cell containing a comma, a double quote, a line feed or a carriage
    return is wrapped in double quotes, with every embedded double quote
    doubled. Any other cell, including the empty string, is returned
    unchanged.

    :param val: Raw cell text.
    :returns: Cell text that is safe to join with commas.
    """
    needs_quoting = not _CSV_ESCAPE_CHARS.isdisjoint(val)
    if not needs_quoting:
        return val
    doubled = val.replace('"', '""')
    return f'"{doubled}"'