XILU014 — Episode Summary CSV.
Scans all parsed_<tag>.json files under /parsed/ and writes a
one-row-per-episode summary CSV with dialogue line count, word count, and
TTS character count.
Usage:
xil episode-summary # write episode_summary.csv in workspace root
xil episode-summary --output summary.csv # custom output path
xil episode-summary --show "THE 413" # filter to one show
xil episode-summary --stdout # write CSV to stdout (no banner)
logger
module-attribute
logger = get_logger(__name__)
SCRIPT_NAME
module-attribute
SCRIPT_NAME = basename(__file__)
ALL_COLS
module-attribute
ALL_COLS = ['show', 'tag', 'season', 'episode', 'title', 'season_title', 'dialogue_lines', 'words', 'tts_chars']
build_summary
build_summary(parsed_root: Path, show_filter: str | None = None) -> list[dict]
Return one summary dict per episode found under parsed_root.
Source code in src/xil_pipeline/XILU014_episode_summary.py
def build_summary(parsed_root: Path, show_filter: str | None = None) -> list[dict]:
    """Return one summary dict per episode found under parsed_root.

    Args:
        parsed_root: Root directory passed to ``_collect_files`` to locate
            the parsed episode JSON files.
        show_filter: Optional show name. When given (and non-empty), only
            episodes whose ``show`` value matches it case-insensitively are
            included.

    Returns:
        A list of row dicts with the keys ``show``, ``tag``, ``season``,
        ``episode``, ``title``, ``season_title``, ``dialogue_lines``,
        ``words`` and ``tts_chars``, ordered by ``_sort_key``.

    Files that cannot be opened or parsed, or whose top-level JSON value is
    not an object, are logged with a warning and skipped so that one bad
    file never aborts the whole scan.
    """
    # Lower-case the filter once instead of on every file.
    wanted_show = show_filter.lower() if show_filter else None

    rows = []
    for path in _collect_files(parsed_root):
        file_name = os.path.basename(path)
        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except Exception as exc:
            # Deliberately broad: this is a best-effort scan, so any failure
            # to read/parse one file is reported and the scan continues.
            logger.warning("Skipping %s — %s", file_name, exc)
            continue

        # json.load may legally return a list, string, number, etc.; only an
        # object (dict) can carry the episode fields read below.
        if not isinstance(data, dict):
            logger.warning(
                "Skipping %s — %s", file_name, "top-level JSON value is not an object"
            )
            continue

        show = data.get("show", "")
        if wanted_show is not None:
            # A null / non-string "show" must not crash the comparison.
            show_text = "" if show is None else str(show)
            if show_text.lower() != wanted_show:
                continue

        tag = file_name.removeprefix("parsed_").removesuffix(".json")
        season = data.get("season")
        episode = data.get("episode")

        # Tolerate explicit nulls or wrong types for the nested containers.
        stats = data.get("stats")
        if not isinstance(stats, dict):
            stats = {}
        entries = data.get("entries")
        if not isinstance(entries, list):
            entries = []

        # Word count = whitespace-separated tokens across dialogue entries.
        words = 0
        for entry in entries:
            if not isinstance(entry, dict) or entry.get("type") != "dialogue":
                continue
            text = entry.get("text")
            if isinstance(text, str):
                words += len(text.split())

        rows.append({
            "show": show,
            "tag": tag,
            # Missing season/episode become empty strings for the CSV row.
            "season": season if season is not None else "",
            "episode": episode if episode is not None else "",
            "title": data.get("title", ""),
            "season_title": data.get("season_title", ""),
            "dialogue_lines": stats.get("dialogue_lines", 0),
            "words": words,
            "tts_chars": stats.get("characters_for_tts", 0),
        })

    rows.sort(key=_sort_key)
    return rows
|
get_parser
get_parser() -> argparse.ArgumentParser
Source code in src/xil_pipeline/XILU014_episode_summary.py
def get_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the episode-summary command.

    Returns:
        A parser that accepts ``--output``/``-o`` (output CSV path),
        ``--show`` (single-show filter) and ``--stdout`` (write the CSV to
        stdout instead of a file).
    """
    cli = argparse.ArgumentParser(
        prog="xil-episode-summary",
        description="Write a one-row-per-episode summary CSV from all parsed_<tag>.json files.",
    )

    # (flags, keyword options) for each argument, in help-output order.
    option_table = (
        (
            ("--output", "-o"),
            {
                "default": None,
                "metavar": "FILE",
                "help": "Output CSV path (default: <workspace>/episode_summary.csv)",
            },
        ),
        (
            ("--show",),
            {
                "default": None,
                "metavar": "NAME",
                "help": "Filter to a single show name, e.g. 'THE 413' (case-insensitive)",
            },
        ),
        (
            ("--stdout",),
            {
                "action": "store_true",
                "help": "Write CSV to stdout — no banner, safe to pipe",
            },
        ),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)

    return cli
|
main
Source code in src/xil_pipeline/XILU014_episode_summary.py
def main() -> None:
    """Command-line entry point: set up logging, parse arguments, and run.

    With ``--stdout`` the work is run directly; otherwise it is run inside
    the ``run_banner(SCRIPT_NAME)`` context manager.
    """
    configure_logging()
    cli_args = get_parser().parse_args()

    if not cli_args.stdout:
        with run_banner(SCRIPT_NAME):
            _run(cli_args)
        return

    # --stdout: skip the banner wrapper (the usage text describes this mode
    # as "no banner", so the CSV on stdout is safe to pipe).
    _run(cli_args)
|