Skip to content

Xilu014 Episode Summary

src.xil_pipeline.XILU014_episode_summary

XILU014 — Episode Summary CSV.

Scans all parsed_<tag>.json files under the parsed/ directory and writes a one-row-per-episode summary CSV with dialogue line count, word count, and TTS character count.

Usage:

xil episode-summary                          # write episode_summary.csv in workspace root
xil episode-summary --output summary.csv     # custom output path
xil episode-summary --show "THE 413"         # filter to one show
xil episode-summary --stdout                 # write CSV to stdout (no banner)

logger module-attribute

logger = get_logger(__name__)

SCRIPT_NAME module-attribute

SCRIPT_NAME = basename(__file__)

ALL_COLS module-attribute

ALL_COLS = ['show', 'tag', 'season', 'episode', 'title', 'season_title', 'dialogue_lines', 'words', 'tts_chars']

build_summary

build_summary(parsed_root: Path, show_filter: str | None = None) -> list[dict]

Return one summary dict per episode found under parsed_root.

Source code in src/xil_pipeline/XILU014_episode_summary.py
def build_summary(parsed_root: Path, show_filter: str | None = None) -> list[dict]:
    """Return one summary dict per episode found under parsed_root.

    Args:
        parsed_root: Directory passed to ``_collect_files`` to locate the
            parsed episode JSON files.
        show_filter: When given, only episodes whose ``show`` value matches
            it case-insensitively are kept.

    Returns:
        A list of row dicts with the keys ``show``, ``tag``, ``season``,
        ``episode``, ``title``, ``season_title``, ``dialogue_lines``,
        ``words`` and ``tts_chars``, sorted with ``_sort_key``.  Files that
        cannot be read, cannot be parsed, or do not hold a JSON object at the
        top level are logged as warnings and skipped.
    """
    files = _collect_files(parsed_root)
    rows = []
    for path in files:
        name = os.path.basename(path)
        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except Exception as exc:
            # Deliberate best-effort: one unreadable file must not abort the
            # whole scan.  The ": " separator keeps the file name and the
            # error text from running together in the log line.
            logger.warning("Skipping %s: %s", name, exc)
            continue

        if not isinstance(data, dict):
            # Valid JSON, but not an object; every lookup below needs .get().
            logger.warning("Skipping %s: top-level JSON value is not an object", name)
            continue

        # ``or`` fallbacks treat an explicit JSON null like a missing key.
        show = data.get("show") or ""
        if show_filter and show.lower() != show_filter.lower():
            continue

        tag = name.removeprefix("parsed_").removesuffix(".json")
        season = data.get("season")
        episode = data.get("episode")
        stats = data.get("stats") or {}

        # Word count is whitespace-delimited tokens over dialogue entries only.
        words = sum(
            len((entry.get("text") or "").split())
            for entry in data.get("entries") or []
            if isinstance(entry, dict) and entry.get("type") == "dialogue"
        )

        rows.append({
            "show":           show,
            "tag":            tag,
            "season":         season if season is not None else "",
            "episode":        episode if episode is not None else "",
            "title":          data.get("title", ""),
            "season_title":   data.get("season_title", ""),
            "dialogue_lines": stats.get("dialogue_lines", 0),
            "words":          words,
            "tts_chars":      stats.get("characters_for_tts", 0),
        })

    rows.sort(key=_sort_key)
    return rows

get_parser

get_parser() -> argparse.ArgumentParser
Source code in src/xil_pipeline/XILU014_episode_summary.py
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the episode-summary command.

    The parser accepts three optional flags: ``--output``/``-o`` (CSV path),
    ``--show`` (show-name filter) and ``--stdout`` (boolean switch).
    """
    # (flags, add_argument keyword options), registered in this order.
    option_specs = (
        (
            ("--output", "-o"),
            {
                "default": None,
                "metavar": "FILE",
                "help": "Output CSV path (default: <workspace>/episode_summary.csv)",
            },
        ),
        (
            ("--show",),
            {
                "default": None,
                "metavar": "NAME",
                "help": "Filter to a single show name, e.g. 'THE 413' (case-insensitive)",
            },
        ),
        (
            ("--stdout",),
            {
                "action": "store_true",
                "help": "Write CSV to stdout — no banner, safe to pipe",
            },
        ),
    )

    cli = argparse.ArgumentParser(
        prog="xil-episode-summary",
        description="Write a one-row-per-episode summary CSV from all parsed_<tag>.json files.",
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)
    return cli

main

main() -> None
Source code in src/xil_pipeline/XILU014_episode_summary.py
def main() -> None:
    """Command-line entry point: set up logging, parse arguments, run.

    With ``--stdout`` the run happens without the ``run_banner`` context;
    otherwise it is wrapped in ``run_banner(SCRIPT_NAME)``.
    """
    configure_logging()
    cli_args = get_parser().parse_args()

    if not cli_args.stdout:
        with run_banner(SCRIPT_NAME):
            _run(cli_args)
        return

    # --stdout: skip the banner context entirely.
    _run(cli_args)