Skip to content

Xilp009 Script Regenerator

src.xil_pipeline.XILP009_script_regenerator

XILP009 — Reverse Script Generator.

Reconstructs a readable Markdown production script from parsed JSON, using the cast config for speaker display names. Produces a clean "revised" version that reflects any post-parse edits (speaker reassignments, section changes, direction_type reclassifications, etc.).

logger module-attribute

logger = get_logger(__name__)

SCRIPT_NAME module-attribute

SCRIPT_NAME = basename(__file__)

section_display_name

section_display_name(slug: str) -> str

Convert a section slug back to its display header text.

Source code in src/xil_pipeline/XILP009_script_regenerator.py
def section_display_name(slug: str) -> str:
    """Return the display header text for a section slug.

    Slugs present in the module-level slug-to-display mapping resolve to
    their mapped text.  Any other slug falls back to its upper-cased form
    with hyphens replaced by spaces.
    """
    # Fallback used only when the slug has no explicit mapping entry.
    derived = slug.upper().replace("-", " ")
    return _SECTION_SLUG_TO_DISPLAY.get(slug, derived)

speaker_display_name

speaker_display_name(key: str) -> str

Convert a speaker key back to its display name.

Source code in src/xil_pipeline/XILP009_script_regenerator.py
def speaker_display_name(key: str) -> str:
    """Return the display name for a speaker key.

    Keys present in the module-level key-to-display mapping resolve to their
    mapped name; any other key falls back to its upper-cased form.
    """
    # Fallback used only when the key has no explicit mapping entry.
    shouted = key.upper()
    return _SPEAKER_KEY_TO_DISPLAY.get(key, shouted)

regenerate_script

regenerate_script(parsed: dict, cast: dict | None = None, sfx_config: dict | None = None) -> str

Regenerate a markdown production script from parsed JSON.

Parameters:

  • parsed (dict) –

    The full parsed script dict (with metadata and entries).

  • cast (dict | None, default: None ) –

    Optional cast config dict for full_name lookups.

  • sfx_config (dict | None, default: None ) –

    Optional SFX config dict. When provided, direction entries whose text matches a source-backed SFX config key are emitted with a pipe-hint filename suffix, e.g. [SFX: PAPER LETTER FOLDED, SET DOWN ON TABLE | PAPRImpt-...mp3].

Returns:

  • str

    The reconstructed markdown script as a string.

Source code in src/xil_pipeline/XILP009_script_regenerator.py
def regenerate_script(
    parsed: dict,
    cast: dict | None = None,
    sfx_config: dict | None = None,
) -> str:
    """Rebuild the markdown production script for a parsed episode.

    The output is a plain-text header line, an optional ``CAST:`` block, one
    rendered chunk per entry, and a closing ``END OF EPISODE`` marker, all
    joined with newlines.  Entries whose ``seq`` is missing or below 1 are
    skipped, as are entries whose ``type`` is not ``section_header``,
    ``scene_header``, ``direction`` or ``dialogue``.

    Args:
        parsed: The full parsed script dict (with metadata and entries).
        cast: Optional cast config dict.  Its ``"cast"`` mapping supplies the
            ``full_name`` and ``role`` values used for the CAST block.
        sfx_config: Optional SFX config dict.  When provided, direction
            entries whose text matches a ``source``-backed SFX config key
            are emitted with a pipe-hint filename suffix, e.g.
            ``[SFX: PAPER LETTER FOLDED, SET DOWN ON TABLE | PAPRImpt-...mp3]``.

    Returns:
        The reconstructed markdown script as a string.
    """
    hints = _build_sfx_lookup(sfx_config) if sfx_config else {}

    # Header line (no markup — parser spec requires plain text)
    show = parsed.get("show", "Unknown Show")
    season = parsed.get("season")
    episode = parsed.get("episode", 1)
    title = parsed.get("title", "")
    arc = parsed.get("season_title", "")

    header_parts = [f"{show}"]
    if season is not None:
        header_parts.append(f" Season {season}:")
    header_parts.append(f" Episode {episode}:")
    if title:
        header_parts.append(f' "{title}"')
    if arc:
        header_parts.append(f' Arc: "{arc}"')

    out: list[str] = ["".join(header_parts), ""]

    # CAST block — cast config stores characters under the "cast" key
    roster = cast.get("cast") if cast else None
    if roster:
        out.append("CAST:")
        for key, info in roster.items():
            name = info.get("full_name") or key.upper()
            # Only the first line of a multi-line role is kept.
            role_head = (info.get("role") or "").split("\n")[0]
            # NOTE(review): name and role are joined with no separator —
            # confirm the role text carries its own leading delimiter.
            out.append(f"* {name}{role_head}")
        out.append("")

    # Exclude seq-0 synthetic entries (injected preamble stems from XILP002 old format)
    visible = [item for item in parsed.get("entries", []) if item.get("seq", 0) >= 1]

    for item in visible:
        kind = item.get("type")
        body = item.get("text", "")

        if kind == "section_header":
            out.extend(("===", "", body, ""))
        elif kind == "scene_header":
            out.extend((body, ""))
        elif kind == "direction":
            hint = hints.get(body)
            tail = f" | {hint}" if hint else ""
            out.extend((f"[{body}{tail}]", ""))
        elif kind == "dialogue":
            who = item.get("speaker")
            cue = item.get("direction")
            label = speaker_display_name(who) if who else "UNKNOWN"
            if cue:
                label = f"{label} ({cue})"
            out.extend((label, body, ""))
        # Any other entry type produces no output.

    # End marker
    out.extend(("END OF EPISODE", ""))

    return "\n".join(out)

get_parser

get_parser() -> argparse.ArgumentParser
Source code in src/xil_pipeline/XILP009_script_regenerator.py
def get_parser() -> argparse.ArgumentParser:
    """Build the ``xil-regen`` command-line parser.

    Exactly one of ``--episode`` / ``--tag`` must be given.  Every other
    option is an optional string override that defaults to ``None``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(
        prog="xil-regen",
        description="Regenerate a production script markdown from parsed JSON.",
    )

    # The content tag comes from exactly one of these two flags.
    target = cli.add_mutually_exclusive_group(required=True)
    target.add_argument("--episode", help="Episode tag (e.g. S02E03)")
    target.add_argument("--tag", help="Raw tag for non-episodic content (e.g. V01C03, D01)")

    # Plain optional overrides: (flag, help text), registered in this order.
    overrides = (
        ("--parsed", "Override parsed JSON path"),
        ("--cast", "Override cast config path"),
        (
            "--sfx",
            "Override SFX config path (sfx_<TAG>.json); "
            "when supplied, direction entries are emitted with "
            "a pipe-hint filename suffix for source-backed assets",
        ),
        ("--show", "Show name override (default: from project.json)"),
        ("--output", "Output markdown path (default: scripts/revised_<slug>_{TAG}.md)"),
        ("--speakers", "Path to speakers.json (default: auto-detect from CWD, then built-in)"),
    )
    for flag, help_text in overrides:
        cli.add_argument(flag, default=None, help=help_text)

    return cli

main

main()
Source code in src/xil_pipeline/XILP009_script_regenerator.py
def main():
    """Command-line entry point: regenerate a revised script and write it out.

    Parses the CLI arguments, rebuilds the module-level reverse display
    mappings from the loaded speakers, resolves the input/output paths
    (explicit overrides win over derived defaults), loads the parsed JSON
    plus the optional cast and SFX configs, and writes the regenerated
    markdown to the output path.

    Exits with status 1 when the parsed JSON file does not exist.  A missing
    cast or SFX config is not an error: that input is simply passed as
    ``None``.
    """
    global _SECTION_SLUG_TO_DISPLAY, _SPEAKER_KEY_TO_DISPLAY

    def _read_json(path):
        # Load one UTF-8 encoded JSON document from disk.
        with open(path, encoding="utf-8") as handle:
            return json.load(handle)

    configure_logging()
    opts = get_parser().parse_args()
    tag = opts.episode or opts.tag

    with run_banner(SCRIPT_NAME):
        # Load speakers and rebuild reverse mappings
        _unused_speakers, speaker_keys = load_speakers(opts.speakers)
        _SECTION_SLUG_TO_DISPLAY, _SPEAKER_KEY_TO_DISPLAY = _build_reverse_mappings(speaker_keys)

        show_slug = resolve_slug(opts.show)
        defaults = derive_paths(show_slug, tag)
        parsed_file = opts.parsed or defaults["parsed"]
        cast_file = opts.cast or defaults["cast"]
        sfx_file = opts.sfx or defaults["sfx"]
        target_file = opts.output or defaults["revised_script"]

        # The parsed JSON is the only mandatory input.
        if not os.path.exists(parsed_file):
            logger.error(f"Parsed JSON not found: {parsed_file}")
            sys.exit(1)
        parsed = _read_json(parsed_file)

        cast = _read_json(cast_file) if os.path.exists(cast_file) else None

        if os.path.exists(sfx_file):
            sfx_config = _read_json(sfx_file)
            logger.info(f"  SFX config loaded: {sfx_file}")
        else:
            sfx_config = None
            logger.debug(f"  No SFX config found at {sfx_file} — pipe-hints disabled")

        markdown = regenerate_script(parsed, cast, sfx_config)

        # A bare filename has no directory part; fall back to the CWD.
        target_dir = os.path.dirname(target_file) or "."
        os.makedirs(target_dir, exist_ok=True)
        with open(target_file, "w", encoding="utf-8") as sink:
            sink.write(markdown)

        # Summary
        total_entries = len(parsed.get("entries", []))
        dialogue_total = parsed.get("stats", {}).get("dialogue_lines", 0)
        logger.info(f"  Regenerated script from {total_entries} entries ({dialogue_total} dialogue)")
        logger.info(f"  Written to: {target_file}")