Skip to content

Xilu006 Splice Parsed

src.xil_pipeline.XILU006_splice_parsed

XILU006 — Splice Parsed JSON Utility.

Insert entries into (or delete entries from) a parsed episode JSON file, with automatic seq renumbering. Supports sourcing new entries from another parsed JSON by seq range, or from a standalone JSON array file.

After splicing, run XILP007_stem_migrator.py with --orig-prefix pre_splice_ to migrate stems to the new seq numbers, then XILP002_producer.py to generate TTS for the newly inserted entries.

Usage:

python XILU006_splice_parsed.py --episode S02E03 --insert-after 322 \
    --from-parsed parsed/parsed_<slug>_S02E02.json --from-seq-range 232-233 \
    --section post-interview --dry-run
python XILU006_splice_parsed.py --episode S02E03 --delete-seq-range 100-105

python XILU006_splice_parsed.py --episode S02E03 --insert-after 322 \
    --from-json new_entries.json

No ElevenLabs API calls are made — this utility is safe to run freely.

logger module-attribute

logger = get_logger(__name__)

renumber_entries

renumber_entries(entries: list[dict]) -> list[dict]

Assign contiguous seq numbers 1..N to the positive-seq entries, in list order. Preamble entries (seq <= 0) are left unchanged.

Returns a new list — does not mutate the input.

Source code in src/xil_pipeline/XILU006_splice_parsed.py
def renumber_entries(entries: list[dict]) -> list[dict]:
    """Return deep copies of *entries* with body seqs renumbered 1..N.

    Entries whose ``seq`` is positive are numbered consecutively in list
    order; preamble entries (``seq <= 0``) keep their seq as-is.

    The input list and the dicts inside it are never modified.
    """
    renumbered = [copy.deepcopy(entry) for entry in entries]
    # Lazily pick out the body entries; each one is tested before it is renumbered.
    body = (entry for entry in renumbered if entry["seq"] > 0)
    for next_seq, entry in enumerate(body, start=1):
        entry["seq"] = next_seq
    return renumbered

extract_seq_range

extract_seq_range(entries: list[dict], start: int, end: int) -> list[dict]

Extract entries within [start, end] inclusive. Returns deep copies.

Source code in src/xil_pipeline/XILU006_splice_parsed.py
def extract_seq_range(entries: list[dict], start: int, end: int) -> list[dict]:
    """Return deep copies of the entries whose seq lies in ``[start, end]``.

    Both bounds are inclusive and the original list order is preserved.
    """
    selected = []
    for entry in entries:
        if not (start <= entry["seq"] <= end):
            continue
        selected.append(copy.deepcopy(entry))
    return selected

splice_entries

splice_entries(entries: list[dict], insert_after_seq: int, new_entries: list[dict], section_override: str | None = None, scene_override: str | None = None) -> list[dict]

Insert new_entries after the entry with insert_after_seq, renumber all.

Preamble entries (seq <= 0) are never renumbered. New entries inherit section and scene from the insertion point unless overrides are given.

Raises:

  • ValueError

    If insert_after_seq is <= 0 (preamble zone) or not found.

Source code in src/xil_pipeline/XILU006_splice_parsed.py
def splice_entries(
    entries: list[dict],
    insert_after_seq: int,
    new_entries: list[dict],
    section_override: str | None = None,
    scene_override: str | None = None,
) -> list[dict]:
    """Insert *new_entries* after the entry with *insert_after_seq*, renumber all.

    Preamble entries (seq <= 0) are never renumbered.  New entries inherit
    ``section`` and ``scene`` from the insertion point unless overrides are given.

    Raises:
        ValueError: If *insert_after_seq* is <= 0 (preamble zone) or not found.
    """
    if insert_after_seq <= 0:
        raise ValueError(f"Cannot insert after seq {insert_after_seq} (preamble zone)")

    preamble = [copy.deepcopy(e) for e in entries if e["seq"] <= 0]
    body = [copy.deepcopy(e) for e in entries if e["seq"] > 0]

    # Find insertion index
    insert_idx = None
    anchor_entry = None
    for i, e in enumerate(body):
        if e["seq"] == insert_after_seq:
            insert_idx = i + 1
            anchor_entry = e
            break
    if insert_idx is None:
        raise ValueError(f"seq {insert_after_seq} not found in entries")

    # Prepare new entries
    prepared = []
    for e in new_entries:
        ne = copy.deepcopy(e)
        ne["section"] = section_override if section_override else anchor_entry["section"]
        ne["scene"] = scene_override if scene_override is not None else anchor_entry["scene"]
        prepared.append(ne)

    # Splice
    body = body[:insert_idx] + prepared + body[insert_idx:]

    # Renumber body
    for i, e in enumerate(body):
        e["seq"] = i + 1

    return preamble + body

delete_entries

delete_entries(entries: list[dict], seq_range: tuple[int, int]) -> list[dict]

Remove entries whose seq falls within [start, end] inclusive, renumber remainder.

Raises:

  • ValueError

    If seq_range includes preamble entries (seq <= 0).

Source code in src/xil_pipeline/XILU006_splice_parsed.py
def delete_entries(entries: list[dict], seq_range: tuple[int, int]) -> list[dict]:
    """Return a copy of *entries* without the body entries in *seq_range*.

    *seq_range* is an inclusive ``(start, end)`` pair.  Surviving body entries
    are renumbered 1..N; preamble entries (seq <= 0) are returned first with
    their seq untouched.  All returned dicts are deep copies.

    Raises:
        ValueError: If *seq_range* includes preamble entries (seq <= 0).
    """
    start, end = seq_range
    if start <= 0:
        raise ValueError(f"Cannot delete preamble entries (seq_range starts at {start})")

    preamble: list[dict] = []
    survivors: list[dict] = []
    for entry in entries:
        seq = entry["seq"]
        if seq <= 0:
            preamble.append(copy.deepcopy(entry))
        elif seq > 0 and not (start <= seq <= end):
            survivors.append(copy.deepcopy(entry))

    # Close the gap left by the removed entries.
    for new_seq, entry in enumerate(survivors, start=1):
        entry["seq"] = new_seq

    return preamble + survivors

update_stats

update_stats(data: dict) -> None

Recompute data['stats'] from data['entries'].

Counts only body entries (seq > 0). Mutates data in place.

Source code in src/xil_pipeline/XILU006_splice_parsed.py
def update_stats(data: dict) -> None:
    """Rebuild ``data['stats']`` from the entries currently in ``data['entries']``.

    Only body entries (seq > 0) contribute.  The ``stats`` value is replaced
    wholesale on *data* itself; nothing is returned.
    """
    body_entries = list(filter(lambda entry: entry["seq"] > 0, data["entries"]))
    spoken = [entry for entry in body_entries if entry["type"] == "dialogue"]
    direction_total = sum(1 for entry in body_entries if entry["type"] == "direction")

    # Distinct, non-empty speaker and section names, alphabetised.
    speaker_names = sorted({entry["speaker"] for entry in spoken if entry.get("speaker")})
    section_names = sorted({entry["section"] for entry in body_entries if entry.get("section")})

    # Only dialogue text counts towards the TTS character total.
    tts_total = 0
    for entry in spoken:
        tts_total += len(entry["text"])

    data["stats"] = dict(
        total_entries=len(body_entries),
        dialogue_lines=len(spoken),
        direction_lines=direction_total,
        characters_for_tts=tts_total,
        speakers=speaker_names,
        sections=section_names,
    )

run_splice

run_splice(target_path: str, insert_after_seq: int | None = None, new_entries: list[dict] | None = None, delete_range: tuple[int, int] | None = None, section_override: str | None = None, scene_override: str | None = None, dry_run: bool = False, backup_path: str | None = 'AUTO', quiet: bool = False) -> dict

Load, splice/delete, and write back a parsed JSON file.

Parameters:

  • target_path (str) –

    Path to the parsed JSON file.

  • insert_after_seq (int | None, default: None ) –

    Seq number to insert after (None to skip insertion).

  • new_entries (list[dict] | None, default: None ) –

    Entries to insert (required if insert_after_seq is set).

  • delete_range (tuple[int, int] | None, default: None ) –

    (start, end) seq range to delete (None to skip deletion).

  • section_override (str | None, default: None ) –

    Override section on inserted entries.

  • scene_override (str | None, default: None ) –

    Override scene on inserted entries.

  • dry_run (bool, default: False ) –

    If True, print plan but do not write files.

  • backup_path (str | None, default: 'AUTO' ) –

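    Path for the backup file, or None to skip the backup. The default sentinel "AUTO" is not resolved by this function: if passed through unchanged it is used as a literal file name.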

  • quiet (bool, default: False ) –

    If True, suppress per-entry detail.

Returns:

  • dict

    The updated parsed data dict.

Source code in src/xil_pipeline/XILU006_splice_parsed.py
def run_splice(
    target_path: str,
    insert_after_seq: int | None = None,
    new_entries: list[dict] | None = None,
    delete_range: tuple[int, int] | None = None,
    section_override: str | None = None,
    scene_override: str | None = None,
    dry_run: bool = False,
    backup_path: str | None = "AUTO",
    quiet: bool = False,
) -> dict:
    """Load, splice/delete, and write back a parsed JSON file.

    The delete phase runs before the insert phase, so *insert_after_seq* is
    matched against the seq numbers as they stand after deletion renumbering.
    ``data['stats']`` is recomputed afterwards in either case.

    Args:
        target_path: Path to the parsed JSON file.
        insert_after_seq: Seq number to insert after (None to skip insertion).
        new_entries: Entries to insert (required if *insert_after_seq* is set;
            insertion is skipped when this is None or empty).
        delete_range: (start, end) seq range to delete (None to skip deletion).
        section_override: Override section on inserted entries.
        scene_override: Override scene on inserted entries.
        dry_run: If True, log the plan but do not write files.
        backup_path: Path for the backup file, or None to skip the backup.
            The default sentinel ``"AUTO"`` is not resolved here: if passed
            through unchanged it is used as a literal file name.
        quiet: If True, suppress per-entry detail (the summary is still logged).

    Returns:
        The updated parsed data dict.

    Raises:
        ValueError: Propagated from the delete/splice helpers (preamble range,
            or *insert_after_seq* not found).
    """

    def _describe(entry: dict) -> str:
        """Format one entry as ``[type] SPEAKER: text`` (speaker part optional)."""
        speaker = entry.get("speaker") or ""
        prefix = f"{speaker}: " if speaker else ""
        return f"[{entry['type']}] {prefix}{entry['text'][:60]}"

    with open(target_path, encoding="utf-8") as f:
        data = json.load(f)

    entries = data["entries"]
    original_count = sum(1 for e in entries if e["seq"] > 0)

    # Delete phase
    if delete_range:
        start, end = delete_range
        # Captured before deletion so the removed entries can be reported.
        deleted = [e for e in entries if start <= e["seq"] <= end]
        entries = delete_entries(entries, delete_range)
        if not quiet:
            logger.info(f"\n  DELETE seq {start}-{end}: {len(deleted)} entries removed")
            for e in deleted:
                logger.info(f"    - seq {e['seq']} {_describe(e)}")

    # Insert phase
    if insert_after_seq is not None and new_entries:
        if not quiet:
            anchor = next((e for e in entries if e["seq"] == insert_after_seq), None)
            # Build the headline first and append the anchor preview only when
            # the anchor exists; a trailing ``if anchor else ""`` on the whole
            # expression would blank the entire message when it is missing.
            headline = f"\n  INSERT {len(new_entries)} entries after seq {insert_after_seq}"
            if anchor:
                headline += f" ({anchor['text'][:40]}...)"
            logger.info(headline)
            label = section_override or "(inherit)"
            for e in new_entries:
                logger.info(f"    + {_describe(e)}  [section={label}]")
        entries = splice_entries(
            entries, insert_after_seq, new_entries,
            section_override=section_override, scene_override=scene_override,
        )

    data["entries"] = entries
    update_stats(data)

    new_count = sum(1 for e in entries if e["seq"] > 0)
    logger.info(f"\n  Summary: {original_count} -> {new_count} entries (body)")

    if dry_run:
        logger.info("  [DRY RUN] No files written.")
        return data

    # Write backup: a verbatim copy of the file as it is on disk right now,
    # taken before the target is overwritten below.
    if backup_path:
        with open(target_path, encoding="utf-8") as f:
            original_content = f.read()
        with open(backup_path, "w", encoding="utf-8") as f:
            f.write(original_content)
        logger.info(f"  Backup written: {backup_path}")

    # Write updated file
    with open(target_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
        f.write("\n")
    logger.info(f"  Updated: {target_path}")

    return data

get_parser

get_parser() -> argparse.ArgumentParser
Source code in src/xil_pipeline/XILU006_splice_parsed.py
def get_parser() -> argparse.ArgumentParser:
    """Build the ``xil-splice`` command-line parser.

    Exactly one of ``--episode`` / ``--tag`` must be supplied.  All other
    options are optional: valued options default to ``None`` and the switches
    (``--dry-run``, ``--no-backup``, ``--quiet``) default to ``False``.
    """
    cli = argparse.ArgumentParser(
        prog="xil-splice",
        description="Splice Parsed JSON — insert/delete entries with automatic renumbering",
    )

    # Target selection: one of the two tags is mandatory.
    target = cli.add_mutually_exclusive_group(required=True)
    target.add_argument(
        "--episode",
        help="Episode tag (e.g. S02E03) — derives target parsed JSON path",
    )
    target.add_argument(
        "--tag",
        help="Raw tag for non-episodic content (e.g. V01C03, D01)",
    )

    # (flag, value type, help) in registration order: path overrides, insert
    # options, then the delete option.  Dest names come from the flag itself
    # (e.g. --from-seq-range -> from_seq_range).
    valued_options = (
        ("--show", None, "Show name override (default: from project.json)"),
        ("--parsed", None, "Override target parsed JSON path"),
        ("--insert-after", int, "Seq number to insert after"),
        ("--from-parsed", None, "Source parsed JSON to extract entries from"),
        ("--from-seq-range", None, "Seq range to extract from source (e.g. 232-233)"),
        ("--from-json", None, "Path to a JSON array of entries to insert"),
        ("--section", None, "Override section on inserted entries"),
        ("--scene", None, "Override scene on inserted entries"),
        ("--delete-seq-range", None, "Seq range to delete (e.g. 100-105)"),
    )
    for flag, value_type, help_text in valued_options:
        cli.add_argument(flag, type=value_type, default=None, help=help_text)

    # Output switches
    switches = (
        ("--dry-run", "Show plan without writing files"),
        ("--no-backup", "Skip backup file"),
        ("--quiet", "Summary only, no per-entry detail"),
    )
    for flag, help_text in switches:
        cli.add_argument(flag, action="store_true", help=help_text)

    return cli

main

main() -> None

Splice entries into or delete entries from a parsed episode JSON.

Source code in src/xil_pipeline/XILU006_splice_parsed.py
def main() -> None:
    """Splice entries into or delete entries from a parsed episode JSON.

    Parses the command line, resolves the target parsed JSON (``--parsed`` or
    the path derived from the show slug and tag), gathers the entries to insert
    and/or the range to delete, then hands off to ``run_splice``.  Usage
    problems (missing target, missing source, nothing to do) are logged and the
    function returns without writing anything.
    """
    configure_logging()
    with run_banner():
        args = get_parser().parse_args()

        # Resolve paths
        tag = args.episode or args.tag
        slug = resolve_slug(args.show)
        paths = derive_paths(slug, tag)
        target_path = args.parsed or paths["parsed"]

        if not os.path.exists(target_path):
            logger.error(f"Target parsed JSON not found: {target_path}")
            return

        logger.info(f"  Target: {target_path}")

        # Resolve new entries for insertion
        new_entries = None
        if args.insert_after is not None:
            if args.from_parsed and args.from_seq_range:
                start, end = _parse_range(args.from_seq_range)
                with open(args.from_parsed, encoding="utf-8") as f:
                    source_data = json.load(f)
                new_entries = extract_seq_range(source_data["entries"], start, end)
                if not new_entries:
                    logger.warning(f"No entries found in seq range {start}-{end} of {args.from_parsed}")
                    return
                logger.info(f"  Source: {args.from_parsed} seq {start}-{end} ({len(new_entries)} entries)")
            elif args.from_json:
                with open(args.from_json, encoding="utf-8") as f:
                    new_entries = json.load(f)
                # A JSON object or scalar would otherwise be counted as
                # "entries" here and fail later inside the splice.
                if not isinstance(new_entries, list):
                    logger.error(f"--from-json must contain a JSON array of entries: {args.from_json}")
                    return
                logger.info(f"  Source: {args.from_json} ({len(new_entries)} entries)")
            else:
                logger.error("--insert-after requires --from-parsed + --from-seq-range or --from-json")
                return

        # Resolve delete range
        delete_range = None
        if args.delete_seq_range:
            delete_range = _parse_range(args.delete_seq_range)

        if new_entries is None and delete_range is None:
            logger.error("Nothing to do — specify --insert-after or --delete-seq-range")
            return

        # Resolve backup path: written next to the target, skipped for
        # --no-backup and for dry runs.
        backup_path = None
        if not args.no_backup and not args.dry_run:
            parsed_dir = os.path.dirname(target_path)
            backup_name = f"pre_splice_parsed_{slug}_{tag}.json"
            backup_path = os.path.join(parsed_dir, backup_name) if parsed_dir else backup_name

        # Run
        run_splice(
            target_path=target_path,
            insert_after_seq=args.insert_after,
            new_entries=new_entries,
            delete_range=delete_range,
            section_override=args.section,
            scene_override=args.scene,
            dry_run=args.dry_run,
            backup_path=backup_path,
            quiet=args.quiet,
        )

        # Next steps
        if not args.dry_run:
            orig_prefix = "pre_splice_"
            logger.info("\n  Next steps:")
            logger.info(f"    1. python XILP007_stem_migrator.py --episode {tag} --orig-prefix {orig_prefix}")
            logger.info(f"    2. python XILP002_producer.py --episode {tag}")