XILP010 — ElevenLabs Studio Export Importer.
Extracts dialogue and direction stems from an ElevenLabs Studio export ZIP
and renames them to the pipeline's stem naming convention, placing them in
stems/{TAG}/ ready for downstream tools (XILP003, XILP005).
ElevenLabs Studio exports one MP3 per parsed entry (including headers and
directions), sequentially numbered as NNN_Chapter N.mp3. Dialogue
entries are always extracted. Direction entries (SFX, MUSIC, BEAT) can be
included with --gen-sfx, --gen-music, --gen-beats, or --all (--all also
includes AMBIENCE directions).
Usage:
python XILP010_studio_import.py --episode S02E02 \
--zip "ElevenLabs_exports/export.zip" --dry-run
python XILP010_studio_import.py --episode S02E02 \
--zip "ElevenLabs_exports/export.zip" --gen-sfx --gen-music --gen-beats
python XILP010_studio_import.py --episode S02E02 --zip "..." --all
python XILP010_studio_import.py --episode S02E02 --zip "..." --force
logger
module-attribute
logger = get_logger(__name__)
SCRIPT_NAME
module-attribute
SCRIPT_NAME = 'XILP010 · Studio Import'
extract_stems(zip_path: str, parsed: dict, stems_dir: str, *, dry_run: bool = False, force: bool = False, include_dtypes: set[str] | None = None) -> dict
Extract and rename stems from an ElevenLabs Studio export ZIP.
Parameters:
-
zip_path
(str)
–
Path to the Studio export ZIP file.
-
parsed
(dict)
–
Parsed episode dict (from parse_script()).
-
stems_dir
(str)
–
Target directory for extracted stems.
-
dry_run
(bool, default:
False
)
–
If True, print the plan without writing files.
-
force
(bool, default:
False
)
–
If True, overwrite existing stems on disk.
-
include_dtypes
(set[str] | None, default:
None
)
–
Set of direction_type values to extract
(e.g. {"SFX", "MUSIC", "BEAT"}). Dialogue entries are
always extracted. Headers are always skipped. An empty set
or None extracts dialogue only.
Returns:
-
dict
–
A stats dict with counts: extracted, skipped_exists,
skipped_type, skipped_header, missing_seq.
Source code in src/xil_pipeline/XILP010_studio_import.py
def extract_stems(
    zip_path: str,
    parsed: dict,
    stems_dir: str,
    *,
    dry_run: bool = False,
    force: bool = False,
    include_dtypes: set[str] | None = None,
) -> dict:
    """Copy the wanted MP3 members of a Studio export ZIP into ``stems_dir``.

    Each ``.mp3`` member is mapped to a parsed entry through the sequence
    number returned by ``_parse_zip_seq()``; the output file name comes from
    ``make_stem_name()``.

    Args:
        zip_path: Path to the Studio export ZIP file.
        parsed: Parsed episode dict; its ``"entries"`` list is indexed by
            each entry's ``"seq"`` value.
        stems_dir: Directory that receives the renamed stems. It is created
            if needed, except in dry-run mode.
        dry_run: If ``True``, only log the plan; nothing is written.
        force: If ``True``, existing stem files are overwritten instead of
            being skipped.
        include_dtypes: ``direction_type`` values to extract for entries of
            type ``"direction"`` (e.g. ``{"SFX", "MUSIC", "BEAT"}``).
            ``None`` or an empty set means no direction entries are
            extracted. ``section_header`` / ``scene_header`` entries are
            always skipped.

    Returns:
        A dict of counters with the keys ``extracted``, ``skipped_exists``,
        ``skipped_type``, ``skipped_header`` and ``missing_seq``.
    """
    wanted_dtypes = set() if include_dtypes is None else include_dtypes

    # Index the parsed entries by sequence number (last one wins on clashes).
    by_seq = {}
    for item in parsed["entries"]:
        by_seq[item["seq"]] = item

    counts = dict.fromkeys(
        (
            "extracted",
            "skipped_exists",
            "skipped_type",
            "skipped_header",
            "missing_seq",
        ),
        0,
    )

    writing = not dry_run
    if writing:
        os.makedirs(stems_dir, exist_ok=True)

    # The log tag is the same for every member, so compute it once.
    marker = "DRY-RUN" if dry_run else "EXTRACT"

    with zipfile.ZipFile(zip_path, "r") as archive:
        for name in sorted(archive.namelist()):
            if not name.lower().endswith(".mp3"):
                continue

            seq = _parse_zip_seq(name)
            if seq is None:
                # Member name carries no usable sequence number.
                continue

            entry = by_seq.get(seq)
            if entry is None:
                counts["missing_seq"] += 1
                logger.info(f" [MISSING] {name} → seq {seq} not in parsed JSON")
                continue

            kind = entry["type"]
            preview = (entry.get("text") or "")[:50]
            dtype = entry.get("direction_type") or ""

            # Headers never carry useful audio.
            if kind in ("section_header", "scene_header"):
                counts["skipped_header"] += 1
                logger.info(f" [HEADER] {name} — {kind}: {preview}")
                continue

            # Directions are opt-in per direction_type.
            if kind == "direction" and dtype not in wanted_dtypes:
                counts["skipped_type"] += 1
                prefix = f"{dtype}: " if dtype else ""
                logger.info(f" [SKIP] {name} — {prefix}{preview}")
                continue

            stem_name = make_stem_name(entry)
            target = os.path.join(stems_dir, stem_name)
            if not force and os.path.exists(target):
                counts["skipped_exists"] += 1
                logger.info(f" [EXISTS] {name} → {stem_name}")
                continue

            who = entry.get("speaker") or "sfx"
            logger.info(
                f" [{marker}] {name} → {stem_name}"
                f" ({who}: {preview})"
            )
            if writing:
                payload = archive.read(name)
                with open(target, "wb") as out:
                    out.write(payload)
            # NOTE(review): counted for dry runs as well, so the summary
            # reflects the plan — confirm against the original indentation.
            counts["extracted"] += 1

    return counts
|
print_summary
print_summary(stats: dict, dry_run: bool = False) -> None
Print a summary of the extraction results.
Parameters:
-
stats
(dict)
–
Stats dict returned by extract_stems().
-
dry_run
(bool, default:
False
)
–
Whether the run was a dry run.
Source code in src/xil_pipeline/XILP010_studio_import.py
def print_summary(stats: dict, dry_run: bool = False) -> None:
    """Log a short report of the extraction counters.

    Args:
        stats: Counter dict as returned by :func:`extract_stems`; the keys
            ``extracted``, ``skipped_exists``, ``skipped_type``,
            ``skipped_header`` and ``missing_seq`` are read.
        dry_run: Whether the run was a dry run (changes the heading only).
    """
    rule = "─" * 50
    heading = "DRY-RUN" if dry_run else "COMPLETE"

    logger.info(f"\n{rule}")
    logger.info(f" SUMMARY ({heading})")
    logger.info(f"{rule}")

    # (label, stats key) pairs, logged in this order.
    rows = (
        (" Extracted: ", "extracted"),
        (" Skipped (exist): ", "skipped_exists"),
        (" Skipped (type): ", "skipped_type"),
        (" Skipped (header):", "skipped_header"),
    )
    for label, key in rows:
        logger.info(f"{label}{stats[key]:>4}")

    # Only mention unmatched ZIP members when there are any.
    unmatched = stats["missing_seq"]
    if unmatched:
        logger.info(f" Missing seq: {unmatched:>4} ⚠")
    logger.info("")
|
get_parser
get_parser() -> argparse.ArgumentParser
Source code in src/xil_pipeline/XILP010_studio_import.py
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the Studio import tool.

    Returns:
        A parser that requires exactly one of ``--episode`` / ``--tag`` plus
        ``--zip``, and accepts optional path overrides and boolean switches.
    """
    cli = argparse.ArgumentParser(
        prog="xil-import",
        description="Import ElevenLabs Studio export ZIP into pipeline stems.",
    )

    # Exactly one of --episode / --tag must be given.
    target = cli.add_mutually_exclusive_group(required=True)
    target.add_argument(
        "--episode",
        help="Episode tag (e.g. S02E02) — derives parsed JSON and stems dir",
    )
    target.add_argument(
        "--tag",
        help="Raw tag for non-episodic content (e.g. V01C03, D01)",
    )

    cli.add_argument(
        "--show",
        default=None,
        help="Show name override (default: from project.json)",
    )
    cli.add_argument(
        "--zip",
        required=True,
        dest="zip_path",
        help="Path to the ElevenLabs Studio export ZIP file",
    )
    cli.add_argument(
        "--parsed",
        help="Override parsed JSON path (default: parsed/parsed_<slug>_{TAG}.json)",
    )
    cli.add_argument(
        "--stems-dir",
        help="Override stems output directory (default: stems/{TAG})",
    )

    # Boolean switches: (flag, extra add_argument kwargs, help text).
    switches = (
        ("--dry-run", {}, "Show extraction plan without writing files"),
        ("--force", {}, "Overwrite existing stems on disk"),
        ("--gen-sfx", {}, "Include SFX direction entries"),
        ("--gen-music", {}, "Include MUSIC direction entries"),
        ("--gen-beats", {}, "Include BEAT direction entries"),
        (
            "--all",
            {"dest": "all_types"},
            "Include all direction types (SFX, MUSIC, BEAT, AMBIENCE)",
        ),
    )
    for flag, extra, text in switches:
        cli.add_argument(flag, action="store_true", help=text, **extra)

    return cli
|
main
Source code in src/xil_pipeline/XILP010_studio_import.py
def main():
    """Command-line entry point for the Studio export importer.

    Parses the CLI arguments, derives the parsed-JSON path and stems
    directory from the episode/tag (unless overridden), validates that the
    ZIP and parsed JSON exist, logs a run header, then calls
    :func:`extract_stems` and :func:`print_summary`.

    If either input file is missing, an error is logged and the function
    returns without extracting anything.
    """
    configure_logging()
    args = get_parser().parse_args()

    # Build set of included direction types; --all switches every type on
    # and is the only way to include AMBIENCE.
    include_dtypes: set[str] = set()
    if args.gen_sfx or args.all_types:
        include_dtypes.add("SFX")
    if args.gen_music or args.all_types:
        include_dtypes.add("MUSIC")
    if args.gen_beats or args.all_types:
        include_dtypes.add("BEAT")
    if args.all_types:
        include_dtypes.add("AMBIENCE")

    # --episode and --tag are mutually exclusive, so exactly one is set.
    tag = args.episode or args.tag
    slug = resolve_slug(args.show)
    p = derive_paths(slug, tag)
    parsed_path = args.parsed or p["parsed"]
    stems_dir = args.stems_dir or p["stems"]

    with run_banner(SCRIPT_NAME):
        # Validate inputs
        if not os.path.isfile(args.zip_path):
            logger.error(f"ZIP file not found: {args.zip_path}")
            return
        if not os.path.isfile(parsed_path):
            logger.error(f"Parsed JSON not found: {parsed_path}")
            return

        # Read as UTF-8 explicitly: the default text encoding is
        # locale-dependent and would mis-decode non-ASCII script text on
        # platforms whose locale is not UTF-8.
        with open(parsed_path, encoding="utf-8") as f:
            parsed = json.load(f)

        total = len(parsed["entries"])
        dialogue = sum(1 for e in parsed["entries"] if e["type"] == "dialogue")
        direction = sum(1 for e in parsed["entries"] if e["type"] == "direction")
        # Everything that is neither dialogue nor direction is reported as
        # a header.
        headers = total - dialogue - direction

        logger.info(f" Episode: {tag}")
        logger.info(f" ZIP: {args.zip_path}")
        logger.info(f" Parsed: {parsed_path} ({total} entries)")
        logger.info(f" Stems dir: {stems_dir}")
        logger.info(f" Entries: {dialogue} dialogue, {direction} directions, {headers} headers")

        mode_parts = []
        if args.dry_run:
            mode_parts.append("dry-run")
        if args.force:
            mode_parts.append("force")
        if include_dtypes:
            mode_parts.append(f"include: {', '.join(sorted(include_dtypes))}")
        if mode_parts:
            logger.info(f" Mode: {', '.join(mode_parts)}")
        logger.info("")

        stats = extract_stems(
            args.zip_path,
            parsed,
            stems_dir,
            dry_run=args.dry_run,
            force=args.force,
            include_dtypes=include_dtypes,
        )
        print_summary(stats, dry_run=args.dry_run)
|