Stem generation log reporter.
Parses daily logs/xil_YYYY-MM-DD.log files and produces a CSV
chronology of every dialogue MP3 stem that was generated, including
its backend, SHA-256 hash, and approximate creation context.
Usage:
xil-stem-log # scan <workspace>/logs/ → stem_log_report.csv
xil-stem-log --logs-dir logs/ # explicit log directory
xil-stem-log --output report.csv # override output path
xil-stem-log --since 2026-04-01 # only logs on or after this date
xil-stem-log --episode S03E03 # filter to one episode tag
xil-stem-log --episode S03E03 --since 2026-04-01
xil-stem-log --slug the413 # filter to one show slug
xil-stem-log --show # print CSV to stdout instead of file
CSV columns
log_date YYYY-MM-DD date from log filename
log_file log filename
run_index integer incremented per Phase 1: Generating block (proxy for
distinct xil produce invocations within a day)
log_line line number of the Saved: entry (intra-day ordering)
seq dialogue sequence number
speaker speaker key (e.g. adam, sarah)
backend TTS engine: eleven_v3, gtts, or chatterbox
char_count character count sent to the TTS engine
stem_path relative path recorded in log (e.g. stems/the413/S03E03/…)
stem_filename basename only
sha256 SHA-256 hex digest of the generated file
get_parser
get_parser() -> argparse.ArgumentParser
Source code in src/xil_pipeline/XILU008_stem_log_report.py
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for ``xil-stem-log``.

    Options defined:
        --logs-dir DIR          log directory (default ``None``; the caller
                                decides the fallback location)
        --output/-o PATH        CSV destination (default
                                ``stem_log_report.csv``; ``-`` means stdout)
        --since YYYY-MM-DD      lower bound on the log file date
        --episode/--tag TAG     episode tag filter, stored as ``episode``
        --slug SLUG             show slug filter
        --show                  flag: print the CSV to stdout
        --audit PARSED_JSON     parsed script JSON to audit against
        --audit-threshold N     integer char-count delta (default ``20``)

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    from datetime import date

    def _iso_date(value: str) -> str:
        """Validate *value* as an ISO date and return it as ``YYYY-MM-DD``."""
        try:
            # Re-serialising normalises the value, so the plain string
            # comparison done against log file dates stays chronological.
            return date.fromisoformat(value).isoformat()
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"invalid date {value!r}; expected YYYY-MM-DD"
            ) from None

    parser = argparse.ArgumentParser(
        prog="xil-stem-log",
        description="Parse xil-pipeline logs into a stem generation chronology CSV.",
    )
    parser.add_argument(
        "--logs-dir",
        default=None,
        metavar="DIR",
        help="Directory containing xil_YYYY-MM-DD.log files (default: <workspace>/logs/)",
    )
    parser.add_argument(
        "--output", "-o",
        default="stem_log_report.csv",
        metavar="PATH",
        help="Output CSV path (default: stem_log_report.csv); use - for stdout",
    )
    parser.add_argument(
        "--since",
        # Reject malformed dates up front instead of silently mis-filtering.
        type=_iso_date,
        metavar="YYYY-MM-DD",
        help="Only include log files on or after this date",
    )
    parser.add_argument(
        "--episode", "--tag",
        metavar="TAG",
        dest="episode",
        help="Filter records to a specific episode tag (e.g. S03E03)",
    )
    parser.add_argument(
        "--slug",
        metavar="SLUG",
        help="Filter records to a specific show slug (e.g. the413)",
    )
    parser.add_argument(
        "--show",
        action="store_true",
        help="Print CSV to stdout (equivalent to --output -)",
    )
    parser.add_argument(
        "--audit",
        metavar="PARSED_JSON",
        help="Instead of writing CSV: compare logged char_counts against the given "
        "parsed script JSON and flag stems whose audio may not match current text.",
    )
    parser.add_argument(
        "--audit-threshold",
        type=int,
        default=20,
        metavar="N",
        help="Char-count delta threshold for --audit flagging (default: 20)",
    )
    return parser
|
main
Source code in src/xil_pipeline/XILU008_stem_log_report.py
def main() -> None:
    """Run the ``xil-stem-log`` command.

    Parses the command line, gathers ``xil_*.log`` files from the logs
    directory, collects the records ``_parse_log`` returns for each file,
    applies the optional episode/slug filters on ``stem_path``, and then
    either hands the records to ``_audit`` or writes them out through
    ``_write_csv`` (to stdout or to the output path).

    Exits with status 1 when the logs directory does not exist and with
    status 0 when no log file matches. Progress messages go to stderr.
    """
    opts = get_parser().parse_args()

    # No explicit directory: fall back to <workspace root>/logs.
    if opts.logs_dir is None:
        from xil_pipeline.models import get_workspace_root

        opts.logs_dir = str(get_workspace_root() / "logs")

    log_root = Path(opts.logs_dir)
    if not log_root.is_dir():
        print(f"[ERROR] Logs directory not found: {log_root}", file=sys.stderr)
        sys.exit(1)

    candidates = sorted(log_root.glob("xil_*.log"))
    if opts.since:
        candidates = [
            path for path in candidates
            if _date_from_filename(path.name) >= opts.since
        ]
    if not candidates:
        print("[!] No matching log files found.", file=sys.stderr)
        sys.exit(0)

    stems: list[dict] = []
    for log_path in candidates:
        parsed = _parse_log(log_path)
        stems.extend(parsed)
        print(f" {log_path.name}: {len(parsed)} stems", file=sys.stderr)

    # Both filters are case-insensitive substring matches on stem_path;
    # records without a stem_path are dropped by either filter.
    if opts.episode:
        wanted_tag = opts.episode.upper()
        stems = [
            rec for rec in stems
            if rec.get("stem_path") and wanted_tag in (rec["stem_path"].upper())
        ]
    if opts.slug:
        wanted_slug = opts.slug.lower()
        stems = [
            rec for rec in stems
            if rec.get("stem_path") and wanted_slug in (rec["stem_path"].lower())
        ]

    print(f"Total: {len(stems)} stem records", file=sys.stderr)

    # Audit mode replaces CSV output entirely.
    if opts.audit:
        _audit(stems, opts.audit, threshold=opts.audit_threshold)
        return

    if opts.show or opts.output == "-":
        _write_csv(sys.stdout, stems)
        return

    destination = Path(opts.output)
    with destination.open("w", newline="", encoding="utf-8") as sink:
        _write_csv(sink, stems)
    print(f"Written: {destination}", file=sys.stderr)
|