Skip to content

Compare Stem Verify

tools.compare_stem_verify

Compare two stem_verify JSON reports by Whisper transcript similarity.

Matches stems across two reports using bipartite greedy matching on transcript text. Filenames and seq numbering need not align — matching is done on what Whisper actually heard.

Typical use: compare ElevenLabs Studio export stems against pipeline-generated stems for the same episode. The tool reports matched pairs and orphans (unmatched entries), and flags matched pairs whose transcript similarity falls below a threshold.

Usage::

python3 tools/compare_stem_verify.py \
    ElevenLabs_exports/.../stem_verify_report.json \
    parsed/the413/stem_verify_S01E01.json \
    --label-a "EL-Studio" --label-b "Pipeline" \
    --output /tmp/compare.json

get_parser

get_parser() -> argparse.ArgumentParser
Source code in tools/compare_stem_verify.py
def get_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for ``compare_stem_verify``.

    Returns:
        An ``argparse.ArgumentParser`` with two positional report paths
        (``report_a``, ``report_b``) and the optional flags ``--label-a``,
        ``--label-b``, ``--threshold``, ``--min-match``, ``--output``/``-o``,
        ``--no-sfx-filter`` and ``--verbose``/``-v``.
    """
    cli = argparse.ArgumentParser(
        prog="compare_stem_verify",
        description="Compare two stem_verify JSON reports by Whisper transcript similarity.",
    )
    add = cli.add_argument

    # Positional inputs: the two reports to compare.
    add("report_a", metavar="REPORT_A", help="First stem_verify JSON (e.g. ElevenLabs export)")
    add("report_b", metavar="REPORT_B", help="Second stem_verify JSON (e.g. pipeline stems)")

    # Display labels used when presenting each side.
    add("--label-a", default="A", metavar="LABEL", help="Display label for report A")
    add("--label-b", default="B", metavar="LABEL", help="Display label for report B")

    # Similarity cut-offs: one for flagging pairs, one for match vs orphan.
    add(
        "--threshold",
        type=float,
        default=0.75,
        metavar="FLOAT",
        help="Flag pairs with similarity below this (default: 0.75)",
    )
    add(
        "--min-match",
        type=float,
        default=0.40,
        metavar="FLOAT",
        help="Minimum similarity to treat as matched vs orphan (default: 0.40)",
    )

    # Output and filtering switches.
    add(
        "--output", "-o",
        default=None,
        metavar="FILE",
        help="Write full JSON report to file",
    )
    add(
        "--no-sfx-filter",
        action="store_true",
        help="Include sfx-speaker entries (excluded by default)",
    )
    add(
        "--verbose", "-v",
        action="store_true",
        help="Print all matched pairs as a mapping table (A-filename → B-seq/speaker/scene)",
    )
    return cli

main

main() -> None
Source code in tools/compare_stem_verify.py
def main() -> None:
    """Command-line entry point: compare two stem_verify reports.

    Parses the command line, loads both reports, matches their entries,
    prints the comparison, and optionally writes the full result as JSON
    to the path given by ``--output``.

    Exits with status 1 (after printing an error to stderr) if either
    report path does not exist.
    """
    args = get_parser().parse_args()

    path_a, path_b = Path(args.report_a), Path(args.report_b)

    # Bail out early when an input is missing; report A is checked first.
    if not path_a.exists():
        print(f"Error: report_a not found: {path_a}", file=sys.stderr)
        sys.exit(1)
    if not path_b.exists():
        print(f"Error: report_b not found: {path_b}", file=sys.stderr)
        sys.exit(1)

    # Report A is always loaded unfiltered (EL has no speaker field);
    # only report B honours --no-sfx-filter.
    a_entries = _load_entries(path_a, sfx_filter=False)
    b_entries = _load_entries(path_b, sfx_filter=not args.no_sfx_filter)

    print(f"Loaded {len(a_entries)} A-entries from {path_a.name}")
    print(f"Loaded {len(b_entries)} B-entries from {path_b.name}")

    # Progress line is left open and completed with "done." after matching.
    print("Matching... ", end="", flush=True)
    pairs, a_orphans, b_orphans = _match(a_entries, b_entries, args.min_match)
    print("done.")

    result = _build_output(
        pairs,
        a_orphans,
        b_orphans,
        a_entries,
        b_entries,
        args.label_a,
        args.label_b,
        args.threshold,
        path_a,
        path_b,
    )

    _print_report(result, verbose=args.verbose)

    # Writing the JSON report is optional; nothing more to do without --output.
    if not args.output:
        return

    destination = Path(args.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", encoding="utf-8") as handle:
        json.dump(result, handle, indent=2)
    print(f"\nWritten: {destination}")