mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
* feat(skills): add memento-flashcards skill * docs(skills): clarify memento-flashcards interaction model * fix: use HERMES_HOME env var for profile-safe data path --------- Co-authored-by: Magnus Ahmad <magnus.ahmad@gmail.com>
88 lines
2.3 KiB
Python
88 lines
2.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Fetch YouTube transcripts for Memento quiz generation.
|
|
|
|
Requires: pip install youtube-transcript-api
|
|
The quiz question *generation* is done by the agent's LLM — this script only fetches transcripts.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
|
|
|
|
def _out(obj: object) -> None:
|
|
json.dump(obj, sys.stdout, indent=2, ensure_ascii=False)
|
|
sys.stdout.write("\n")
|
|
|
|
|
|
def _normalize_segments(segments: list) -> str:
|
|
parts = []
|
|
for seg in segments:
|
|
text = str(seg.get("text", "")).strip()
|
|
if text:
|
|
parts.append(text)
|
|
return re.sub(r"\s+", " ", " ".join(parts)).strip()
|
|
|
|
|
|
def cmd_fetch(args: argparse.Namespace) -> None:
|
|
try:
|
|
import youtube_transcript_api # noqa: F811
|
|
except ImportError:
|
|
_out({
|
|
"ok": False,
|
|
"error": "missing_dependency",
|
|
"message": "Run: pip install youtube-transcript-api",
|
|
})
|
|
sys.exit(1)
|
|
|
|
video_id = args.video_id
|
|
languages = ["en", "en-US", "en-GB", "en-CA", "en-AU"]
|
|
|
|
api = youtube_transcript_api.YouTubeTranscriptApi()
|
|
try:
|
|
raw = api.fetch(video_id, languages=languages)
|
|
except Exception as exc:
|
|
error_type = type(exc).__name__
|
|
_out({
|
|
"ok": False,
|
|
"error": "transcript_unavailable",
|
|
"error_type": error_type,
|
|
"message": f"Could not fetch transcript for {video_id}: {exc}",
|
|
})
|
|
sys.exit(1)
|
|
|
|
segments = raw
|
|
if hasattr(raw, "to_raw_data"):
|
|
segments = raw.to_raw_data()
|
|
|
|
text = _normalize_segments(segments)
|
|
if not text:
|
|
_out({
|
|
"ok": False,
|
|
"error": "empty_transcript",
|
|
"message": f"Transcript for {video_id} contained no usable text.",
|
|
})
|
|
sys.exit(1)
|
|
|
|
_out({
|
|
"ok": True,
|
|
"video_id": video_id,
|
|
"transcript": text,
|
|
})
|
|
|
|
|
|
def main() -> None:
|
|
parser = argparse.ArgumentParser(description="Memento YouTube transcript fetcher")
|
|
sub = parser.add_subparsers(dest="command", required=True)
|
|
|
|
p_fetch = sub.add_parser("fetch", help="Fetch transcript for a video")
|
|
p_fetch.add_argument("video_id", help="YouTube video ID")
|
|
|
|
args = parser.parse_args()
|
|
if args.command == "fetch":
|
|
cmd_fetch(args)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|