mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-10 03:22:05 +00:00
Third slice of the Microsoft Teams meeting pipeline stack, salvaged onto current main. Adds the standalone teams_pipeline plugin that consumes Graph change notifications from the webhook listener, resolves meeting artifacts (transcript first, recording + STT fallback later), persists job state in a durable store, and exposes an operator CLI for inspection, replay, subscription management, and validation. Design choices follow maintainer review feedback on PR #19815: - Standalone plugin rather than bolted-on core surface (plugins/teams_pipeline/, kind: standalone in plugin.yaml). - Zero new model tools. The agent drives the pipeline by invoking the operator CLI via the terminal tool, guided by the skill that ships with a follow-up PR. - Reuses the existing msgraph_webhook gateway platform for Graph ingress. Pipeline runtime is wired in via bind_gateway_runtime and gated on plugins.enabled so gateways that don't run the plugin boot cleanly. Additions: - plugins/teams_pipeline/: runtime (gateway wiring + config builder), pipeline core, durable SQLite store, subscription maintenance helpers, Graph artifact resolution, operator CLI (list, show, run/replay, fetch dry-run, subscriptions list, subscribe, renew-subscription, delete-subscription, maintain-subscriptions, token-health, validate). - hermes_cli/main.py: second-pass plugin CLI discovery so any standalone plugin registered via ctx.register_cli_command() outside the memory-plugin convention path gets its subcommand wired into argparse without touching core. - gateway/run.py: _teams_pipeline_plugin_enabled() config gate, _wire_teams_pipeline_runtime() binding after adapter setup, and the two runner attributes used by the runtime. Credit to @dlkakbs for the entire plugin implementation.
333 lines
12 KiB
Python
333 lines
12 KiB
Python
"""Graph-backed Teams meeting helpers for the plugin runtime."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from urllib.parse import quote
|
|
|
|
from plugins.teams_pipeline.models import MeetingArtifact, TeamsMeetingRef
|
|
from tools.microsoft_graph_client import MicrosoftGraphAPIError, MicrosoftGraphClient
|
|
|
|
|
|
class TeamsMeetingError(RuntimeError):
    """Common ancestor of the Teams meeting pipeline exceptions in this module.

    Raised directly for Graph failures that are neither "not found" nor
    permission related; the more specific cases use the subclasses.
    """
|
|
|
|
|
|
class TeamsMeetingNotFoundError(TeamsMeetingError):
    """Signals that Graph could not resolve the requested meeting.

    Also used for any Graph 404 translated by this module's error wrapper.
    """
|
|
|
|
|
|
class TeamsMeetingArtifactNotFoundError(TeamsMeetingError):
    """Signals that a transcript or recording is missing or unusable.

    Within this module it is raised when a downloaded transcript turns out
    to be empty.
    """
|
|
|
|
|
|
class TeamsMeetingPermissionError(TeamsMeetingError):
    """Signals that Graph denied access (HTTP 401 or 403) to the resource."""
|
|
|
|
|
|
def _meeting_path(meeting_ref: TeamsMeetingRef | str) -> str:
    """Build the Graph resource path of an online meeting.

    Args:
        meeting_ref: A ``TeamsMeetingRef`` (its ``meeting_id`` is used) or any
            other value, which is converted with ``str()``.

    Returns:
        ``/communications/onlineMeetings/<id>`` with the id percent-encoded.
        No character is treated as safe, so a ``/`` inside the id cannot
        introduce an extra path segment.
    """
    if isinstance(meeting_ref, TeamsMeetingRef):
        raw_id = meeting_ref.meeting_id
    else:
        raw_id = str(meeting_ref)
    encoded_id = quote(raw_id, safe="")
    return f"/communications/onlineMeetings/{encoded_id}"
|
|
|
|
|
|
def _wrap_graph_error(exc: MicrosoftGraphAPIError, *, missing_message: str) -> TeamsMeetingError:
    """Translate a Graph API error into the matching pipeline exception.

    The exception is returned rather than raised so call sites can write
    ``raise _wrap_graph_error(...) from exc`` and keep the original cause.

    Args:
        exc: The Graph error; only ``status_code`` and ``str(exc)`` are used.
        missing_message: Message for the exception produced on HTTP 404.

    Returns:
        ``TeamsMeetingNotFoundError`` for 404, ``TeamsMeetingPermissionError``
        for 401/403, otherwise a plain ``TeamsMeetingError``.
    """
    status = exc.status_code
    if status == 404:
        return TeamsMeetingNotFoundError(missing_message)
    if status in (401, 403):
        return TeamsMeetingPermissionError(str(exc))
    return TeamsMeetingError(str(exc))
|
|
|
|
|
|
def _parse_organizer_user_id(payload: dict[str, Any]) -> str | None:
    """Extract the organizer's user id from a Graph meeting or call-record payload.

    Graph does not use a single shape for the organizer, so several layouts
    are accepted (shapes per the Microsoft Graph resource documentation):

    * ``organizer.identity.user.id`` -- organizer wrapping an identity set;
    * ``organizer.user.id`` -- organizer that is itself an identity set
      (the callRecord layout);
    * either of the above under ``participants.organizer`` when there is no
      top-level ``organizer`` object (the onlineMeeting layout).

    Args:
        payload: Decoded JSON object returned by Graph.

    Returns:
        The value stored under the user's ``id`` key, or ``None`` when no
        organizer user object can be located.
    """
    organizer = payload.get("organizer")
    if not isinstance(organizer, dict):
        # onlineMeeting nests the organizer inside a participants *object*;
        # callRecord uses a participants *list*, which is ignored here.
        participants = payload.get("participants")
        organizer = participants.get("organizer") if isinstance(participants, dict) else None
    if not isinstance(organizer, dict):
        return None

    # Prefer the wrapped identity set, then fall back to treating the
    # organizer object itself as the identity set.
    for identity in (organizer.get("identity"), organizer):
        if not isinstance(identity, dict):
            continue
        user = identity.get("user")
        if isinstance(user, dict):
            return user.get("id")
    return None
|
|
|
|
|
|
def _parse_thread_id(payload: dict[str, Any]) -> str | None:
    """Return the chat thread id carried by a meeting payload.

    A truthy ``chatInfo.threadId`` wins and is converted with ``str()``.
    Otherwise the top-level ``threadId`` value is returned as-is, which is
    ``None`` when the key is absent.
    """
    chat_info = payload.get("chatInfo")
    nested_id = chat_info.get("threadId") if isinstance(chat_info, dict) else None
    if nested_id:
        return str(nested_id)
    return payload.get("threadId")
|
|
|
|
|
|
def _normalize_meeting_ref(payload: dict[str, Any], *, tenant_id: str | None = None) -> TeamsMeetingRef:
    """Convert a Graph online-meeting payload into a ``TeamsMeetingRef``.

    Args:
        payload: Decoded meeting object from Graph.
        tenant_id: Explicit tenant id; when falsy the payload's ``tenantId``
            is used instead.

    Returns:
        A ``TeamsMeetingRef`` whose ``meeting_id`` is the stripped string form
        of ``payload["id"]`` (empty string when missing). ``metadata`` holds
        the non-``None`` scheduling fields plus ``participants`` when present.
    """
    metadata: dict[str, Any] = {}
    for field_name in ("subject", "startDateTime", "endDateTime", "createdDateTime"):
        field_value = payload.get(field_name)
        if field_value is not None:
            metadata[field_name] = field_value

    participants = payload.get("participants")
    if participants is not None:
        metadata["participants"] = participants

    raw_id = payload.get("id") or ""
    return TeamsMeetingRef(
        meeting_id=str(raw_id).strip(),
        organizer_user_id=_parse_organizer_user_id(payload),
        join_web_url=payload.get("joinWebUrl"),
        calendar_event_id=payload.get("calendarEventId"),
        thread_id=_parse_thread_id(payload),
        tenant_id=tenant_id or payload.get("tenantId"),
        metadata=metadata,
    )
|
|
|
|
|
|
def _normalize_artifact(
    artifact_type: str,
    payload: dict[str, Any],
    *,
    default_source_url: str | None = None,
) -> MeetingArtifact:
    """Convert a Graph transcript/recording payload into a ``MeetingArtifact``.

    Args:
        artifact_type: Label stored on the artifact (e.g. ``"transcript"``).
        payload: Decoded artifact object from Graph.
        default_source_url: Fallback source URL when the payload carries
            neither ``webUrl`` nor ``contentUrl``.

    Returns:
        The artifact, with a shallow copy of ``payload`` kept as ``metadata``.
    """
    # First truthy candidate wins; when none is truthy the value of the last
    # key looked up is kept, exactly like an ``or`` chain.
    download_url = None
    for url_key in (
        "@microsoft.graph.downloadUrl",
        "downloadUrl",
        "recordingContentUrl",
        "transcriptContentUrl",
    ):
        download_url = payload.get(url_key)
        if download_url:
            break

    source_url = payload.get("webUrl")
    if not source_url:
        source_url = payload.get("contentUrl") or default_source_url

    raw_id = payload.get("id") or ""
    return MeetingArtifact(
        artifact_type=artifact_type,  # type: ignore[arg-type]
        artifact_id=str(raw_id).strip(),
        display_name=payload.get("displayName") or payload.get("name"),
        content_type=payload.get("contentType") or payload.get("fileMimeType"),
        source_url=source_url,
        download_url=download_url,
        created_at=payload.get("createdDateTime"),
        available_at=payload.get("lastModifiedDateTime") or payload.get("meetingEndDateTime"),
        size_bytes=payload.get("size"),
        metadata=dict(payload),
    )
|
|
|
|
|
|
def _transcript_sort_key(artifact: MeetingArtifact) -> tuple[int, int, str]:
    """Rank a transcript so that the largest key is the preferred candidate.

    Key components, most significant first:

    1. ``1`` when ``metadata["status"]`` is available/completed/succeeded
       (case-insensitive), else ``0``;
    2. ``1`` when the artifact has a download or source URL, else ``0``;
    3. ISO-8601 text of ``available_at``, falling back to ``created_at``,
       or ``""`` when both are ``None``.

    NOTE(review): the timestamps must expose ``isoformat()``; this assumes
    ``MeetingArtifact`` stores datetime-like values -- confirm against the model.
    """
    status = str(artifact.metadata.get("status") or "").lower()
    is_completed = int(status in {"available", "completed", "succeeded"})
    has_download = int(bool(artifact.download_url or artifact.source_url))

    moment = artifact.available_at
    if moment is None:
        moment = artifact.created_at
    timestamp = "" if moment is None else moment.isoformat()

    return (is_completed, has_download, timestamp)
|
|
|
|
|
|
def _recording_download_path(meeting_ref: TeamsMeetingRef, artifact: MeetingArtifact) -> str:
    """Pick where a recording's content should be downloaded from.

    Returns the artifact's own ``download_url`` when it is truthy; otherwise
    the meeting-relative ``.../recordings/<id>/content`` Graph path with the
    artifact id percent-encoded.
    """
    direct_url = artifact.download_url
    if direct_url:
        return direct_url
    base = _meeting_path(meeting_ref)
    recording_id = quote(artifact.artifact_id, safe="")
    return f"{base}/recordings/{recording_id}/content"
|
|
|
|
|
|
def _transcript_download_path(meeting_ref: TeamsMeetingRef, artifact: MeetingArtifact) -> str:
    """Pick where a transcript's content should be downloaded from.

    Returns the artifact's own ``download_url`` when it is truthy; otherwise
    the meeting-relative ``.../transcripts/<id>/content`` Graph path with the
    artifact id percent-encoded.
    """
    direct_url = artifact.download_url
    if direct_url:
        return direct_url
    base = _meeting_path(meeting_ref)
    transcript_id = quote(artifact.artifact_id, safe="")
    return f"{base}/transcripts/{transcript_id}/content"
|
|
|
|
|
|
async def resolve_meeting_reference(
    client: MicrosoftGraphClient,
    *,
    meeting_id: str | None = None,
    join_web_url: str | None = None,
    tenant_id: str | None = None,
) -> TeamsMeetingRef:
    """Resolve a Teams meeting through Graph by id or by join URL.

    ``meeting_id`` takes precedence when both identifiers are supplied.

    Args:
        client: Graph client providing an awaitable ``get_json``.
        meeting_id: Online-meeting id to fetch directly.
        join_web_url: Join URL used in a ``$filter`` lookup.
        tenant_id: Tenant id forwarded to the resulting reference.

    Returns:
        The normalized ``TeamsMeetingRef``.

    Raises:
        ValueError: Neither ``meeting_id`` nor ``join_web_url`` was given.
        TeamsMeetingNotFoundError: Graph answered 404 or returned no usable
            meeting object (one that is a JSON object carrying an ``id``).
        TeamsMeetingPermissionError: Graph answered 401 or 403.
        TeamsMeetingError: Any other Graph API failure.
    """
    if meeting_id:
        try:
            payload = await client.get_json(_meeting_path(meeting_id))
        except MicrosoftGraphAPIError as exc:
            raise _wrap_graph_error(exc, missing_message=f"Teams meeting not found: {meeting_id}") from exc
        if not isinstance(payload, dict) or not payload.get("id"):
            raise TeamsMeetingNotFoundError(f"Teams meeting not found: {meeting_id}")
        return _normalize_meeting_ref(payload, tenant_id=tenant_id)

    if join_web_url:
        missing_message = f"Teams meeting not found for join URL: {join_web_url}"
        # OData string literals escape a single quote by doubling it.
        escaped_join_url = join_web_url.replace("'", "''")
        try:
            payload = await client.get_json(
                "/communications/onlineMeetings",
                params={"$filter": f"JoinWebUrl eq '{escaped_join_url}'"},
            )
        except MicrosoftGraphAPIError as exc:
            raise _wrap_graph_error(exc, missing_message=missing_message) from exc

        candidates = payload.get("value") if isinstance(payload, dict) else None
        if not isinstance(candidates, list):
            candidates = []
        # Apply the same validity rule as the meeting_id branch: only a JSON
        # object with an id can become a usable reference. Malformed entries
        # are skipped instead of crashing or yielding an empty meeting_id.
        usable = [entry for entry in candidates if isinstance(entry, dict) and entry.get("id")]
        if not usable:
            raise TeamsMeetingNotFoundError(missing_message)
        return _normalize_meeting_ref(usable[0], tenant_id=tenant_id)

    raise ValueError("Either meeting_id or join_web_url is required.")
|
|
|
|
|
|
async def list_transcript_artifacts(
    client: MicrosoftGraphClient,
    meeting_ref: TeamsMeetingRef,
) -> list[MeetingArtifact]:
    """List the transcripts Graph reports for a meeting.

    Entries returned by ``client.collect_paginated`` that are not dicts are
    skipped; the rest are normalized as ``"transcript"`` artifacts.

    Raises:
        TeamsMeetingNotFoundError: Graph answered 404.
        TeamsMeetingPermissionError: Graph answered 401 or 403.
        TeamsMeetingError: Any other Graph API failure.
    """
    transcripts_path = f"{_meeting_path(meeting_ref)}/transcripts"
    try:
        payloads = await client.collect_paginated(transcripts_path)
    except MicrosoftGraphAPIError as exc:
        raise _wrap_graph_error(
            exc,
            missing_message=f"No transcripts found for Teams meeting {meeting_ref.meeting_id}",
        ) from exc

    artifacts: list[MeetingArtifact] = []
    for entry in payloads:
        if isinstance(entry, dict):
            artifacts.append(_normalize_artifact("transcript", entry))
    return artifacts
|
|
|
|
|
|
def select_preferred_transcript(candidates: list[MeetingArtifact]) -> MeetingArtifact | None:
    """Choose the best transcript among mixed meeting artifacts.

    Only artifacts whose ``artifact_type`` is ``"transcript"`` compete; the
    one with the largest ``_transcript_sort_key`` wins. Returns ``None`` when
    no transcript is present.
    """
    transcripts = [item for item in candidates if item.artifact_type == "transcript"]
    if not transcripts:
        return None
    # max() keeps the first of several equally ranked items, which is the same
    # element a stable descending sort would place first.
    return max(transcripts, key=_transcript_sort_key)
|
|
|
|
|
|
async def download_transcript_text(
    client: MicrosoftGraphClient,
    meeting_ref: TeamsMeetingRef,
    transcript: MeetingArtifact,
    *,
    encoding: str = "utf-8",
) -> str:
    """Download a transcript and return its text, stripped of outer whitespace.

    The content is written to a temporary file via ``client.download_to_file``
    and read back; the file is removed afterwards whether or not the download
    succeeded.

    Args:
        client: Graph client providing an awaitable ``download_to_file``.
        meeting_ref: Meeting the transcript belongs to.
        transcript: Transcript artifact to download.
        encoding: Text encoding used to decode the downloaded file.

    Raises:
        TeamsMeetingArtifactNotFoundError: The transcript text was empty.
        TeamsMeetingNotFoundError: Graph answered 404.
        TeamsMeetingPermissionError: Graph answered 401 or 403.
        TeamsMeetingError: Any other Graph API failure.
    """
    # Reuse the transcript's own extension for the temp file when it has one.
    name_for_suffix = transcript.display_name or "transcript.vtt"
    suffix = Path(name_for_suffix).suffix or ".txt"

    # Only the reserved path is needed: the handle is closed right away and
    # the file is kept (delete=False) so the client can write to it.
    with tempfile.NamedTemporaryFile(prefix="teams-transcript-", suffix=suffix, delete=False) as handle:
        destination = Path(handle.name)

    try:
        await client.download_to_file(_transcript_download_path(meeting_ref, transcript), destination)
        raw_text = destination.read_text(encoding=encoding)
    except MicrosoftGraphAPIError as exc:
        raise _wrap_graph_error(
            exc,
            missing_message=(
                f"Transcript {transcript.artifact_id} not found for meeting {meeting_ref.meeting_id}"
            ),
        ) from exc
    finally:
        # Best-effort cleanup: a failed delete must not mask the real outcome.
        try:
            destination.unlink(missing_ok=True)
        except OSError:
            pass

    text = raw_text.strip()
    if text:
        return text
    raise TeamsMeetingArtifactNotFoundError(
        f"Transcript {transcript.artifact_id} for meeting {meeting_ref.meeting_id} was empty."
    )
|
|
|
|
|
|
async def fetch_preferred_transcript_text(
    client: MicrosoftGraphClient,
    meeting_ref: TeamsMeetingRef,
) -> tuple[MeetingArtifact | None, str | None]:
    """Fetch the text of the meeting's preferred transcript.

    Returns:
        ``(artifact, text)`` on success, or ``(None, None)`` when the meeting
        has no transcript or the download raised
        ``TeamsMeetingArtifactNotFoundError``.

    Any other error from listing or downloading propagates to the caller.
    """
    candidates = await list_transcript_artifacts(client, meeting_ref)
    chosen = select_preferred_transcript(candidates)
    if chosen is None:
        return None, None

    try:
        text = await download_transcript_text(client, meeting_ref, chosen)
    except TeamsMeetingArtifactNotFoundError:
        return None, None
    return chosen, text
|
|
|
|
|
|
async def list_recording_artifacts(
    client: MicrosoftGraphClient,
    meeting_ref: TeamsMeetingRef,
) -> list[MeetingArtifact]:
    """List the recordings Graph reports for a meeting.

    Entries returned by ``client.collect_paginated`` that are not dicts are
    skipped; the rest are normalized as ``"recording"`` artifacts.

    Raises:
        TeamsMeetingNotFoundError: Graph answered 404.
        TeamsMeetingPermissionError: Graph answered 401 or 403.
        TeamsMeetingError: Any other Graph API failure.
    """
    recordings_path = f"{_meeting_path(meeting_ref)}/recordings"
    try:
        payloads = await client.collect_paginated(recordings_path)
    except MicrosoftGraphAPIError as exc:
        raise _wrap_graph_error(
            exc,
            missing_message=f"No recordings found for Teams meeting {meeting_ref.meeting_id}",
        ) from exc

    artifacts: list[MeetingArtifact] = []
    for entry in payloads:
        if isinstance(entry, dict):
            artifacts.append(_normalize_artifact("recording", entry))
    return artifacts
|
|
|
|
|
|
async def download_recording_artifact(
    client: MicrosoftGraphClient,
    meeting_ref: TeamsMeetingRef,
    recording: MeetingArtifact,
    destination: str | Path,
) -> dict[str, Any]:
    """Download a recording to ``destination`` and describe the result.

    Args:
        client: Graph client providing an awaitable ``download_to_file``.
        meeting_ref: Meeting the recording belongs to.
        recording: Recording artifact to download.
        destination: Target file path.

    Returns:
        A dict with ``artifact`` (``recording.to_dict()``), ``path``,
        ``size_bytes`` and ``content_type``. The last two prefer the values
        reported by the download and fall back to the artifact's own.

    Raises:
        TeamsMeetingNotFoundError: Graph answered 404.
        TeamsMeetingPermissionError: Graph answered 401 or 403.
        TeamsMeetingError: Any other Graph API failure.

    NOTE(review): assumes ``download_to_file`` returns a mapping -- confirm
    against the client.
    """
    destination_path = Path(destination)
    try:
        source = _recording_download_path(meeting_ref, recording)
        result = await client.download_to_file(source, destination_path)
    except MicrosoftGraphAPIError as exc:
        raise _wrap_graph_error(
            exc,
            missing_message=f"Recording {recording.artifact_id} not found for meeting {meeting_ref.meeting_id}",
        ) from exc

    summary: dict[str, Any] = {}
    summary["artifact"] = recording.to_dict()
    summary["path"] = str(destination_path)
    summary["size_bytes"] = result.get("size_bytes") or recording.size_bytes
    summary["content_type"] = result.get("content_type") or recording.content_type
    return summary
|
|
|
|
|
|
async def fetch_call_record_artifact(
    client: MicrosoftGraphClient,
    *,
    call_record_id: str,
    allow_permission_errors: bool = True,
) -> MeetingArtifact | None:
    """Fetch a call record from Graph and wrap it as a ``call_record`` artifact.

    Args:
        client: Graph client providing an awaitable ``get_json``.
        call_record_id: Call record id; percent-encoded into the path.
        allow_permission_errors: When true, HTTP 401/403 yields ``None``
            instead of raising.

    Returns:
        The artifact, or ``None`` when Graph answered 404, when a permission
        error was tolerated, or when the payload is not an object with an
        ``id``. The artifact's metadata holds the raw payload under
        ``call_record`` and summary counts under ``metrics``.

    Raises:
        TeamsMeetingPermissionError: 401/403 with ``allow_permission_errors``
            false.
        TeamsMeetingError: Any other Graph API failure.
    """
    record_path = f"/communications/callRecords/{quote(call_record_id, safe='')}"
    try:
        payload = await client.get_json(record_path)
    except MicrosoftGraphAPIError as exc:
        status = exc.status_code
        if status == 404:
            return None
        if allow_permission_errors and status in (401, 403):
            return None
        raise _wrap_graph_error(exc, missing_message=f"Call record not found: {call_record_id}") from exc

    if not isinstance(payload, dict):
        return None
    if not payload.get("id"):
        return None

    participants = payload.get("participants") or []
    metrics = {
        "version": payload.get("version"),
        "modalities": payload.get("modalities"),
        "participant_count": len(participants),
        "organizer": _parse_organizer_user_id(payload),
    }
    # session_count is reported only when the record lists at least one session.
    sessions = payload.get("sessions") or []
    if sessions:
        metrics["session_count"] = len(sessions)

    return MeetingArtifact(
        artifact_type="call_record",
        artifact_id=str(payload["id"]),
        display_name=payload.get("type") or "call_record",
        source_url=payload.get("webUrl"),
        created_at=payload.get("startDateTime"),
        available_at=payload.get("endDateTime"),
        metadata={"call_record": payload, "metrics": metrics},
    )
|
|
|
|
|
|
async def enrich_meeting_with_call_record(
    client: MicrosoftGraphClient,
    meeting_ref: TeamsMeetingRef,
    *,
    call_record_id: str | None = None,
    allow_permission_errors: bool = True,
) -> MeetingArtifact | None:
    """Fetch the call record associated with a meeting, if one is known.

    The id comes from ``call_record_id`` when truthy, otherwise from
    ``meeting_ref.metadata["call_record_id"]``. Without an id no request is
    made and ``None`` is returned; otherwise the result of
    ``fetch_call_record_artifact`` is returned unchanged.
    """
    if call_record_id:
        resolved_call_record_id = call_record_id
    else:
        resolved_call_record_id = meeting_ref.metadata.get("call_record_id")

    if resolved_call_record_id:
        return await fetch_call_record_artifact(
            client,
            call_record_id=str(resolved_call_record_id),
            allow_permission_errors=allow_permission_errors,
        )
    return None
|