hermes-agent/plugins/teams_pipeline/meetings.py
Dilee 07bbd93337 feat(teams-pipeline): add plugin runtime and operator cli
Third slice of the Microsoft Teams meeting pipeline stack, salvaged
onto current main. Adds the standalone teams_pipeline plugin that
consumes Graph change notifications from the webhook listener,
resolves meeting artifacts (transcript first, recording + STT fallback
later), persists job state in a durable store, and exposes an operator
CLI for inspection, replay, subscription management, and validation.

Design choices follow maintainer review feedback on PR #19815:

- Standalone plugin rather than bolted-on core surface
  (plugins/teams_pipeline/, kind: standalone in plugin.yaml).
- Zero new model tools. The agent drives the pipeline by invoking
  the operator CLI via the terminal tool, guided by the skill that
  ships with a follow-up PR.
- Reuses the existing msgraph_webhook gateway platform for Graph
  ingress. Pipeline runtime is wired in via bind_gateway_runtime and
  gated on plugins.enabled so gateways that don't run the plugin
  boot cleanly.

Additions:

- plugins/teams_pipeline/: runtime (gateway wiring + config builder),
  pipeline core, durable SQLite store, subscription maintenance
  helpers, Graph artifact resolution, operator CLI (list, show,
  run/replay, fetch dry-run, subscriptions list, subscribe,
  renew-subscription, delete-subscription, maintain-subscriptions,
  token-health, validate).
- hermes_cli/main.py: second-pass plugin CLI discovery so any
  standalone plugin registered via ctx.register_cli_command()
  outside the memory-plugin convention path gets its subcommand
  wired into argparse without touching core.
- gateway/run.py: _teams_pipeline_plugin_enabled() config gate,
  _wire_teams_pipeline_runtime() binding after adapter setup, and
  the two runner attributes used by the runtime.

Credit to @dlkakbs for the entire plugin implementation.
2026-05-08 11:18:14 -07:00

333 lines
12 KiB
Python

"""Graph-backed Teams meeting helpers for the plugin runtime."""
from __future__ import annotations
import tempfile
from pathlib import Path
from typing import Any
from urllib.parse import quote
from plugins.teams_pipeline.models import MeetingArtifact, TeamsMeetingRef
from tools.microsoft_graph_client import MicrosoftGraphAPIError, MicrosoftGraphClient
class TeamsMeetingError(RuntimeError):
"""Base class for Teams meeting pipeline failures."""
class TeamsMeetingNotFoundError(TeamsMeetingError):
"""Raised when the meeting cannot be resolved from Graph."""
class TeamsMeetingArtifactNotFoundError(TeamsMeetingError):
"""Raised when a transcript or recording cannot be found."""
class TeamsMeetingPermissionError(TeamsMeetingError):
"""Raised when Graph access is denied for the requested resource."""
def _meeting_path(meeting_ref: TeamsMeetingRef | str) -> str:
meeting_id = meeting_ref.meeting_id if isinstance(meeting_ref, TeamsMeetingRef) else str(meeting_ref)
return f"/communications/onlineMeetings/{quote(meeting_id, safe='')}"
def _wrap_graph_error(exc: MicrosoftGraphAPIError, *, missing_message: str) -> TeamsMeetingError:
if exc.status_code in (401, 403):
return TeamsMeetingPermissionError(str(exc))
if exc.status_code == 404:
return TeamsMeetingNotFoundError(missing_message)
return TeamsMeetingError(str(exc))
def _parse_organizer_user_id(payload: dict[str, Any]) -> str | None:
organizer = payload.get("organizer")
if not isinstance(organizer, dict):
return None
identity = organizer.get("identity")
if not isinstance(identity, dict):
return None
user = identity.get("user")
if not isinstance(user, dict):
return None
return user.get("id")
def _parse_thread_id(payload: dict[str, Any]) -> str | None:
chat = payload.get("chatInfo")
if isinstance(chat, dict):
thread_id = chat.get("threadId")
if thread_id:
return str(thread_id)
return payload.get("threadId")
def _normalize_meeting_ref(payload: dict[str, Any], *, tenant_id: str | None = None) -> TeamsMeetingRef:
metadata = {
key: payload.get(key)
for key in ("subject", "startDateTime", "endDateTime", "createdDateTime")
if payload.get(key) is not None
}
participants = payload.get("participants")
if participants is not None:
metadata["participants"] = participants
return TeamsMeetingRef(
meeting_id=str(payload.get("id") or "").strip(),
organizer_user_id=_parse_organizer_user_id(payload),
join_web_url=payload.get("joinWebUrl"),
calendar_event_id=payload.get("calendarEventId"),
thread_id=_parse_thread_id(payload),
tenant_id=tenant_id or payload.get("tenantId"),
metadata=metadata,
)
def _normalize_artifact(
artifact_type: str,
payload: dict[str, Any],
*,
default_source_url: str | None = None,
) -> MeetingArtifact:
metadata = dict(payload)
download_url = (
payload.get("@microsoft.graph.downloadUrl")
or payload.get("downloadUrl")
or payload.get("recordingContentUrl")
or payload.get("transcriptContentUrl")
)
source_url = payload.get("webUrl") or payload.get("contentUrl") or default_source_url
return MeetingArtifact(
artifact_type=artifact_type, # type: ignore[arg-type]
artifact_id=str(payload.get("id") or "").strip(),
display_name=payload.get("displayName") or payload.get("name"),
content_type=payload.get("contentType") or payload.get("fileMimeType"),
source_url=source_url,
download_url=download_url,
created_at=payload.get("createdDateTime"),
available_at=payload.get("lastModifiedDateTime") or payload.get("meetingEndDateTime"),
size_bytes=payload.get("size"),
metadata=metadata,
)
def _transcript_sort_key(artifact: MeetingArtifact) -> tuple[int, int, str]:
status = str(artifact.metadata.get("status") or "").lower()
has_download = int(bool(artifact.download_url or artifact.source_url))
is_completed = int(status in {"available", "completed", "succeeded"})
timestamp = ""
if artifact.available_at is not None:
timestamp = artifact.available_at.isoformat()
elif artifact.created_at is not None:
timestamp = artifact.created_at.isoformat()
return (is_completed, has_download, timestamp)
def _recording_download_path(meeting_ref: TeamsMeetingRef, artifact: MeetingArtifact) -> str:
if artifact.download_url:
return artifact.download_url
return f"{_meeting_path(meeting_ref)}/recordings/{quote(artifact.artifact_id, safe='')}/content"
def _transcript_download_path(meeting_ref: TeamsMeetingRef, artifact: MeetingArtifact) -> str:
if artifact.download_url:
return artifact.download_url
return f"{_meeting_path(meeting_ref)}/transcripts/{quote(artifact.artifact_id, safe='')}/content"
async def resolve_meeting_reference(
client: MicrosoftGraphClient,
*,
meeting_id: str | None = None,
join_web_url: str | None = None,
tenant_id: str | None = None,
) -> TeamsMeetingRef:
if meeting_id:
try:
payload = await client.get_json(_meeting_path(meeting_id))
except MicrosoftGraphAPIError as exc:
raise _wrap_graph_error(exc, missing_message=f"Teams meeting not found: {meeting_id}") from exc
if not isinstance(payload, dict) or not payload.get("id"):
raise TeamsMeetingNotFoundError(f"Teams meeting not found: {meeting_id}")
return _normalize_meeting_ref(payload, tenant_id=tenant_id)
if join_web_url:
escaped_join_url = join_web_url.replace("'", "''")
try:
payload = await client.get_json(
"/communications/onlineMeetings",
params={"$filter": f"JoinWebUrl eq '{escaped_join_url}'"},
)
except MicrosoftGraphAPIError as exc:
raise _wrap_graph_error(
exc,
missing_message=f"Teams meeting not found for join URL: {join_web_url}",
) from exc
candidates = payload.get("value") if isinstance(payload, dict) else None
if not isinstance(candidates, list) or not candidates:
raise TeamsMeetingNotFoundError(f"Teams meeting not found for join URL: {join_web_url}")
return _normalize_meeting_ref(candidates[0], tenant_id=tenant_id)
raise ValueError("Either meeting_id or join_web_url is required.")
async def list_transcript_artifacts(
client: MicrosoftGraphClient,
meeting_ref: TeamsMeetingRef,
) -> list[MeetingArtifact]:
try:
payloads = await client.collect_paginated(f"{_meeting_path(meeting_ref)}/transcripts")
except MicrosoftGraphAPIError as exc:
raise _wrap_graph_error(
exc,
missing_message=f"No transcripts found for Teams meeting {meeting_ref.meeting_id}",
) from exc
return [_normalize_artifact("transcript", payload) for payload in payloads if isinstance(payload, dict)]
def select_preferred_transcript(candidates: list[MeetingArtifact]) -> MeetingArtifact | None:
transcripts = [candidate for candidate in candidates if candidate.artifact_type == "transcript"]
if not transcripts:
return None
return sorted(transcripts, key=_transcript_sort_key, reverse=True)[0]
async def download_transcript_text(
client: MicrosoftGraphClient,
meeting_ref: TeamsMeetingRef,
transcript: MeetingArtifact,
*,
encoding: str = "utf-8",
) -> str:
suffix = Path(transcript.display_name or "transcript.vtt").suffix or ".txt"
with tempfile.NamedTemporaryFile(prefix="teams-transcript-", suffix=suffix, delete=False) as handle:
destination = Path(handle.name)
try:
await client.download_to_file(_transcript_download_path(meeting_ref, transcript), destination)
text = destination.read_text(encoding=encoding).strip()
except MicrosoftGraphAPIError as exc:
raise _wrap_graph_error(
exc,
missing_message=(
f"Transcript {transcript.artifact_id} not found for meeting {meeting_ref.meeting_id}"
),
) from exc
finally:
try:
destination.unlink(missing_ok=True)
except OSError:
pass
if not text:
raise TeamsMeetingArtifactNotFoundError(
f"Transcript {transcript.artifact_id} for meeting {meeting_ref.meeting_id} was empty."
)
return text
async def fetch_preferred_transcript_text(
client: MicrosoftGraphClient,
meeting_ref: TeamsMeetingRef,
) -> tuple[MeetingArtifact | None, str | None]:
transcripts = await list_transcript_artifacts(client, meeting_ref)
transcript = select_preferred_transcript(transcripts)
if transcript is None:
return None, None
try:
return transcript, await download_transcript_text(client, meeting_ref, transcript)
except TeamsMeetingArtifactNotFoundError:
return None, None
async def list_recording_artifacts(
client: MicrosoftGraphClient,
meeting_ref: TeamsMeetingRef,
) -> list[MeetingArtifact]:
try:
payloads = await client.collect_paginated(f"{_meeting_path(meeting_ref)}/recordings")
except MicrosoftGraphAPIError as exc:
raise _wrap_graph_error(
exc,
missing_message=f"No recordings found for Teams meeting {meeting_ref.meeting_id}",
) from exc
return [_normalize_artifact("recording", payload) for payload in payloads if isinstance(payload, dict)]
async def download_recording_artifact(
client: MicrosoftGraphClient,
meeting_ref: TeamsMeetingRef,
recording: MeetingArtifact,
destination: str | Path,
) -> dict[str, Any]:
destination_path = Path(destination)
try:
result = await client.download_to_file(
_recording_download_path(meeting_ref, recording),
destination_path,
)
except MicrosoftGraphAPIError as exc:
raise _wrap_graph_error(
exc,
missing_message=f"Recording {recording.artifact_id} not found for meeting {meeting_ref.meeting_id}",
) from exc
return {
"artifact": recording.to_dict(),
"path": str(destination_path),
"size_bytes": result.get("size_bytes") or recording.size_bytes,
"content_type": result.get("content_type") or recording.content_type,
}
async def fetch_call_record_artifact(
client: MicrosoftGraphClient,
*,
call_record_id: str,
allow_permission_errors: bool = True,
) -> MeetingArtifact | None:
try:
payload = await client.get_json(f"/communications/callRecords/{quote(call_record_id, safe='')}")
except MicrosoftGraphAPIError as exc:
if exc.status_code in (401, 403) and allow_permission_errors:
return None
if exc.status_code == 404:
return None
raise _wrap_graph_error(exc, missing_message=f"Call record not found: {call_record_id}") from exc
if not isinstance(payload, dict) or not payload.get("id"):
return None
metrics = {
"version": payload.get("version"),
"modalities": payload.get("modalities"),
"participant_count": len(payload.get("participants") or []),
"organizer": _parse_organizer_user_id(payload),
}
sessions = payload.get("sessions") or []
if sessions:
metrics["session_count"] = len(sessions)
return MeetingArtifact(
artifact_type="call_record",
artifact_id=str(payload["id"]),
display_name=payload.get("type") or "call_record",
source_url=payload.get("webUrl"),
created_at=payload.get("startDateTime"),
available_at=payload.get("endDateTime"),
metadata={"call_record": payload, "metrics": metrics},
)
async def enrich_meeting_with_call_record(
client: MicrosoftGraphClient,
meeting_ref: TeamsMeetingRef,
*,
call_record_id: str | None = None,
allow_permission_errors: bool = True,
) -> MeetingArtifact | None:
resolved_call_record_id = call_record_id or meeting_ref.metadata.get("call_record_id")
if not resolved_call_record_id:
return None
return await fetch_call_record_artifact(
client,
call_record_id=str(resolved_call_record_id),
allow_permission_errors=allow_permission_errors,
)