Mirror of https://github.com/NousResearch/hermes-agent.git, synced 2026-05-14 04:02:26 +00:00
feat(teams-pipeline): add plugin runtime and operator cli
Third slice of the Microsoft Teams meeting pipeline stack, salvaged onto current main. Adds the standalone teams_pipeline plugin that consumes Graph change notifications from the webhook listener, resolves meeting artifacts (transcript first, recording + STT fallback later), persists job state in a durable store, and exposes an operator CLI for inspection, replay, subscription management, and validation.

Design choices follow maintainer review feedback on PR #19815:

- Standalone plugin rather than bolted-on core surface (plugins/teams_pipeline/, kind: standalone in plugin.yaml).
- Zero new model tools. The agent drives the pipeline by invoking the operator CLI via the terminal tool, guided by the skill that ships with a follow-up PR.
- Reuses the existing msgraph_webhook gateway platform for Graph ingress. Pipeline runtime is wired in via bind_gateway_runtime and gated on plugins.enabled so gateways that don't run the plugin boot cleanly.

Additions:

- plugins/teams_pipeline/: runtime (gateway wiring + config builder), pipeline core, durable SQLite store, subscription maintenance helpers, Graph artifact resolution, operator CLI (list, show, run/replay, fetch dry-run, subscriptions list, subscribe, renew-subscription, delete-subscription, maintain-subscriptions, token-health, validate).
- hermes_cli/main.py: second-pass plugin CLI discovery so any standalone plugin registered via ctx.register_cli_command() outside the memory-plugin convention path gets its subcommand wired into argparse without touching core.
- gateway/run.py: _teams_pipeline_plugin_enabled() config gate, _wire_teams_pipeline_runtime() binding after adapter setup, and the two runner attributes used by the runtime.

Credit to @dlkakbs for the entire plugin implementation.
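For orientation, a rough sketch of the registration shape the second-pass CLI discovery is meant to pick up. Only ctx.register_cli_command() is named by this commit; the entry-point name, handler, module path, and exact signature below are illustrative assumptions, not the actual hermes-agent API:

# Hypothetical sketch of a standalone plugin exposing its operator CLI through
# the plugin context; names and signature are assumed for illustration only.
def register(ctx):
    def _teams_pipeline_command(argv):
        # Dispatch into the plugin's operator CLI (list, show, run, validate, ...).
        from plugins.teams_pipeline import cli  # assumed module path

        return cli.main(argv)

    ctx.register_cli_command("teams-pipeline", _teams_pipeline_command)  # assumed subcommand name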
This commit is contained in:
parent ea86714cc0
commit 07bbd93337
14 changed files with 3332 additions and 1 deletion
688 plugins/teams_pipeline/pipeline.py Normal file
@@ -0,0 +1,688 @@
"""Pipeline orchestration for Microsoft Teams meeting summaries."""

from __future__ import annotations

import asyncio
import json
import logging
import os
import shutil
import subprocess
import tempfile
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Awaitable, Callable, Optional

import httpx

from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
from hermes_constants import get_hermes_home
from plugins.teams_pipeline.meetings import (
    TeamsMeetingArtifactNotFoundError,
    download_recording_artifact,
    enrich_meeting_with_call_record,
    fetch_preferred_transcript_text,
    list_recording_artifacts,
    resolve_meeting_reference,
)
from plugins.teams_pipeline.models import (
    MeetingArtifact,
    TeamsMeetingPipelineJob,
    TeamsMeetingRef,
    TeamsMeetingSummaryPayload,
)
from plugins.teams_pipeline.store import TeamsPipelineStore
from tools.transcription_tools import transcribe_audio

logger = logging.getLogger(__name__)

TERMINAL_PIPELINE_STATES = {"completed", "failed", "retry_scheduled"}
ACTIVE_PIPELINE_STATES = {
    "received",
    "resolving_meeting",
    "fetching_transcript",
    "downloading_recording",
    "transcribing_audio",
    "summarizing",
    "writing_notion",
    "writing_linear",
    "sending_teams",
}
}
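# Happy-path lifecycle: received -> resolving_meeting -> fetching_transcript
# (-> downloading_recording -> transcribing_audio when falling back to STT)
# -> summarizing -> writing_notion / writing_linear / sending_teams -> completed.
# Retryable failures land in retry_scheduled; everything else ends in failed.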


class TeamsPipelineError(RuntimeError):
    """Base class for Teams meeting pipeline failures."""


class TeamsPipelineRetryableError(TeamsPipelineError):
    """Raised when the pipeline should be retried later."""


class TeamsPipelineSinkError(TeamsPipelineError):
    """Raised when an output sink fails."""


class TeamsPipelineArtifactNotFoundError(TeamsPipelineRetryableError):
    """Raised when meeting artifacts are not yet available."""


TranscribeFn = Callable[[str, Optional[str]], dict[str, Any]]
SummarizeFn = Callable[..., Awaitable[dict[str, Any] | TeamsMeetingSummaryPayload]]
SinkFn = Callable[
    [TeamsMeetingSummaryPayload, dict[str, Any], Optional[dict[str, Any]]],
    Awaitable[dict[str, Any]],
]


@dataclass
class TeamsPipelineConfig:
    transcript_preferred: bool = True
    transcript_required: bool = False
    transcription_fallback: bool = True
    stt_model: str | None = None
    ffmpeg_extract_audio: bool = True
    transcript_min_chars: int = 80
    tmp_dir: Path | None = None
    notion: dict[str, Any] | None = None
    linear: dict[str, Any] | None = None
    teams_delivery: dict[str, Any] | None = None

    @classmethod
    def from_dict(cls, payload: Optional[dict[str, Any]]) -> "TeamsPipelineConfig":
        data = dict(payload or {})
        tmp_dir = data.get("tmp_dir") or data.get("tmpDir")
        return cls(
            transcript_preferred=bool(data.get("transcript_preferred", True)),
            transcript_required=bool(data.get("transcript_required", False)),
            transcription_fallback=bool(data.get("transcription_fallback", True)),
            stt_model=data.get("stt_model") or data.get("sttModel"),
            ffmpeg_extract_audio=bool(data.get("ffmpeg_extract_audio", True)),
            transcript_min_chars=int(data.get("transcript_min_chars", 80)),
            tmp_dir=Path(tmp_dir) if tmp_dir else None,
            notion=data.get("notion"),
            linear=data.get("linear"),
            teams_delivery=data.get("teams_delivery") or data.get("teamsDelivery"),
        )


class NotionWriter:
    API_BASE = "https://api.notion.com/v1"
    API_VERSION = "2025-09-03"

    def __init__(self, *, api_key: str | None = None, transport: httpx.AsyncBaseTransport | None = None) -> None:
        self.api_key = (api_key or os.getenv("NOTION_API_KEY", "")).strip()
        self._transport = transport

    async def write_summary(
        self,
        payload: TeamsMeetingSummaryPayload,
        config: dict[str, Any],
        existing_record: Optional[dict[str, Any]] = None,
    ) -> dict[str, Any]:
        if not self.api_key:
            raise TeamsPipelineSinkError("NOTION_API_KEY is not configured.")

        database_id = str(config.get("database_id") or config.get("databaseId") or "").strip()
        page_id = (existing_record or {}).get("page_id")
        if not database_id and not page_id:
            raise TeamsPipelineSinkError("Notion sink requires database_id or an existing page_id.")

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Notion-Version": self.API_VERSION,
            "Content-Type": "application/json",
        }
        async with httpx.AsyncClient(timeout=30.0, transport=self._transport) as client:
            if page_id:
                response = await client.patch(
                    f"{self.API_BASE}/pages/{page_id}",
                    headers=headers,
                    json={"properties": self._build_properties(payload, config)},
                )
                response.raise_for_status()
                record = response.json()
            else:
                response = await client.post(
                    f"{self.API_BASE}/pages",
                    headers=headers,
                    json={
                        "parent": {"database_id": database_id},
                        "properties": self._build_properties(payload, config),
                        "children": self._build_blocks(payload),
                    },
                )
                response.raise_for_status()
                record = response.json()

        return {"page_id": record["id"], "url": record.get("url")}

    def _build_properties(self, payload: TeamsMeetingSummaryPayload, config: dict[str, Any]) -> dict[str, Any]:
        title_property = config.get("title_property", "Name")
        summary_property = config.get("summary_property")
        meeting_id_property = config.get("meeting_id_property")

        properties: dict[str, Any] = {
            title_property: {
                "title": [{"text": {"content": payload.title or f"Meeting {payload.meeting_ref.meeting_id}"}}]
            }
        }
        if summary_property:
            properties[summary_property] = {
                "rich_text": [{"text": {"content": (payload.summary or "")[:1900]}}]
            }
        if meeting_id_property:
            properties[meeting_id_property] = {
                "rich_text": [{"text": {"content": payload.meeting_ref.meeting_id}}]
            }
        return properties

    def _build_blocks(self, payload: TeamsMeetingSummaryPayload) -> list[dict[str, Any]]:
        sections = [
            ("Summary", payload.summary or ""),
            ("Key Decisions", "\n".join(f"- {item}" for item in payload.key_decisions)),
            ("Action Items", "\n".join(f"- {item}" for item in payload.action_items)),
            ("Risks", "\n".join(f"- {item}" for item in payload.risks)),
        ]
        blocks: list[dict[str, Any]] = []
        for heading, body in sections:
            blocks.append(
                {
                    "object": "block",
                    "type": "heading_2",
                    "heading_2": {"rich_text": [{"text": {"content": heading}}]},
                }
            )
            blocks.append(
                {
                    "object": "block",
                    "type": "paragraph",
                    "paragraph": {"rich_text": [{"text": {"content": body or "None"}}]},
                }
            )
        return blocks


class LinearWriter:
    API_URL = "https://api.linear.app/graphql"

    def __init__(self, *, api_key: str | None = None, transport: httpx.AsyncBaseTransport | None = None) -> None:
        self.api_key = (api_key or os.getenv("LINEAR_API_KEY", "")).strip()
        self._transport = transport

    async def write_summary(
        self,
        payload: TeamsMeetingSummaryPayload,
        config: dict[str, Any],
        existing_record: Optional[dict[str, Any]] = None,
    ) -> dict[str, Any]:
        if not self.api_key:
            raise TeamsPipelineSinkError("LINEAR_API_KEY is not configured.")

        headers = {"Authorization": self.api_key, "Content-Type": "application/json"}
        team_id = str(config.get("team_id") or config.get("teamId") or "").strip()
        title = payload.title or f"Meeting Summary: {payload.meeting_ref.meeting_id}"
        description = _render_summary_markdown(payload)
        existing_issue_id = (existing_record or {}).get("issue_id")

        async with httpx.AsyncClient(timeout=30.0, transport=self._transport) as client:
            if existing_issue_id:
                response = await client.post(
                    self.API_URL,
                    headers=headers,
                    json={
                        "query": (
                            "mutation($id: String!, $input: IssueUpdateInput!) "
                            "{ issueUpdate(id: $id, input: $input) { success issue { id identifier url } } }"
                        ),
                        "variables": {
                            "id": existing_issue_id,
                            "input": {"title": title, "description": description},
                        },
                    },
                )
            else:
                if not team_id:
                    raise TeamsPipelineSinkError("Linear sink requires team_id when creating a new issue.")
                response = await client.post(
                    self.API_URL,
                    headers=headers,
                    json={
                        "query": (
                            "mutation($input: IssueCreateInput!) "
                            "{ issueCreate(input: $input) { success issue { id identifier url } } }"
                        ),
                        "variables": {"input": {"teamId": team_id, "title": title, "description": description}},
                    },
                )
            response.raise_for_status()
            payload_json = response.json()

        issue = (
            (((payload_json.get("data") or {}).get("issueUpdate") or {}).get("issue"))
            or (((payload_json.get("data") or {}).get("issueCreate") or {}).get("issue"))
        )
        if not isinstance(issue, dict) or not issue.get("id"):
            raise TeamsPipelineSinkError(f"Linear write failed: {payload_json}")

        return {"issue_id": issue["id"], "identifier": issue.get("identifier"), "url": issue.get("url")}


class TeamsMeetingPipeline:
    """Transcript-first Teams meeting pipeline with durable lifecycle state."""

    def __init__(
        self,
        *,
        graph_client: Any,
        store: TeamsPipelineStore,
        config: TeamsPipelineConfig | dict[str, Any] | None = None,
        transcribe_fn: TranscribeFn = transcribe_audio,
        summarize_fn: Optional[SummarizeFn] = None,
        notion_writer: Optional[NotionWriter] = None,
        linear_writer: Optional[LinearWriter] = None,
        teams_sender: Optional[SinkFn] = None,
    ) -> None:
        self.graph_client = graph_client
        self.store = store
        self.config = config if isinstance(config, TeamsPipelineConfig) else TeamsPipelineConfig.from_dict(config)
        self.transcribe_fn = transcribe_fn
        self.summarize_fn = summarize_fn or self._generate_summary_payload
        self.notion_writer = notion_writer
        self.linear_writer = linear_writer
        self.teams_sender = teams_sender

    def create_job_from_notification(self, notification: dict[str, Any]) -> TeamsMeetingPipelineJob:
        event_id = TeamsPipelineStore.build_notification_receipt_key(notification)
        self.store.record_notification_receipt(event_id, notification)
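        # Graph can deliver the same change notification more than once; the
        # receipt key doubles as the dedupe key, so replays return the original
        # job instead of creating a duplicate.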
        existing_job = self._find_job_by_dedupe_key(event_id)
        if existing_job is not None:
            return existing_job
        resource_data = notification.get("resourceData") or {}
        meeting_id = (
            resource_data.get("id")
            or notification.get("meetingId")
            or _extract_meeting_id_from_resource(str(notification.get("resource") or ""))
            or notification.get("resource")
            or event_id
        )
        job = TeamsMeetingPipelineJob(
            job_id=f"teams-job-{uuid.uuid4().hex[:12]}",
            event_id=event_id,
            source_event_type=str(notification.get("changeType") or "graph.notification"),
            dedupe_key=event_id,
            status="received",
            meeting_ref=TeamsMeetingRef(
                meeting_id=str(meeting_id),
                tenant_id=resource_data.get("tenantId") or notification.get("tenantId"),
                metadata={
                    "notification": dict(notification),
                    "join_web_url": resource_data.get("joinWebUrl"),
                    "call_record_id": resource_data.get("callRecordId") or notification.get("callRecordId"),
                },
            ),
        )
        self.store.upsert_job(job.job_id, job.to_dict())
        return job

    async def run_notification(self, notification: dict[str, Any]) -> TeamsMeetingPipelineJob:
        job = self.create_job_from_notification(notification)
        if job.status in TERMINAL_PIPELINE_STATES or job.status in ACTIVE_PIPELINE_STATES - {"received"}:
            return job
        return await self.run_job(job.job_id)

    async def run_job(self, job_or_id: TeamsMeetingPipelineJob | str) -> TeamsMeetingPipelineJob:
        job = self._coerce_job(job_or_id)
        meeting_ref = job.meeting_ref
        if meeting_ref is None:
            raise TeamsPipelineError(f"Job {job.job_id} has no meeting_ref.")

        artifacts: list[MeetingArtifact] = []

        try:
            job = self._persist_job(job, status="resolving_meeting")
            notification = meeting_ref.metadata.get("notification") if isinstance(meeting_ref.metadata, dict) else {}
            resolved_meeting = await resolve_meeting_reference(
                self.graph_client,
                meeting_id=meeting_ref.meeting_id,
                join_web_url=meeting_ref.join_web_url or meeting_ref.metadata.get("join_web_url"),
                tenant_id=meeting_ref.tenant_id,
            )
            job.meeting_ref = resolved_meeting
            job = self._persist_job(job, meeting_ref=resolved_meeting.to_dict())

            transcript_text: str | None = None
            if self.config.transcript_preferred:
                job = self._persist_job(job, status="fetching_transcript")
                transcript_artifact, transcript_text = await fetch_preferred_transcript_text(
                    self.graph_client, resolved_meeting
                )
                if transcript_artifact and transcript_text:
                    artifacts.append(transcript_artifact)
                    if len(transcript_text.strip()) < self.config.transcript_min_chars:
                        transcript_text = None

            if not transcript_text:
                if self.config.transcript_required:
                    raise TeamsPipelineRetryableError(
                        f"Transcript unavailable for meeting {resolved_meeting.meeting_id}."
                    )
                if not self.config.transcription_fallback:
                    raise TeamsPipelineArtifactNotFoundError(
                        "No transcript available and transcription fallback disabled "
                        f"for {resolved_meeting.meeting_id}."
                    )
                job = self._persist_job(job, status="downloading_recording")
                recordings = await list_recording_artifacts(self.graph_client, resolved_meeting)
                if not recordings:
                    raise TeamsPipelineRetryableError(
                        f"Recording unavailable for meeting {resolved_meeting.meeting_id}."
                    )
                recording = recordings[0]
                artifacts.append(recording)
                transcript_text = await self._transcribe_recording(job, resolved_meeting, recording)
                job = self._persist_job(job, selected_artifact_strategy="recording_stt_fallback")
            else:
                job = self._persist_job(job, selected_artifact_strategy="transcript_first")

            call_record_id = notification.get("callRecordId") or (meeting_ref.metadata or {}).get("call_record_id")
            call_record = await enrich_meeting_with_call_record(
                self.graph_client,
                resolved_meeting,
                call_record_id=call_record_id,
            )
            if call_record is not None:
                artifacts.append(call_record)

            job = self._persist_job(job, status="summarizing")
            generated = await self.summarize_fn(
                resolved_meeting=resolved_meeting,
                transcript_text=transcript_text or "",
                artifacts=artifacts,
            )
            summary_payload = (
                generated
                if isinstance(generated, TeamsMeetingSummaryPayload)
                else TeamsMeetingSummaryPayload.from_dict(generated)
            )
            job.summary_payload = summary_payload
            job = self._persist_job(job, summary_payload=summary_payload.to_dict())

            await self._write_sinks(job, summary_payload)
            job = self._persist_job(job, status="completed")
            return job
        except TeamsPipelineRetryableError as exc:
            job = self._persist_job(
                job,
                status="retry_scheduled",
                error_info={"message": str(exc), "retryable": True},
            )
            return job
        except Exception as exc:
            job = self._persist_job(
                job,
                status="failed",
                error_info={"message": str(exc), "type": type(exc).__name__},
            )
            return job

    def _coerce_job(self, job_or_id: TeamsMeetingPipelineJob | str) -> TeamsMeetingPipelineJob:
        if isinstance(job_or_id, TeamsMeetingPipelineJob):
            return job_or_id
        payload = self.store.get_job(str(job_or_id))
        if not payload:
            raise TeamsPipelineError(f"Unknown Teams pipeline job: {job_or_id}")
        return TeamsMeetingPipelineJob.from_dict(payload)

    def _find_job_by_dedupe_key(self, dedupe_key: str) -> TeamsMeetingPipelineJob | None:
        for payload in self.store.list_jobs().values():
            if not isinstance(payload, dict):
                continue
            if str(payload.get("dedupe_key") or "") != dedupe_key:
                continue
            return TeamsMeetingPipelineJob.from_dict(payload)
        return None

    def _persist_job(self, job: TeamsMeetingPipelineJob, **updates: Any) -> TeamsMeetingPipelineJob:
        payload = job.to_dict()
        payload.update(updates)
        stored = self.store.upsert_job(job.job_id, payload)
        return TeamsMeetingPipelineJob.from_dict(stored)

    async def _transcribe_recording(
        self,
        job: TeamsMeetingPipelineJob,
        meeting_ref: TeamsMeetingRef,
        recording: MeetingArtifact,
    ) -> str:
        temp_root = self.config.tmp_dir or (get_hermes_home() / "tmp" / "teams_pipeline")
        temp_root.mkdir(parents=True, exist_ok=True)
        with tempfile.TemporaryDirectory(dir=str(temp_root), prefix="teams-recording-") as tmp_dir:
            recording_name = recording.display_name or f"{recording.artifact_id}.mp4"
            recording_path = Path(tmp_dir) / recording_name
            await download_recording_artifact(
                self.graph_client,
                meeting_ref,
                recording,
                recording_path,
            )
            audio_path = await self._prepare_audio_path(recording_path)
            job = self._persist_job(job, status="transcribing_audio")
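            # transcribe_fn is synchronous; run it in a worker thread so STT on
            # a long recording does not block the event loop.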
            result = await asyncio.to_thread(self.transcribe_fn, str(audio_path), self.config.stt_model)
            if not result.get("success"):
                raise TeamsPipelineRetryableError(str(result.get("error") or "Unknown STT failure"))
            transcript = str(result.get("transcript") or "").strip()
            if not transcript:
                raise TeamsPipelineRetryableError("STT returned an empty transcript.")
            return transcript

    async def _prepare_audio_path(self, recording_path: Path) -> Path:
        if recording_path.suffix.lower() in {".wav", ".mp3", ".m4a", ".ogg", ".flac", ".aac", ".webm"}:
            return recording_path
        if not self.config.ffmpeg_extract_audio:
            return recording_path
        ffmpeg = shutil.which("ffmpeg")
        if not ffmpeg:
            raise TeamsPipelineRetryableError(
                "Recording fallback requires ffmpeg for audio extraction, but ffmpeg was not found."
            )
        audio_path = recording_path.with_suffix(".wav")
        proc = await asyncio.create_subprocess_exec(
            ffmpeg,
            "-y",
            "-i",
            str(recording_path),
            str(audio_path),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _stdout, stderr = await proc.communicate()
        if proc.returncode != 0:
            detail = stderr.decode("utf-8", errors="replace").strip()
            raise TeamsPipelineRetryableError(f"ffmpeg audio extraction failed: {detail}")
        return audio_path

    async def _generate_summary_payload(
        self,
        *,
        resolved_meeting: TeamsMeetingRef,
        transcript_text: str,
        artifacts: list[MeetingArtifact],
    ) -> TeamsMeetingSummaryPayload:
        prompt = _build_summary_prompt(resolved_meeting, transcript_text, artifacts)
        try:
            response = await async_call_llm(
                task="call",
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You summarize meeting transcripts. Return only valid JSON with keys: "
                            "summary, key_decisions, action_items, risks, confidence, confidence_notes."
                        ),
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.2,
                max_tokens=900,
            )
            content = extract_content_or_reasoning(response)
            parsed = _parse_summary_json(content)
        except Exception as exc:
            logger.info("Teams pipeline LLM summary unavailable, using heuristic summary: %s", exc)
            parsed = _heuristic_summary(transcript_text)

        metrics = _collect_call_metrics(artifacts)
        return TeamsMeetingSummaryPayload(
            meeting_ref=resolved_meeting,
            title=str(resolved_meeting.metadata.get("subject") or f"Meeting {resolved_meeting.meeting_id}"),
            start_time=resolved_meeting.metadata.get("startDateTime"),
            end_time=resolved_meeting.metadata.get("endDateTime"),
            participants=_collect_participants(resolved_meeting),
            transcript_text=transcript_text,
            summary=parsed.get("summary"),
            key_decisions=list(parsed.get("key_decisions") or []),
            action_items=list(parsed.get("action_items") or []),
            risks=list(parsed.get("risks") or []),
            call_metrics=metrics,
            source_artifacts=artifacts,
            confidence=parsed.get("confidence"),
            confidence_notes=parsed.get("confidence_notes"),
            notion_target=(self.config.notion or {}).get("database_id"),
            linear_target=(self.config.linear or {}).get("team_id"),
            teams_target=(self.config.teams_delivery or {}).get("channel_id"),
        )

    async def _write_sinks(self, job: TeamsMeetingPipelineJob, payload: TeamsMeetingSummaryPayload) -> None:
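        # Sink records are keyed per sink and meeting, so a replayed job updates
        # the existing Notion page / Linear issue instead of creating duplicates.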
        if self.config.notion and self.config.notion.get("enabled") and self.notion_writer:
            job = self._persist_job(job, status="writing_notion")
            sink_key = f"notion:{payload.meeting_ref.meeting_id}"
            existing = self.store.get_sink_record(sink_key)
            result = await self.notion_writer.write_summary(payload, self.config.notion, existing)
            self.store.upsert_sink_record(sink_key, result)

        if self.config.linear and self.config.linear.get("enabled") and self.linear_writer:
            job = self._persist_job(job, status="writing_linear")
            sink_key = f"linear:{payload.meeting_ref.meeting_id}"
            existing = self.store.get_sink_record(sink_key)
            result = await self.linear_writer.write_summary(payload, self.config.linear, existing)
            self.store.upsert_sink_record(sink_key, result)

        if self.config.teams_delivery and self.config.teams_delivery.get("enabled") and self.teams_sender:
            job = self._persist_job(job, status="sending_teams")
            sink_key = f"teams:{payload.meeting_ref.meeting_id}"
            existing = self.store.get_sink_record(sink_key)
            if hasattr(self.teams_sender, "write_summary"):
                result = await self.teams_sender.write_summary(payload, self.config.teams_delivery, existing)
            else:
                result = await self.teams_sender(payload, self.config.teams_delivery, existing)
            self.store.upsert_sink_record(sink_key, result)


def _collect_call_metrics(artifacts: list[MeetingArtifact]) -> dict[str, Any]:
    metrics: dict[str, Any] = {}
    for artifact in artifacts:
        if artifact.artifact_type == "call_record":
            metrics.update(dict(artifact.metadata.get("metrics") or {}))
    metrics["artifact_count"] = len(artifacts)
    return metrics


def _collect_participants(meeting_ref: TeamsMeetingRef) -> list[str]:
    participants = meeting_ref.metadata.get("participants") or []
    result: list[str] = []
    if isinstance(participants, list):
        for item in participants:
            if isinstance(item, dict):
                name = item.get("displayName") or (((item.get("identity") or {}).get("user") or {}).get("displayName"))
                if name:
                    result.append(str(name))
    return result


def _extract_meeting_id_from_resource(resource: str) -> str | None:
    if not resource:
        return None
    parts = [part for part in resource.split("/") if part]
    if not parts:
        return None
    if "onlineMeetings" in parts:
        index = parts.index("onlineMeetings")
        if index + 1 < len(parts):
            return parts[index + 1]
    return parts[-1]


def _build_summary_prompt(
    meeting_ref: TeamsMeetingRef,
    transcript_text: str,
    artifacts: list[MeetingArtifact],
) -> str:
    artifact_lines = [f"- {artifact.artifact_type}:{artifact.artifact_id}:{artifact.display_name or ''}" for artifact in artifacts]
    return (
        f"Meeting ID: {meeting_ref.meeting_id}\n"
        f"Title: {meeting_ref.metadata.get('subject') or 'Unknown'}\n"
        f"Artifacts:\n{chr(10).join(artifact_lines) or '- none'}\n\n"
        "Transcript:\n"
        f"{transcript_text[:18000]}"
    )


def _parse_summary_json(content: str) -> dict[str, Any]:
    text = (content or "").strip()
    if not text:
        return _heuristic_summary("")
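    # Models sometimes wrap the JSON in prose or code fences; slice out the
    # outermost {...} before parsing.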
    start = text.find("{")
    end = text.rfind("}")
    if start >= 0 and end > start:
        text = text[start : end + 1]
    payload = json.loads(text)
    return {
        "summary": str(payload.get("summary") or "").strip(),
        "key_decisions": [str(item).strip() for item in payload.get("key_decisions", []) if str(item).strip()],
        "action_items": [str(item).strip() for item in payload.get("action_items", []) if str(item).strip()],
        "risks": [str(item).strip() for item in payload.get("risks", []) if str(item).strip()],
        "confidence": str(payload.get("confidence") or "medium").strip(),
        "confidence_notes": str(payload.get("confidence_notes") or "").strip(),
    }


def _heuristic_summary(transcript_text: str) -> dict[str, Any]:
    lines = [line.strip(" -*\t") for line in transcript_text.splitlines() if line.strip()]
    summary = " ".join(lines[:3])[:1200] or "Transcript unavailable or too sparse for a confident summary."
    action_items = [
        line for line in lines if line.lower().startswith(("action:", "todo:", "next step:", "follow up:"))
    ][:8]
    risks = [line for line in lines if "risk" in line.lower() or "blocker" in line.lower()][:6]
    decisions = [line for line in lines if "decide" in line.lower() or "decision" in line.lower()][:6]
    confidence = "low" if len(transcript_text.strip()) < 300 else "medium"
    return {
        "summary": summary,
        "key_decisions": decisions,
        "action_items": action_items,
        "risks": risks,
        "confidence": confidence,
        "confidence_notes": "Generated with heuristic fallback because no LLM summary response was available.",
    }


def _render_summary_markdown(payload: TeamsMeetingSummaryPayload) -> str:
    lines = [
        f"# {payload.title or f'Meeting {payload.meeting_ref.meeting_id}'}",
        "",
        "## Summary",
        payload.summary or "No summary available.",
        "",
        "## Key Decisions",
        *([f"- {item}" for item in payload.key_decisions] or ["- None"]),
        "",
        "## Action Items",
        *([f"- {item}" for item in payload.action_items] or ["- None"]),
        "",
        "## Risks",
        *([f"- {item}" for item in payload.risks] or ["- None"]),
        "",
        f"Confidence: {payload.confidence or 'unknown'}",
        payload.confidence_notes or "",
    ]
    return "\n".join(lines).strip()