mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-30 06:41:51 +00:00
fix(gateway): tighten MEDIA extraction regex + silent skip on file-not-found
Three related fixes for the MEDIA:<path> extraction pipeline that caused 'file not found' noise in platform channels:

1. run.py — tighten the tool-result MEDIA regex from \S+ (any non-whitespace) to require a path pattern with a known extension. Prevents LLM-generated placeholder paths like 'MEDIA:/path/to/example.mp4' from being captured as real media.

2. base.py — remove the |\S+ fallback in extract_media() that catches anything non-whitespace as a potential MEDIA path. This was the primary cause of false positives — strings like '' in tool output were captured as MEDIA: paths.

3. mattermost.py — replace the file-not-found error message sent to the channel with a silent logger.warning() skip. When a path extracted by MEDIA doesn't exist on disk, the channel no longer gets a noisy '(file not found: ...)' message.

Impact: eliminates the persistent 'file not found' spam in Mattermost channels caused by over-broad MEDIA regex patterns matching non-path text in tool output.
This commit is contained in:
parent
09b6dcc4f3
commit
ea49b38625
3 changed files with 20 additions and 5 deletions
|
|
@@ -2137,7 +2137,7 @@ class BasePlatformAdapter(ABC):
|
|||
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
|
||||
# and quoted/backticked paths for LLM-formatted outputs.
|
||||
media_pattern = re.compile(
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$))[`"']?'''
|
||||
)
|
||||
for match in media_pattern.finditer(content):
|
||||
path = match.group("path").strip()
|
||||
|
|
|
|||
|
|
@@ -471,9 +471,10 @@ class MattermostAdapter(BasePlatformAdapter):
|
|||
|
||||
p = Path(file_path)
|
||||
if not p.exists():
|
||||
return await self.send(
|
||||
chat_id, f"{caption or ''}\n(file not found: {file_path})", reply_to
|
||||
logger.warning(
|
||||
"Mattermost: local file not found, skipping: %s", file_path
|
||||
)
|
||||
return SendResult(success=True, message_id=None)
|
||||
|
||||
fname = file_name or p.name
|
||||
ct = mimetypes.guess_type(fname)[0] or "application/octet-stream"
|
||||
|
|
|
|||
|
|
@@ -15778,7 +15778,14 @@ class GatewayRunner:
|
|||
if _hm.get("role") in {"tool", "function"}:
|
||||
_hc = _hm.get("content", "")
|
||||
if "MEDIA:" in _hc:
|
||||
for _match in re.finditer(r'MEDIA:(\S+)', _hc):
|
||||
_TOOL_MEDIA_RE = re.compile(
|
||||
r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
|
||||
r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
|
||||
r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
|
||||
r'txt|csv|apk|ipa))',
|
||||
re.IGNORECASE
|
||||
)
|
||||
for _match in _TOOL_MEDIA_RE.finditer(_hc):
|
||||
_p = _match.group(1).strip().rstrip('",}')
|
||||
if _p:
|
||||
_history_media_paths.add(_p)
|
||||
|
|
@@ -16067,7 +16074,14 @@ class GatewayRunner:
|
|||
if msg.get("role") in {"tool", "function"}:
|
||||
content = msg.get("content", "")
|
||||
if "MEDIA:" in content:
|
||||
for match in re.finditer(r'MEDIA:(\S+)', content):
|
||||
_TOOL_MEDIA_RE = re.compile(
|
||||
r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
|
||||
r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
|
||||
r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
|
||||
r'txt|csv|apk|ipa))',
|
||||
re.IGNORECASE
|
||||
)
|
||||
for match in _TOOL_MEDIA_RE.finditer(content):
|
||||
path = match.group(1).strip().rstrip('",}')
|
||||
if path and path not in _history_media_paths:
|
||||
media_tags.append(f"MEDIA:{path}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue