mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Merge branch 'main' into rewbs/tool-use-charge-to-subscription
This commit is contained in:
commit
a2e56d044b
175 changed files with 18848 additions and 3772 deletions
|
|
@ -1130,24 +1130,26 @@ def web_search_tool(query: str, limit: int = 5) -> str:
|
|||
|
||||
|
||||
async def web_extract_tool(
|
||||
urls: List[str],
|
||||
format: str = None,
|
||||
urls: List[str],
|
||||
format: str = None,
|
||||
use_llm_processing: bool = True,
|
||||
model: Optional[str] = None,
|
||||
min_length: int = DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION
|
||||
) -> str:
|
||||
"""
|
||||
Extract content from specific web pages using available extraction API backend.
|
||||
|
||||
|
||||
This function provides a generic interface for web content extraction that
|
||||
can work with multiple backends. Currently uses Firecrawl.
|
||||
|
||||
|
||||
Args:
|
||||
urls (List[str]): List of URLs to extract content from
|
||||
format (str): Desired output format ("markdown" or "html", optional)
|
||||
use_llm_processing (bool): Whether to process content with LLM for summarization (default: True)
|
||||
model (Optional[str]): The model to use for LLM processing (defaults to current auxiliary backend model)
|
||||
min_length (int): Minimum content length to trigger LLM processing (default: 5000)
|
||||
|
||||
Security: URLs are checked for embedded secrets before fetching.
|
||||
|
||||
Returns:
|
||||
str: JSON string containing extracted content. If LLM processing is enabled and successful,
|
||||
|
|
@ -1156,6 +1158,16 @@ async def web_extract_tool(
|
|||
Raises:
|
||||
Exception: If extraction fails or API key is not set
|
||||
"""
|
||||
# Block URLs containing embedded secrets (exfiltration prevention)
|
||||
from agent.redact import _PREFIX_RE
|
||||
for _url in urls:
|
||||
if _PREFIX_RE.search(_url):
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Blocked: URL contains what appears to be an API key or token. "
|
||||
"Secrets must not be sent in URLs.",
|
||||
})
|
||||
|
||||
debug_call_data = {
|
||||
"parameters": {
|
||||
"urls": urls,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue