mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
refactor: reorganize skills into sub-categories
The skills directory was getting disorganized — mlops alone had 40 skills in a flat list, and 12 categories were singletons with just one skill each. Code change: - prompt_builder.py: Support sub-categories in skill scanner. skills/mlops/training/axolotl/SKILL.md now shows as category 'mlops/training' instead of just 'mlops'. Backwards-compatible with existing flat structure. Split mlops (40 skills) into 7 sub-categories: - mlops/training (12): accelerate, axolotl, flash-attention, grpo-rl-training, peft, pytorch-fsdp, pytorch-lightning, simpo, slime, torchtitan, trl-fine-tuning, unsloth - mlops/inference (8): gguf, guidance, instructor, llama-cpp, obliteratus, outlines, tensorrt-llm, vllm - mlops/models (6): audiocraft, clip, llava, segment-anything, stable-diffusion, whisper - mlops/vector-databases (4): chroma, faiss, pinecone, qdrant - mlops/evaluation (5): huggingface-tokenizers, lm-evaluation-harness, nemo-curator, saelens, weights-and-biases - mlops/cloud (2): lambda-labs, modal - mlops/research (1): dspy Merged singleton categories: - gifs → media (gif-search joins youtube-content) - music-creation → media (heartmula, songsee) - diagramming → creative (excalidraw joins ascii-art) - ocr-and-documents → productivity - domain → research (domain-intel) - feeds → research (blogwatcher) - market-data → research (polymarket) Fixed misplaced skills: - mlops/code-review → software-development (not ML-specific) - mlops/ml-paper-writing → research (academic writing) Added DESCRIPTION.md files for all new/updated categories.
This commit is contained in:
parent
d6c710706f
commit
732c66b0f3
217 changed files with 39 additions and 4 deletions
107
skills/mlops/inference/instructor/references/examples.md
Normal file
107
skills/mlops/inference/instructor/references/examples.md
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
# Real-World Examples
|
||||
|
||||
Practical examples of using Instructor for structured data extraction.
|
||||
|
||||
## Data Extraction
|
||||
|
||||
```python
|
||||
class CompanyInfo(BaseModel):
|
||||
name: str
|
||||
founded: int
|
||||
industry: str
|
||||
employees: int
|
||||
|
||||
text = "Apple was founded in 1976 in the technology industry with 164,000 employees."
|
||||
|
||||
company = client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": f"Extract: {text}"}],
|
||||
response_model=CompanyInfo
|
||||
)
|
||||
```
|
||||
|
||||
## Classification
|
||||
|
||||
```python
|
||||
class Sentiment(str, Enum):
|
||||
POSITIVE = "positive"
|
||||
NEGATIVE = "negative"
|
||||
NEUTRAL = "neutral"
|
||||
|
||||
class Review(BaseModel):
|
||||
sentiment: Sentiment
|
||||
confidence: float = Field(ge=0.0, le=1.0)
|
||||
|
||||
review = client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": "This product is amazing!"}],
|
||||
response_model=Review
|
||||
)
|
||||
```
|
||||
|
||||
## Multi-Entity Extraction
|
||||
|
||||
```python
|
||||
class Person(BaseModel):
|
||||
name: str
|
||||
role: str
|
||||
|
||||
class Entities(BaseModel):
|
||||
people: list[Person]
|
||||
organizations: list[str]
|
||||
locations: list[str]
|
||||
|
||||
entities = client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": "Tim Cook, CEO of Apple, spoke in Cupertino..."}],
|
||||
response_model=Entities
|
||||
)
|
||||
```
|
||||
|
||||
## Structured Analysis
|
||||
|
||||
```python
|
||||
class Analysis(BaseModel):
|
||||
summary: str
|
||||
key_points: list[str]
|
||||
sentiment: Sentiment
|
||||
actionable_items: list[str]
|
||||
|
||||
analysis = client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": "Analyze: [long text]"}],
|
||||
response_model=Analysis
|
||||
)
|
||||
```
|
||||
|
||||
## Batch Processing
|
||||
|
||||
```python
|
||||
texts = ["text1", "text2", "text3"]
|
||||
results = [
|
||||
client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": text}],
|
||||
response_model=YourModel
|
||||
)
|
||||
for text in texts
|
||||
]
|
||||
```
|
||||
|
||||
## Streaming
|
||||
|
||||
```python
|
||||
for partial in client.messages.create_partial(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": "Generate report..."}],
|
||||
response_model=Report
|
||||
):
|
||||
print(f"Progress: {partial.title}")
|
||||
# Update UI in real-time
|
||||
```
|
||||
70
skills/mlops/inference/instructor/references/providers.md
Normal file
70
skills/mlops/inference/instructor/references/providers.md
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# Provider Configuration
|
||||
|
||||
Guide to using Instructor with different LLM providers.
|
||||
|
||||
## Anthropic Claude
|
||||
|
||||
```python
|
||||
import instructor
|
||||
from anthropic import Anthropic
|
||||
|
||||
# Basic setup
|
||||
client = instructor.from_anthropic(Anthropic())
|
||||
|
||||
# With API key
|
||||
client = instructor.from_anthropic(
|
||||
Anthropic(api_key="your-api-key")
|
||||
)
|
||||
|
||||
# Recommended mode
|
||||
client = instructor.from_anthropic(
|
||||
Anthropic(),
|
||||
mode=instructor.Mode.ANTHROPIC_TOOLS
|
||||
)
|
||||
|
||||
# Usage
|
||||
result = client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": "..."}],
|
||||
response_model=YourModel
|
||||
)
|
||||
```
|
||||
|
||||
## OpenAI
|
||||
|
||||
```python
|
||||
from openai import OpenAI
|
||||
|
||||
client = instructor.from_openai(OpenAI())
|
||||
|
||||
result = client.chat.completions.create(
|
||||
model="gpt-4o-mini",
|
||||
response_model=YourModel,
|
||||
messages=[{"role": "user", "content": "..."}]
|
||||
)
|
||||
```
|
||||
|
||||
## Local Models (Ollama)
|
||||
|
||||
```python
|
||||
client = instructor.from_openai(
|
||||
OpenAI(
|
||||
base_url="http://localhost:11434/v1",
|
||||
api_key="ollama"
|
||||
),
|
||||
mode=instructor.Mode.JSON
|
||||
)
|
||||
|
||||
result = client.chat.completions.create(
|
||||
model="llama3.1",
|
||||
response_model=YourModel,
|
||||
messages=[...]
|
||||
)
|
||||
```
|
||||
|
||||
## Modes
|
||||
|
||||
- `Mode.ANTHROPIC_TOOLS`: Recommended for Claude
|
||||
- `Mode.TOOLS`: OpenAI function calling
|
||||
- `Mode.JSON`: Fallback for unsupported providers
|
||||
606
skills/mlops/inference/instructor/references/validation.md
Normal file
606
skills/mlops/inference/instructor/references/validation.md
Normal file
|
|
@ -0,0 +1,606 @@
|
|||
# Advanced Validation Patterns
|
||||
|
||||
Complete guide to validation in Instructor using Pydantic.
|
||||
|
||||
## Table of Contents
|
||||
- Built-in Validators
|
||||
- Custom Field Validators
|
||||
- Model-Level Validation
|
||||
- Complex Validation Patterns
|
||||
- Error Handling
|
||||
|
||||
## Built-in Validators
|
||||
|
||||
### Numeric Constraints
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class Product(BaseModel):
|
||||
price: float = Field(gt=0, description="Price must be positive")
|
||||
discount: float = Field(ge=0, le=100, description="Discount 0-100%")
|
||||
quantity: int = Field(ge=1, description="At least 1 item")
|
||||
rating: float = Field(ge=0.0, le=5.0, description="Rating 0-5 stars")
|
||||
|
||||
# If LLM provides invalid values, automatic retry with error feedback
|
||||
```
|
||||
|
||||
**Available constraints:**
|
||||
- `gt`: Greater than
|
||||
- `ge`: Greater than or equal
|
||||
- `lt`: Less than
|
||||
- `le`: Less than or equal
|
||||
- `multiple_of`: Must be multiple of this number
|
||||
|
||||
### String Constraints
|
||||
|
||||
```python
|
||||
class User(BaseModel):
|
||||
username: str = Field(
|
||||
min_length=3,
|
||||
max_length=20,
|
||||
pattern=r'^[a-zA-Z0-9_]+$',
|
||||
description="3-20 alphanumeric characters"
|
||||
)
|
||||
bio: str = Field(max_length=500, description="Bio up to 500 chars")
|
||||
status: str = Field(pattern=r'^(active|inactive|pending)$')
|
||||
|
||||
# pattern validates against regex
|
||||
```
|
||||
|
||||
### Email and URL Validation
|
||||
|
||||
```python
|
||||
from pydantic import EmailStr, HttpUrl, AnyUrl
|
||||
|
||||
class Contact(BaseModel):
|
||||
email: EmailStr # Validates email format
|
||||
website: HttpUrl # Validates HTTP/HTTPS URLs
|
||||
portfolio: AnyUrl # Any valid URL scheme
|
||||
|
||||
contact = client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": "Extract: john@example.com, https://example.com"
|
||||
}],
|
||||
response_model=Contact
|
||||
)
|
||||
```
|
||||
|
||||
### Date and DateTime Validation
|
||||
|
||||
```python
|
||||
from datetime import date, datetime
|
||||
from pydantic import Field, field_validator
|
||||
|
||||
class Event(BaseModel):
|
||||
event_date: date # Validates date format
|
||||
created_at: datetime # Validates datetime format
|
||||
year: int = Field(ge=1900, le=2100)
|
||||
|
||||
@field_validator('event_date')
|
||||
def future_date(cls, v):
|
||||
"""Ensure event is in the future."""
|
||||
if v < date.today():
|
||||
raise ValueError('Event must be in the future')
|
||||
return v
|
||||
```
|
||||
|
||||
### List and Dict Validation
|
||||
|
||||
```python
|
||||
class Document(BaseModel):
|
||||
tags: list[str] = Field(min_length=1, max_length=10)
|
||||
keywords: list[str] = Field(min_length=3, description="At least 3 keywords")
|
||||
metadata: dict[str, str] = Field(description="String key-value pairs")
|
||||
|
||||
@field_validator('tags')
|
||||
def unique_tags(cls, v):
|
||||
"""Ensure tags are unique."""
|
||||
if len(v) != len(set(v)):
|
||||
raise ValueError('Tags must be unique')
|
||||
return v
|
||||
```
|
||||
|
||||
## Custom Field Validators
|
||||
|
||||
### Basic Field Validator
|
||||
|
||||
```python
|
||||
from pydantic import field_validator
|
||||
|
||||
class Person(BaseModel):
|
||||
name: str
|
||||
age: int
|
||||
|
||||
@field_validator('name')
|
||||
def name_must_not_be_empty(cls, v):
|
||||
"""Validate name is not empty or just whitespace."""
|
||||
if not v or not v.strip():
|
||||
raise ValueError('Name cannot be empty')
|
||||
return v.strip()
|
||||
|
||||
@field_validator('age')
|
||||
def age_must_be_reasonable(cls, v):
|
||||
"""Validate age is between 0 and 120."""
|
||||
if v < 0 or v > 120:
|
||||
raise ValueError('Age must be between 0 and 120')
|
||||
return v
|
||||
```
|
||||
|
||||
### Validator with Field Info
|
||||
|
||||
```python
|
||||
from pydantic import ValidationInfo
|
||||
|
||||
class Article(BaseModel):
|
||||
title: str
|
||||
content: str
|
||||
|
||||
@field_validator('content')
|
||||
def content_length(cls, v, info: ValidationInfo):
|
||||
"""Validate content is longer than title."""
|
||||
if 'title' in info.data:
|
||||
title_len = len(info.data['title'])
|
||||
if len(v) < title_len * 2:
|
||||
raise ValueError('Content should be at least 2x title length')
|
||||
return v
|
||||
```
|
||||
|
||||
### Multiple Fields Validation
|
||||
|
||||
```python
|
||||
class TimeRange(BaseModel):
|
||||
start_time: str
|
||||
end_time: str
|
||||
|
||||
@field_validator('start_time', 'end_time')
|
||||
def valid_time_format(cls, v):
|
||||
"""Validate both times are in HH:MM format."""
|
||||
import re
|
||||
if not re.match(r'^\d{2}:\d{2}$', v):
|
||||
raise ValueError('Time must be in HH:MM format')
|
||||
return v
|
||||
```
|
||||
|
||||
### Transform and Validate
|
||||
|
||||
```python
|
||||
class URL(BaseModel):
|
||||
url: str
|
||||
|
||||
@field_validator('url')
|
||||
def normalize_url(cls, v):
|
||||
"""Add https:// if missing."""
|
||||
if not v.startswith(('http://', 'https://')):
|
||||
v = f'https://{v}'
|
||||
return v
|
||||
```
|
||||
|
||||
## Model-Level Validation
|
||||
|
||||
### Cross-Field Validation
|
||||
|
||||
```python
|
||||
from pydantic import model_validator
|
||||
|
||||
class DateRange(BaseModel):
|
||||
start_date: str
|
||||
end_date: str
|
||||
|
||||
@model_validator(mode='after')
|
||||
def check_dates(self):
|
||||
"""Ensure end_date is after start_date."""
|
||||
from datetime import datetime
|
||||
start = datetime.strptime(self.start_date, '%Y-%m-%d')
|
||||
end = datetime.strptime(self.end_date, '%Y-%m-%d')
|
||||
|
||||
if end < start:
|
||||
raise ValueError('end_date must be after start_date')
|
||||
return self
|
||||
|
||||
class PriceRange(BaseModel):
|
||||
min_price: float
|
||||
max_price: float
|
||||
|
||||
@model_validator(mode='after')
|
||||
def check_price_range(self):
|
||||
"""Ensure max > min."""
|
||||
if self.max_price <= self.min_price:
|
||||
raise ValueError('max_price must be greater than min_price')
|
||||
return self
|
||||
```
|
||||
|
||||
### Conditional Validation
|
||||
|
||||
```python
|
||||
class Order(BaseModel):
|
||||
order_type: str # "standard" or "express"
|
||||
delivery_date: str
|
||||
delivery_time: Optional[str] = None
|
||||
|
||||
@model_validator(mode='after')
|
||||
def check_delivery_time(self):
|
||||
"""Express orders need delivery time."""
|
||||
if self.order_type == "express" and not self.delivery_time:
|
||||
raise ValueError('Express orders require delivery_time')
|
||||
return self
|
||||
```
|
||||
|
||||
### Complex Business Logic
|
||||
|
||||
```python
|
||||
class Discount(BaseModel):
|
||||
code: str
|
||||
percentage: float = Field(ge=0, le=100)
|
||||
min_purchase: float = Field(ge=0)
|
||||
max_discount: float = Field(ge=0)
|
||||
|
||||
@model_validator(mode='after')
|
||||
def validate_discount(self):
|
||||
"""Ensure discount logic is sound."""
|
||||
# Max discount can't exceed percentage of min_purchase
|
||||
theoretical_max = (self.percentage / 100) * self.min_purchase
|
||||
if self.max_discount > theoretical_max:
|
||||
self.max_discount = theoretical_max
|
||||
return self
|
||||
```
|
||||
|
||||
## Complex Validation Patterns
|
||||
|
||||
### Nested Model Validation
|
||||
|
||||
```python
|
||||
class Address(BaseModel):
|
||||
street: str
|
||||
city: str
|
||||
country: str
|
||||
postal_code: str
|
||||
|
||||
@field_validator('postal_code')
|
||||
def validate_postal_code(cls, v, info: ValidationInfo):
|
||||
"""Validate postal code format based on country."""
|
||||
if 'country' in info.data:
|
||||
country = info.data['country']
|
||||
if country == "USA":
|
||||
import re
|
||||
if not re.match(r'^\d{5}(-\d{4})?$', v):
|
||||
raise ValueError('Invalid US postal code')
|
||||
elif country == "Canada":
|
||||
if not re.match(r'^[A-Z]\d[A-Z] \d[A-Z]\d$', v):
|
||||
raise ValueError('Invalid Canadian postal code')
|
||||
return v
|
||||
|
||||
class Person(BaseModel):
|
||||
name: str
|
||||
address: Address
|
||||
|
||||
# Nested validation runs automatically
|
||||
```
|
||||
|
||||
### List of Models
|
||||
|
||||
```python
|
||||
class Task(BaseModel):
|
||||
title: str = Field(min_length=1)
|
||||
priority: int = Field(ge=1, le=5)
|
||||
|
||||
class Project(BaseModel):
|
||||
name: str
|
||||
tasks: list[Task] = Field(min_length=1, description="At least 1 task")
|
||||
|
||||
@field_validator('tasks')
|
||||
def at_least_one_high_priority(cls, v):
|
||||
"""Ensure at least one task has priority >= 4."""
|
||||
if not any(task.priority >= 4 for task in v):
|
||||
raise ValueError('Project needs at least one high-priority task')
|
||||
return v
|
||||
```
|
||||
|
||||
### Union Type Validation
|
||||
|
||||
```python
|
||||
from typing import Union
|
||||
|
||||
class TextBlock(BaseModel):
|
||||
type: str = "text"
|
||||
content: str = Field(min_length=1)
|
||||
|
||||
class ImageBlock(BaseModel):
|
||||
type: str = "image"
|
||||
url: HttpUrl
|
||||
alt_text: str
|
||||
|
||||
class Page(BaseModel):
|
||||
title: str
|
||||
blocks: list[Union[TextBlock, ImageBlock]]
|
||||
|
||||
@field_validator('blocks')
|
||||
def validate_block_types(cls, v):
|
||||
"""Ensure first block is TextBlock."""
|
||||
if v and not isinstance(v[0], TextBlock):
|
||||
raise ValueError('First block must be text')
|
||||
return v
|
||||
```
|
||||
|
||||
### Dependent Fields
|
||||
|
||||
```python
|
||||
class Subscription(BaseModel):
|
||||
plan: str # "free", "pro", "enterprise"
|
||||
max_users: int
|
||||
features: list[str]
|
||||
|
||||
@model_validator(mode='after')
|
||||
def validate_plan_limits(self):
|
||||
"""Enforce plan-specific limits."""
|
||||
limits = {
|
||||
"free": {"max_users": 1, "required_features": ["basic"]},
|
||||
"pro": {"max_users": 10, "required_features": ["basic", "advanced"]},
|
||||
"enterprise": {"max_users": 999, "required_features": ["basic", "advanced", "premium"]}
|
||||
}
|
||||
|
||||
if self.plan in limits:
|
||||
limit = limits[self.plan]
|
||||
|
||||
if self.max_users > limit["max_users"]:
|
||||
raise ValueError(f'{self.plan} plan limited to {limit["max_users"]} users')
|
||||
|
||||
for feature in limit["required_features"]:
|
||||
if feature not in self.features:
|
||||
raise ValueError(f'{self.plan} plan requires {feature} feature')
|
||||
|
||||
return self
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Graceful Degradation
|
||||
|
||||
```python
|
||||
class OptionalExtraction(BaseModel):
|
||||
# Required fields
|
||||
title: str
|
||||
|
||||
# Optional fields with defaults
|
||||
author: Optional[str] = None
|
||||
date: Optional[str] = None
|
||||
tags: list[str] = Field(default_factory=list)
|
||||
|
||||
# LLM can succeed even if it can't extract everything
|
||||
```
|
||||
|
||||
### Partial Validation
|
||||
|
||||
```python
|
||||
from pydantic import ValidationError
|
||||
|
||||
def extract_with_fallback(text: str):
|
||||
"""Try full extraction, fall back to partial."""
|
||||
try:
|
||||
# Try full extraction
|
||||
return client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": text}],
|
||||
response_model=FullModel
|
||||
)
|
||||
except ValidationError:
|
||||
# Fall back to partial model
|
||||
return client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": text}],
|
||||
response_model=PartialModel
|
||||
)
|
||||
```
|
||||
|
||||
### Validation Error Inspection
|
||||
|
||||
```python
|
||||
from pydantic import ValidationError
|
||||
|
||||
try:
|
||||
result = client.messages.create(
|
||||
model="claude-sonnet-4-5-20250929",
|
||||
max_tokens=1024,
|
||||
messages=[...],
|
||||
response_model=MyModel,
|
||||
max_retries=3
|
||||
)
|
||||
except ValidationError as e:
|
||||
# Inspect specific errors
|
||||
for error in e.errors():
|
||||
field = error['loc'][0]
|
||||
message = error['msg']
|
||||
print(f"Field '{field}' failed: {message}")
|
||||
|
||||
# Custom handling per field
|
||||
if field == 'email':
|
||||
# Handle email validation failure
|
||||
pass
|
||||
```
|
||||
|
||||
### Custom Error Messages
|
||||
|
||||
```python
|
||||
class DetailedModel(BaseModel):
|
||||
name: str = Field(
|
||||
min_length=2,
|
||||
max_length=100,
|
||||
description="Name between 2-100 characters"
|
||||
)
|
||||
age: int = Field(
|
||||
ge=0,
|
||||
le=120,
|
||||
description="Age between 0 and 120 years"
|
||||
)
|
||||
|
||||
@field_validator('name')
|
||||
def validate_name(cls, v):
|
||||
"""Provide helpful error message."""
|
||||
if not v.strip():
|
||||
raise ValueError(
|
||||
'Name cannot be empty. '
|
||||
'Please provide a valid name from the text.'
|
||||
)
|
||||
return v
|
||||
|
||||
# When validation fails, LLM sees these helpful messages
|
||||
```
|
||||
|
||||
## Validation Best Practices
|
||||
|
||||
### 1. Be Specific
|
||||
|
||||
```python
|
||||
# ❌ Bad: Vague validation
|
||||
class Item(BaseModel):
|
||||
name: str
|
||||
|
||||
# ✅ Good: Specific constraints
|
||||
class Item(BaseModel):
|
||||
name: str = Field(
|
||||
min_length=1,
|
||||
max_length=200,
|
||||
description="Item name, 1-200 characters"
|
||||
)
|
||||
```
|
||||
|
||||
### 2. Provide Context
|
||||
|
||||
```python
|
||||
# ✅ Good: Explain why validation failed
|
||||
@field_validator('price')
|
||||
def validate_price(cls, v):
|
||||
if v <= 0:
|
||||
raise ValueError(
|
||||
'Price must be positive. '
|
||||
'Extract numeric price from text without currency symbols.'
|
||||
)
|
||||
return v
|
||||
```
|
||||
|
||||
### 3. Use Enums for Fixed Sets
|
||||
|
||||
```python
|
||||
# ❌ Bad: String validation
|
||||
status: str
|
||||
|
||||
@field_validator('status')
|
||||
def validate_status(cls, v):
|
||||
if v not in ['active', 'inactive', 'pending']:
|
||||
raise ValueError('Invalid status')
|
||||
return v
|
||||
|
||||
# ✅ Good: Enum
|
||||
class Status(str, Enum):
|
||||
ACTIVE = "active"
|
||||
INACTIVE = "inactive"
|
||||
PENDING = "pending"
|
||||
|
||||
status: Status # Validation automatic
|
||||
```
|
||||
|
||||
### 4. Balance Strictness
|
||||
|
||||
```python
|
||||
# Too strict: May fail unnecessarily
|
||||
class StrictModel(BaseModel):
|
||||
date: str = Field(pattern=r'^\d{4}-\d{2}-\d{2}$')
|
||||
# Fails if LLM uses "2024-1-5" instead of "2024-01-05"
|
||||
|
||||
# Better: Normalize in validator
|
||||
class FlexibleModel(BaseModel):
|
||||
date: str
|
||||
|
||||
@field_validator('date')
|
||||
def normalize_date(cls, v):
|
||||
from datetime import datetime
|
||||
# Parse flexible formats
|
||||
for fmt in ['%Y-%m-%d', '%Y/%m/%d', '%m/%d/%Y']:
|
||||
try:
|
||||
dt = datetime.strptime(v, fmt)
|
||||
return dt.strftime('%Y-%m-%d') # Normalize
|
||||
except ValueError:
|
||||
continue
|
||||
raise ValueError('Invalid date format')
|
||||
```
|
||||
|
||||
### 5. Test Validation
|
||||
|
||||
```python
|
||||
# Test your validators with edge cases
|
||||
def test_validation():
|
||||
# Should succeed
|
||||
valid = MyModel(field="valid_value")
|
||||
|
||||
# Should fail
|
||||
try:
|
||||
invalid = MyModel(field="invalid")
|
||||
assert False, "Should have raised ValidationError"
|
||||
except ValidationError:
|
||||
pass # Expected
|
||||
|
||||
# Run tests before using in production
|
||||
```
|
||||
|
||||
## Advanced Techniques
|
||||
|
||||
### Conditional Required Fields
|
||||
|
||||
```python
|
||||
from typing import Optional
|
||||
|
||||
class ConditionalModel(BaseModel):
|
||||
type: str
|
||||
detail_a: Optional[str] = None
|
||||
detail_b: Optional[str] = None
|
||||
|
||||
@model_validator(mode='after')
|
||||
def check_required_details(self):
|
||||
"""Require different fields based on type."""
|
||||
if self.type == "type_a" and not self.detail_a:
|
||||
raise ValueError('type_a requires detail_a')
|
||||
if self.type == "type_b" and not self.detail_b:
|
||||
raise ValueError('type_b requires detail_b')
|
||||
return self
|
||||
```
|
||||
|
||||
### Validation with External Data
|
||||
|
||||
```python
|
||||
class Product(BaseModel):
|
||||
sku: str
|
||||
name: str
|
||||
|
||||
@field_validator('sku')
|
||||
def validate_sku(cls, v):
|
||||
"""Check SKU exists in database."""
|
||||
# Query database or API
|
||||
if not database.sku_exists(v):
|
||||
raise ValueError(f'SKU {v} not found in catalog')
|
||||
return v
|
||||
```
|
||||
|
||||
### Progressive Validation
|
||||
|
||||
```python
|
||||
# Start with loose validation
|
||||
class Stage1(BaseModel):
|
||||
data: str # Any string
|
||||
|
||||
# Then strict validation
|
||||
class Stage2(BaseModel):
|
||||
data: str = Field(pattern=r'^[A-Z]{3}-\d{6}$')
|
||||
|
||||
# Use Stage1 for initial extraction
|
||||
# Use Stage2 for final validation
|
||||
```
|
||||
|
||||
## Resources
|
||||
|
||||
- **Pydantic Docs**: https://docs.pydantic.dev/latest/concepts/validators/
|
||||
- **Instructor Examples**: https://python.useinstructor.com/examples
|
||||
Loading…
Add table
Add a link
Reference in a new issue