from timbal import Agent, Workflow
from timbal.state import get_run_context
def fetch_content(url: str) -> str:
    """Fetch a URL and return its body decoded to text.

    The body is decoded with the charset declared in the response's
    Content-Type header, falling back to UTF-8 when none is declared or the
    declared name is not a codec Python knows. Undecodable bytes are replaced
    with U+FFFD rather than aborting the whole fetch.

    Args:
        url: The URL to retrieve.

    Returns:
        The response body as a string.

    Raises:
        urllib.error.URLError: If the request fails or times out while
            connecting.
        ValueError: If ``url`` is not a well-formed URL.
    """
    import urllib.request

    # Without a timeout a stalled server would block the caller forever.
    # The value applies to each blocking socket operation, not the total time.
    timeout_seconds = 30

    # NOTE(review): urlopen also accepts non-HTTP schemes such as file: and
    # data:. Validate the scheme upstream if ``url`` can be user-supplied.
    with urllib.request.urlopen(url, timeout=timeout_seconds) as response:
        raw = response.read()
        charset = response.headers.get_content_charset() or "utf-8"

    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The server declared a charset name Python has no codec for.
        return raw.decode("utf-8", errors="replace")
def extract_metadata(html: str) -> dict:
    """Extract the page title and visible text from an HTML document.

    Args:
        html: Raw HTML markup.

    Returns:
        A dict with two keys:
        ``"title"``: contents of the first ``<title>`` element with entities
        decoded and whitespace collapsed, or ``"Untitled"`` when the element
        is missing or empty.
        ``"text"``: the markup with script/style blocks, comments and tags
        removed, entities decoded and whitespace collapsed, truncated to
        5000 characters.
    """
    import re
    # Only ``unescape`` is bound here; the ``html`` parameter is unaffected.
    from html import unescape

    # Tag names are case-insensitive and element content may span lines.
    flags = re.IGNORECASE | re.DOTALL

    title_match = re.search(r"<title\b[^>]*>(.*?)</title\s*>", html, flags)
    title = ""
    if title_match:
        title = re.sub(r"\s+", " ", unescape(title_match.group(1))).strip()

    # Drop script/style bodies first: their content is code, not page text,
    # and would otherwise survive the generic tag strip below.
    text = re.sub(r"<(script|style)\b[^>]*>.*?</\1\s*>", " ", html, flags=flags)
    # Comments may contain '>' and so need their own pass.
    text = re.sub(r"<!--.*?-->", " ", text, flags=re.DOTALL)
    text = re.sub(r"<[^>]+>", " ", text)
    # Decode entities after tag removal so that e.g. "&lt;b&gt;" stays text,
    # and before collapsing so that "&nbsp;" is treated as whitespace.
    text = re.sub(r"\s+", " ", unescape(text)).strip()

    return {
        "title": title or "Untitled",
        "text": text[:5000],
    }
# LLM agent for the summarisation step: its system prompt asks for a concise
# three-bullet summary of whatever text it is given.
summarizer = Agent(
    name="summarizer",
    model="openai/gpt-4.1-mini",
    system_prompt="Summarize the given text in 3 bullet points. Be concise.",
)
def format_report(title: str, summary: str) -> str:
    """Render the report as a Markdown H1 heading followed by the summary.

    Args:
        title: Text placed after the ``# `` heading marker.
        summary: Body text placed after one blank line.

    Returns:
        The heading and the summary separated by a blank line.
    """
    heading = f"# {title}"
    return f"{heading}\n\n{summary}"
# Four-step document pipeline: fetch -> extract -> summarize -> format.
# Each later step receives its inputs through zero-argument lambdas that read
# an earlier step's output from the run context by step name.
# NOTE(review): presumably the workflow evaluates these lambdas at run time and
# uses the step_span(...) references to order the steps; confirm against the
# Timbal Workflow documentation before restructuring them.
pipeline = (
Workflow(name="document_pipeline")
# Step 1: download the page. The URL is fixed at definition time.
.step(fetch_content, url="https://example.com")
# Step 2: parse the raw HTML returned by fetch_content.
.step(extract_metadata,
html=lambda: get_run_context().step_span("fetch_content").output)
# Step 3: summarize the extracted "text" field with the summarizer agent.
.step(summarizer,
prompt=lambda: get_run_context().step_span("extract_metadata").output["text"])
# Step 4: combine the extracted "title" with the agent's output, converted
# via collect_text() before being passed as the summary.
.step(format_report,
title=lambda: get_run_context().step_span("extract_metadata").output["title"],
summary=lambda: get_run_context().step_span("summarizer").output.collect_text())
)