diff --git a/skills/.experimental/olostep-web-research/SKILL.md b/skills/.experimental/olostep-web-research/SKILL.md new file mode 100644 index 00000000..f9ca2a98 --- /dev/null +++ b/skills/.experimental/olostep-web-research/SKILL.md @@ -0,0 +1,43 @@ +--- +name: olostep-web-research +description: > + Use when the user needs to scrape a URL, crawl a website, search the web, + or get an AI-powered answer grounded in live web data using the Olostep API. + Do NOT use for questions answerable from training data alone. +--- + +## Authentication + +Require OLOSTEP_API_KEY before any live API call. +Check: `echo $OLOSTEP_API_KEY` +If unset, tell the user: "Please run: export OLOSTEP_API_KEY='your_key'" +Get API keys at: https://www.olostep.com/dashboard/api-keys +Never ask the user to paste their key in chat. + +## Available actions + +Use the bundled scripts — never write new one-off HTTP code. + +**Scrape a URL** → `python scripts/scrape.py --url URL [--format markdown]` +Returns: clean markdown/html/text/json from the page. + +**Crawl a website** → `python scripts/crawl.py --url URL [--max-pages 20]` +Returns: markdown content from each crawled page. + +**Search the web** → `python scripts/search.py --query "QUERY"` +Returns: ranked links with titles and descriptions. + +**AI answer** → `python scripts/answer.py --task "TASK"` +Returns: AI-synthesized answer with source citations. + +## When to use each + +- Single page content needed → scrape.py +- Entire site or docs section → crawl.py +- Find sources on a topic → search.py +- Research question needing live data → answer.py + +## Error handling + +If a script fails with auth error → re-check OLOSTEP_API_KEY is set correctly. +If a URL fails → verify it's publicly accessible and includes https://. 
diff --git a/skills/.experimental/olostep-web-research/references/api_docs.md b/skills/.experimental/olostep-web-research/references/api_docs.md new file mode 100644 index 00000000..22e5cb13 --- /dev/null +++ b/skills/.experimental/olostep-web-research/references/api_docs.md @@ -0,0 +1,59 @@ +# Olostep API Quick Reference + +## Scrape Endpoint + +**Base:** `client.scrapes.create()` + +### Parameters +- `url_to_scrape` (required): URL to scrape +- `formats` (list): Content formats to return. Options: `"markdown"`, `"text"`, `"html"`, `"json"` +- `country` (optional): ISO country code for geo-specific scraping (e.g., `"us"`, `"gb"`) +- `parser` (optional): Dict with `{"id": "parser_id"}` for custom parsing + +### Response +- `markdown_content`: Cleaned markdown +- `text_content`: Plain text +- `html_content`: Raw HTML +- `json_content`: Structured JSON +- Returns first available content in priority order + +## Search Endpoint + +**Base:** `client.search.create()` + +### Parameters +- `query` (required): Search query string +- `max_results` (optional): Limit results (default varies by implementation) + +### Response +- List of results with `title`, `url`, `description` + +## Crawl Endpoint + +**Base:** `client.crawl.create()` + +### Parameters +- `url` (required): Starting URL +- `max_pages` (optional): Maximum pages to crawl (default 20) + +### Response +- List of pages with `url` and `markdown_content` + +## Answer Endpoint + +**Base:** `client.answer.create()` + +### Parameters +- `task` (required): Question or research task +- `sources` (optional): Pre-selected URLs to use + +### Response +- `answer`: AI-synthesized response +- `sources`: List of cited URLs + +## Error Handling + +All endpoints raise `Olostep_BaseError` on failure. 
#!/usr/bin/env python3
"""Scrape a URL using Olostep and print its content to stdout.

Usage:
    python scripts/scrape.py --url URL [--format markdown]
                             [--country CC] [--parser-id ID]

The API key is read from the OLOSTEP_API_KEY environment variable.

Exit status:
    0  the scrape call succeeded (content, possibly empty, was printed)
    1  OLOSTEP_API_KEY is unset, the olostep package is missing, or the
       Olostep client raised Olostep_BaseError
    2  invalid command-line arguments (argparse's standard behaviour)
"""
import argparse
import json
import os
import sys
from typing import Any, Optional, Sequence

# Fallback order used when the requested format has no content on the result.
# This is the same order the script has always tried the attributes in.
_FORMAT_PRIORITY = ("markdown", "text", "html", "json")


def _build_parser() -> argparse.ArgumentParser:
    """Return the argument parser describing this script's options."""
    parser = argparse.ArgumentParser(
        description="Scrape a URL using Olostep and print its content."
    )
    parser.add_argument("--url", required=True)
    parser.add_argument("--format", default="markdown")
    parser.add_argument("--country", default=None)
    parser.add_argument("--parser-id", default=None)
    return parser


def _select_content(result: Any, requested_format: str) -> Any:
    """Return the requested format's content, else the first non-empty fallback.

    Looks up ``<format>_content`` attributes on *result*. The requested
    format is tried first so that ``--format html`` is not overridden by
    markdown content that happens to be present as well. Returns ``""``
    when every candidate attribute is missing or empty.
    """
    candidates = [requested_format]
    candidates += [fmt for fmt in _FORMAT_PRIORITY if fmt != requested_format]
    for fmt in candidates:
        content = getattr(result, f"{fmt}_content", None)
        if content:
            return content
    return ""


def _render(content: Any) -> str:
    """Return *content* as printable text.

    Strings pass through untouched. Anything else is serialised as JSON so
    the output is machine-readable rather than a Python repr.
    NOTE(review): whether the SDK returns json_content as a str or as a
    parsed object is not visible from this script -- both are handled.
    """
    if isinstance(content, str):
        return content
    try:
        return json.dumps(content, ensure_ascii=False, indent=2)
    except (TypeError, ValueError):
        return str(content)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the scrape command and return the process exit status.

    Args:
        argv: Command-line arguments without the program name. ``None``
            makes argparse read ``sys.argv[1:]``.

    Returns:
        0 on success, 1 on a missing API key, missing SDK, or API error.

    Raises:
        SystemExit: raised by argparse on invalid arguments or ``--help``.
    """
    args = _build_parser().parse_args(argv)

    api_key = os.getenv("OLOSTEP_API_KEY")
    if not api_key:
        print("Error: OLOSTEP_API_KEY not set.", file=sys.stderr)
        return 1

    # Imported lazily so that --help, argument errors and the key check
    # work (with a readable message) even before the SDK is installed.
    try:
        from olostep import Olostep, Olostep_BaseError
    except ImportError:
        print(
            "Error: the 'olostep' package is not installed. "
            "Run: pip install 'olostep>=1.0.0'",
            file=sys.stderr,
        )
        return 1

    kwargs = {"url_to_scrape": args.url, "formats": [args.format]}
    if args.country:
        kwargs["country"] = args.country
    if args.parser_id:
        kwargs["parser"] = {"id": args.parser_id}

    try:
        client = Olostep(api_key=api_key)
        result = client.scrapes.create(**kwargs)
    except Olostep_BaseError as e:
        print(f"Olostep API error: {e}", file=sys.stderr)
        return 1

    content = _select_content(result, args.format)
    if not content:
        # Keep the exit status at 0 (the call itself succeeded) but make the
        # empty result visible instead of silently printing a blank line.
        print("Warning: Olostep returned no content for this URL.", file=sys.stderr)
    print(_render(content))
    return 0


if __name__ == "__main__":
    sys.exit(main())