Selective web content extraction for AI agents — URL + query returns only the chunks that matter (Python library + MCP server)
Drift inferred · capture-to-capture
No drift recorded — single capability capture; advisories appear once its surface changes.
transport stdio · streamable-http · http
counts 0 tools · 0 resources · 0 prompts
permission surface via code analysis
No tools enumerated yet for this server.
prompt-surface
shipped agent-instruction files + hidden-content / dangerous-code findings —
quoted from the analyzed source
analyzed commit c5e4271 · analyzer v20 · 11h ago
skills & prompt files 2
- agent-rules bbulb-trawl-c5e4271/AGENTS.md
- agent-rules bbulb-trawl-c5e4271/CLAUDE.md
evidence-backed
findings quoted directly from the published source artifact — not inferred
filesystem 10
- fs bbulb-trawl-c5e4271/src/trawl/diagnostics.py :14
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/embedding_cache.py :12
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/fetch_cache.py :43
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/fetchers/pdf_backends.py :14
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/host_stats.py :37
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/profiles/__init__.py :25
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/profiles/cache.py :17
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/profiles/profile.py :19
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/profiles/vlm.py :28
  from pathlib import Path
- fs bbulb-trawl-c5e4271/src/trawl/telemetry.py :15
  from pathlib import Path
network 18
- net bbulb-trawl-c5e4271/src/trawl/diagnostics.py :17
  import httpx
- net bbulb-trawl-c5e4271/src/trawl/enrichment.py :30
  from urllib.parse import urlsplit
- net bbulb-trawl-c5e4271/src/trawl/fetch_cache.py :45
  import httpx
- net bbulb-trawl-c5e4271/src/trawl/fetchers/github.py :16
  from urllib.parse import urlsplit
- net bbulb-trawl-c5e4271/src/trawl/fetchers/passthrough.py :21
  from urllib.parse import urlsplit
- net bbulb-trawl-c5e4271/src/trawl/fetchers/pdf.py :17
  import httpx
- net bbulb-trawl-c5e4271/src/trawl/fetchers/stackexchange.py :16
  from urllib.parse import urlsplit
- net bbulb-trawl-c5e4271/src/trawl/fetchers/wikipedia.py :17
  from urllib.parse import unquote, urlsplit
- net bbulb-trawl-c5e4271/src/trawl/fetchers/youtube.py :15
  from urllib.parse import parse_qs, urlsplit
- net bbulb-trawl-c5e4271/src/trawl/host_stats.py :38
  from urllib.parse import urlsplit
- net bbulb-trawl-c5e4271/src/trawl/hyde.py :46
  import httpx
- net bbulb-trawl-c5e4271/src/trawl/pipeline.py :22
  from urllib.parse import urlsplit
- net bbulb-trawl-c5e4271/src/trawl/profiles/profile.py :20
  from urllib.parse import urlsplit
- net bbulb-trawl-c5e4271/src/trawl/profiles/vlm.py :30
  import httpx
- net bbulb-trawl-c5e4271/src/trawl/reranking.py :13
  import httpx
- net bbulb-trawl-c5e4271/src/trawl/retrieval.py :17
  import httpx
- net bbulb-trawl-c5e4271/src/trawl/telemetry.py :17
  from urllib.parse import urlsplit
- net bbulb-trawl-c5e4271/src/trawl_mcp/server.py :22
  from urllib.parse import urlsplit
secrets 3
- secrets bbulb-trawl-c5e4271/src/trawl/fetchers/github.py :26
  _GH_TOKEN = os.environ.get("GITHUB_TOKEN", "")
- secrets bbulb-trawl-c5e4271/src/trawl/fetchers/stackexchange.py :27
  _SE_API_KEY = os.environ.get("TRAWL_SE_API_KEY", "")
- secrets bbulb-trawl-c5e4271/src/trawl/profiles/vlm.py :40
  VLM_MAX_TOKENS = int(os.environ.get("TRAWL_VLM_MAX_TOKENS", "2048"))
declared dependencies 17
- httpx@>=0.27
- playwright@==1.58.0
- playwright-stealth@>=2.0
- trafilatura@>=1.6
- beautifulsoup4@>=4.12
- lxml@>=5.0
- pymupdf@>=1.24
- pyyaml@>=6.0
- rank_bm25@>=0.2.2
- mcp@>=1.27
- starlette@>=0.37
- uvicorn@>=0.30
- youtube-transcript-api@>=1.0
- readability-lxml@>=0.8
- markitdown@>=0.1.0
- pytest@>=8.0
- pytest-asyncio@>=0.23
obfuscation 4
- dynamic require()/import() bbulb-trawl-c5e4271/src/trawl/fetchers/playwright.py :26
  from playwright.sync_api import (
- dynamic require()/import() bbulb-trawl-c5e4271/src/trawl/fetchers/youtube.py :121
  from youtube_transcript_api import (
- dynamic require()/import() bbulb-trawl-c5e4271/src/trawl/pipeline.py :24
  from . import (
- dynamic require()/import() bbulb-trawl-c5e4271/src/trawl/profiles/__init__.py :29
  from .profile import (