# techlogia.de – robots.txt
# Ziel: Suchmaschinen und KI-Crawler explizit erlauben,
# aggressive Scraper und Security-Tools blockieren.

# --- General (fallback group) ---
# Applies to every crawler that has no more specific User-agent group in this
# file; a crawler that matches a named group follows only that group and
# ignores the rules listed here.
# Under longest-match evaluation (RFC 9309, Google) the specific Allow rules
# for /_next/image and /_next/static/media/ override the general /_next/
# Disallow, so images transformed via next/image and statically bundled media
# stay crawlable while the rest of /_next/ (e.g. JS chunks) remains excluded.
# The specific rules are listed before the catch-all "Allow: /" so that
# parsers which apply the first matching rule arrive at the same result.
User-agent: *
Allow: /_next/image
Allow: /_next/static/media/
Disallow: /_next/
Disallow: /api/
Allow: /
# /admin/ wird hier bewusst NICHT mehr gelistet (Pfad-"Werbung" vermeiden).
# Der Admin-Bereich ist stattdessen via X-Robots-Tag: noindex garantiert
# unindexiert – gesetzt in next.config.ts (greift auch, wenn die robots.txt ignoriert wird).

# --- Search engines & AI crawlers: explicitly allowed ---
# A crawler obeys only the most specific User-agent group that matches it and
# ignores the "*" group entirely. A bare "Allow: /" group would therefore lift
# the /_next/ and /api/ restrictions for exactly these crawlers, so the full
# rule set is repeated here. Keep it in sync with the "User-agent: *" group.
# Consecutive User-agent lines form one group that shares the rules below.
# Specific rules come before the catch-all "Allow: /" so that first-match
# parsers reach the same result as longest-match parsers.
# NOTE(review): "Disallow: /_next/" also covers any other asset served under
# that prefix for these crawlers — confirm page rendering is unaffected.
User-agent: Googlebot
User-agent: Google-Extended
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: Amazonbot
User-agent: cohere-ai
User-agent: Meta-ExternalAgent
User-agent: FacebookBot
User-agent: YandexBot
Allow: /_next/image
Allow: /_next/static/media/
Disallow: /_next/
Disallow: /api/
Allow: /

# --- Block known scrapers / security scanners ---
# One shared group: consecutive User-agent lines share the rule that follows,
# so every crawler named here is denied the entire site.
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: ia_archiver
User-agent: PetalBot
User-agent: Bytespider
Disallow: /

# --- Sitemaps ---
# Sitemap directives are not part of any User-agent group and apply to all
# crawlers. Two sitemaps are announced: one for the main site and one for the
# /docs/ section.
Sitemap: https://techlogia.de/sitemap.xml
Sitemap: https://techlogia.de/docs/sitemap.xml