{"data":{"ai_crawlers":{"openai":[{"name":"GPTBot","userAgent":"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)","description":"OpenAI web crawler used to collect training data for future GPT models","status":"active","firstSeen":"2023-08-01","crawlDelay":1,"respectsRobots":true},{"name":"ChatGPT-User","userAgent":"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ChatGPT-User/1.0; +https://openai.com/bot)","description":"Crawler leveraged by ChatGPT browsing and plugin features","status":"active","firstSeen":"2023-03-01","crawlDelay":1,"respectsRobots":true}],"anthropic":[{"name":"Claude-Web","userAgent":"Mozilla/5.0 (compatible; Claude-Web/1.0; +https://anthropic.com/claude)","description":"Web crawler that feeds Anthropic’s Claude assistant","status":"active","firstSeen":"2024-01-01","crawlDelay":1,"respectsRobots":true},{"name":"ClaudeBot","userAgent":"Mozilla/5.0 (compatible; ClaudeBot/1.0; +https://anthropic.com)","description":"Anthropic general-purpose web crawler","status":"active","firstSeen":"2024-06-01","crawlDelay":1,"respectsRobots":true}],"google":[{"name":"Google-Extended","userAgent":"Mozilla/5.0 (compatible; Google-Extended/1.0; +https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers)","description":"Google crawler dedicated to AI training data collection","status":"active","firstSeen":"2023-08-01","crawlDelay":1,"respectsRobots":true},{"name":"Bard","userAgent":"Mozilla/5.0 (compatible; Bard/1.0; +https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers)","description":"Legacy crawler used by Google Bard search integrations","status":"deprecated","firstSeen":"2023-03-01","crawlDelay":1,"respectsRobots":true}],"microsoft":[{"name":"BingPreview","userAgent":"Mozilla/5.0 (compatible; BingPreview/1.0b; +https://www.bing.com/bingbot.htm)","description":"Microsoft Bing crawler for preview and AI-powered features","status":"active","firstSeen":"2023-02-01","crawlDelay":1,"respectsRobots":true}],"meta":[{"name":"Meta-ExternalAgent","userAgent":"facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)","description":"Meta (Facebook) external content crawler","status":"active","firstSeen":"2020-01-01","crawlDelay":2,"respectsRobots":true}],"others":[{"name":"CCBot","userAgent":"CCBot/2.0 (https://commoncrawl.org/faq/)","description":"Common Crawl project bot supplying open training corpora","status":"active","firstSeen":"2008-01-01","crawlDelay":2,"respectsRobots":true},{"name":"PerplexityBot","userAgent":"PerplexityBot/1.0 (+https://perplexity.ai)","description":"Perplexity AI search crawler","status":"active","firstSeen":"2023-01-01","crawlDelay":1,"respectsRobots":true},{"name":"YouBot","userAgent":"YouBot/1.0 (+https://you.com)","description":"You.com search crawler with AI enrichment","status":"active","firstSeen":"2022-01-01","crawlDelay":1,"respectsRobots":true},{"name":"Bytespider","userAgent":"Bytespider/1.0 (+https://www.bytedance.com/robot)","description":"ByteDance (TikTok) network crawler","status":"active","firstSeen":"2020-01-01","crawlDelay":2,"respectsRobots":false}]},"search_engines":[{"name":"Googlebot","userAgent":"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","description":"Google Search web crawler","status":"active","crawlDelay":1,"respectsRobots":true},{"name":"Bingbot","userAgent":"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)","description":"Microsoft Bing Search crawler","status":"active","crawlDelay":1,"respectsRobots":true},{"name":"Baiduspider","userAgent":"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)","description":"Baidu Search web crawler","status":"active","crawlDelay":1,"respectsRobots":true}],"malicious_bots":[{"category":"wordpress_scanners","patterns":["/wp-admin/","/wp-login.php","/xmlrpc.php","/wp-includes/wlwmanifest.xml"],"description":"Typical access patterns used by WordPress vulnerability scanners"},{"category":"git_scanners","patterns":["/.git/config","/.git/HEAD","/.gitignore"],"description":"Scanners targeting Git repository metadata"},{"category":"config_scanners","patterns":["/.env","/config.php","/.htaccess","/composer.json"],"description":"Scanners looking for configuration and sensitive files"}]},"metadata":{"timestamp":"2026-04-17T08:08:36.622Z","total_count":18,"api_version":"1.0","documentation":"https://www.aivboost.com/research/api-documentation"}}