// AI crawler detection - patterns auto-generated from xSeek's bot database
// Add to .env: XSEEK_API_KEY=your_api_key, XSEEK_WEBSITE_ID=your_website_id
/**
 * Known AI crawler / agent signatures.
 *
 * Each entry pairs the `name` reported to the tracking API with a
 * case-insensitive `pattern` that is tested against the request's User-Agent.
 *
 * ORDER MATTERS: the consumer stops at the first entry whose pattern matches,
 * so an entry whose pattern is a prefix of another (e.g. `AI2Bot` vs
 * `AI2Bot-Dolma`, `omgili` vs `omgilibot`) must come AFTER the more specific
 * one, or carry a negative lookahead (as `applebot` does), otherwise the
 * specific entry can never be reported.
 */
const AI_BOTS = [
  { name: 'anthropic-ai', pattern: /anthropic-ai/i },
  { name: 'claudebot', pattern: /ClaudeBot/i },
  { name: 'claude-web', pattern: /claude-web/i },
  { name: 'claude-user', pattern: /Claude-User/i },
  { name: 'claude-searchbot', pattern: /Claude-SearchBot/i },
  { name: 'claude-code', pattern: /claude-code\//i },
  { name: 'perplexitybot', pattern: /PerplexityBot/i },
  { name: 'perplexity-user', pattern: /Perplexity-User/i },
  { name: 'grokbot', pattern: /GrokBot(?!.*DeepSearch)/i },
  { name: 'grok-search', pattern: /xAI-Grok/i },
  { name: 'grok-deepsearch', pattern: /Grok-DeepSearch/i },
  { name: 'GPTBot', pattern: /GPTBot/i },
  { name: 'chatgpt-user', pattern: /ChatGPT-User/i },
  { name: 'oai-searchbot', pattern: /OAI-SearchBot/i },
  { name: 'google-extended', pattern: /Google-Extended/i },
  { name: 'Google-Agent', pattern: /Google-Agent/i },
  // Lookahead keeps plain Applebot from claiming Applebot-Extended traffic.
  { name: 'applebot', pattern: /Applebot(?!-Extended)/i },
  { name: 'applebot-extended', pattern: /Applebot-Extended/i },
  { name: 'meta-external', pattern: /meta-externalagent/i },
  { name: 'meta-externalfetcher', pattern: /meta-externalfetcher/i },
  { name: 'bingbot', pattern: /Bingbot(?!.*AI)/i },
  // NOTE(review): this entry is only reached when the bingbot pattern above
  // fails, i.e. when "AI" (any case) follows "bingbot" in the UA — confirm
  // that this precedence is what the bot database intends.
  { name: 'bingpreview', pattern: /bingbot.*Chrome/i },
  { name: 'microsoftpreview', pattern: /MicrosoftPreview/i },
  { name: 'cohere-ai', pattern: /cohere-ai/i },
  { name: 'cohere-training-data-crawler', pattern: /cohere-training-data-crawler/i },
  { name: 'youbot', pattern: /YouBot/i },
  { name: 'duckassistbot', pattern: /DuckAssistBot/i },
  { name: 'semanticscholarbot', pattern: /SemanticScholarBot/i },
  { name: 'ccbot', pattern: /CCBot/i },
  // Specific before generic: /AI2Bot/ also matches "AI2Bot-Dolma".
  { name: 'ai2bot-dolma', pattern: /AI2Bot-Dolma/i },
  { name: 'ai2bot', pattern: /AI2Bot/i },
  { name: 'aihitbot', pattern: /aiHitBot/i },
  { name: 'amazonbot', pattern: /Amazonbot/i },
  { name: 'novaact', pattern: /NovaAct/i },
  { name: 'brightbot', pattern: /Brightbot/i },
  { name: 'bytespider', pattern: /Bytespider/i },
  { name: 'tiktokspider', pattern: /TikTokSpider/i },
  { name: 'cotoyogi', pattern: /Cotoyogi/i },
  { name: 'crawlspace', pattern: /Crawlspace/i },
  { name: 'pangubot', pattern: /PanguBot/i },
  { name: 'petalbot', pattern: /PetalBot/i },
  { name: 'sidetrade-indexer', pattern: /Sidetrade indexer bot/i },
  { name: 'timpibot', pattern: /Timpibot/i },
  // Specific before generic: /omgili/ also matches "omgilibot".
  { name: 'omgilibot', pattern: /omgilibot/i },
  { name: 'omgili', pattern: /omgili/i },
  { name: 'webzio-extended', pattern: /Webzio-Extended/i },
  { name: 'baiduspider', pattern: /Baiduspider/i },
  { name: 'mistralai-user', pattern: /MistralAI-User/i },
];
/**
 * Detects whether the request comes from a known AI crawler and, if so,
 * reports the hit to the xSeek tracking endpoint.
 *
 * The User-Agent header is tested against AI_BOTS in order; the first
 * matching entry's name is reported. Nothing is sent when no entry matches
 * or when XSEEK_API_KEY / XSEEK_WEBSITE_ID are not set (a warning is logged
 * in the latter case). Failures of the tracking request are logged and never
 * rethrown.
 *
 * @param {object} req - Request exposing `headers.get(name)` and `nextUrl`.
 * @returns {Promise<void>}
 */
export async function detectAndLogAIBot(req) {
  const userAgent = req.headers.get('user-agent') || '';

  // First matching signature wins, in AI_BOTS order.
  const matched = AI_BOTS.find(({ pattern }) => pattern.test(userAgent));
  const botName = matched ? matched.name : null;
  if (!botName) {
    return;
  }

  const apiKey = process.env.XSEEK_API_KEY;
  const websiteId = process.env.XSEEK_WEBSITE_ID;
  if (!(apiKey && websiteId)) {
    console.warn('XSEEK_API_KEY or XSEEK_WEBSITE_ID not configured');
    return;
  }

  // x-forwarded-for may hold a comma-separated list; only the first entry is used.
  const forwardedFor = req.headers.get('x-forwarded-for');
  const clientIp = forwardedFor?.split(',')[0]?.trim() || '';

  try {
    const payload = JSON.stringify({
      botName,
      userAgent,
      url: req.nextUrl.toString(),
      // undefined values are omitted from the serialized JSON.
      ip: clientIp || undefined,
      referer: req.headers.get('referer') || undefined,
      websiteId,
    });
    const response = await fetch('https://www.xseek.io/api/track-ai-bot', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
      },
      body: payload,
    });

    if (response.ok) {
      console.log(`[xSeek] AI crawler detected: ${botName}`);
      return;
    }

    const responseBody = await response.text();
    console.error('[xSeek] Tracking failed', {
      status: response.status,
      statusText: response.statusText,
      body: responseBody,
    });
  } catch (err) {
    // Best-effort tracking: log and swallow so the caller is never affected.
    console.error('[xSeek] Tracking request error', {
      error: err?.message || err,
      botName,
      url: req.nextUrl.toString(),
    });
  }
}