// AWS Lambda@Edge (Viewer Request) - AI bot tracking (ESM)
// Also supports CloudFront log record events.
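// Configuration: replace the 'PUT YOUR API KEY HERE' and 'PUT YOUR WEBSITE ID HERE'
// placeholders in the code below. They are hard-coded because Lambda@Edge does not
// support user-defined environment variables.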
/**
 * Known AI crawler / agent signatures.
 *
 * Each entry maps the bot name reported to xseek to a case-insensitive
 * pattern tested against the request's User-Agent string.
 *
 * ORDER MATTERS: the handler stops at the first matching entry, so an entry
 * whose pattern is a prefix of another (e.g. `AI2Bot` vs `AI2Bot-Dolma`)
 * must come AFTER the more specific one, or guard itself with a negative
 * lookahead as the Applebot and GrokBot entries do.
 */
const AI_BOTS = [
  { name: 'anthropic-ai', pattern: /anthropic-ai/i },
  { name: 'claudebot', pattern: /ClaudeBot/i },
  { name: 'claude-web', pattern: /claude-web/i },
  { name: 'claude-user', pattern: /Claude-User/i },
  { name: 'claude-searchbot', pattern: /Claude-SearchBot/i },
  { name: 'claude-code', pattern: /claude-code\//i },
  { name: 'perplexitybot', pattern: /PerplexityBot/i },
  { name: 'perplexity-user', pattern: /Perplexity-User/i },
  { name: 'grokbot', pattern: /GrokBot(?!.*DeepSearch)/i },
  { name: 'grok-search', pattern: /xAI-Grok/i },
  { name: 'grok-deepsearch', pattern: /Grok-DeepSearch/i },
  { name: 'GPTBot', pattern: /GPTBot/i },
  { name: 'chatgpt-user', pattern: /ChatGPT-User/i },
  { name: 'oai-searchbot', pattern: /OAI-SearchBot/i },
  { name: 'google-extended', pattern: /Google-Extended/i },
  { name: 'Google-Agent', pattern: /Google-Agent/i },
  { name: 'applebot', pattern: /Applebot(?!-Extended)/i },
  { name: 'applebot-extended', pattern: /Applebot-Extended/i },
  { name: 'meta-external', pattern: /meta-externalagent/i },
  { name: 'meta-externalfetcher', pattern: /meta-externalfetcher/i },
  { name: 'bingbot', pattern: /Bingbot(?!.*AI)/i },
  // NOTE(review): a UA containing "bingbot ... Chrome" is normally claimed by
  // the bingbot entry above first, so this entry looks rarely reachable —
  // confirm which user agent it is meant to catch.
  { name: 'bingpreview', pattern: /bingbot.*Chrome/i },
  { name: 'microsoftpreview', pattern: /MicrosoftPreview/i },
  { name: 'cohere-ai', pattern: /cohere-ai/i },
  { name: 'cohere-training-data-crawler', pattern: /cohere-training-data-crawler/i },
  { name: 'youbot', pattern: /YouBot/i },
  { name: 'duckassistbot', pattern: /DuckAssistBot/i },
  { name: 'semanticscholarbot', pattern: /SemanticScholarBot/i },
  { name: 'ccbot', pattern: /CCBot/i },
  // Specific before generic: /AI2Bot/ also matches "AI2Bot-Dolma".
  { name: 'ai2bot-dolma', pattern: /AI2Bot-Dolma/i },
  { name: 'ai2bot', pattern: /AI2Bot/i },
  { name: 'aihitbot', pattern: /aiHitBot/i },
  { name: 'amazonbot', pattern: /Amazonbot/i },
  { name: 'novaact', pattern: /NovaAct/i },
  { name: 'brightbot', pattern: /Brightbot/i },
  { name: 'bytespider', pattern: /Bytespider/i },
  { name: 'tiktokspider', pattern: /TikTokSpider/i },
  { name: 'cotoyogi', pattern: /Cotoyogi/i },
  { name: 'crawlspace', pattern: /Crawlspace/i },
  { name: 'pangubot', pattern: /PanguBot/i },
  { name: 'petalbot', pattern: /PetalBot/i },
  { name: 'sidetrade-indexer', pattern: /Sidetrade indexer bot/i },
  { name: 'timpibot', pattern: /Timpibot/i },
  // Specific before generic: /omgili/ also matches "omgilibot". A lookahead
  // would not help here because omgilibot UAs also contain "omgili.com".
  { name: 'omgilibot', pattern: /omgilibot/i },
  { name: 'omgili', pattern: /omgili/i },
  { name: 'webzio-extended', pattern: /Webzio-Extended/i },
  { name: 'baiduspider', pattern: /Baiduspider/i },
  { name: 'mistralai-user', pattern: /MistralAI-User/i },
];
import https from 'node:https';
/**
 * Percent-decode a value without ever throwing.
 *
 * @param {string|null|undefined} value - Raw, possibly percent-encoded text.
 * @returns {string} '' for an empty value or the "-" placeholder, the decoded
 *   text when decoding succeeds, otherwise the input unchanged.
 */
function safeDecode(value) {
  const isBlank = !value || value === '-';
  if (isBlank) return '';

  let decoded;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Malformed escape sequence: fall back to the raw text.
    decoded = value;
  }
  return decoded;
}
/**
 * Send one tracking event to the xseek API (best effort).
 *
 * The returned promise settles once the response has been consumed, the
 * response stream closes, or the request fails. Network failures resolve
 * (with the error object) rather than reject, so they never surface as
 * exceptions to the caller.
 *
 * @param {object} payload - JSON-serialisable event body.
 * @param {number} [timeoutMs=3000] - Socket inactivity timeout in
 *   milliseconds. Without it a stalled connection would keep the awaiting
 *   Lambda invocation open until the platform kills it.
 * @returns {Promise<unknown>} Resolves with undefined on completion or with
 *   the Error on failure.
 */
function postToXseek(payload, timeoutMs = 3000) {
  return new Promise((resolve) => {
    const body = JSON.stringify(payload);
    const req = https.request(
      {
        hostname: 'www.xseek.io',
        path: '/api/track-ai-bot',
        method: 'POST',
        timeout: timeoutMs,
        headers: {
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(body),
          'x-api-key': 'PUT YOUR API KEY HERE',
        },
      },
      (res) => {
        // Drain the body so 'end' fires; its content is not needed.
        res.resume();
        res.on('end', resolve);
        // An aborted response emits 'error'/'close' without 'end'. Resolving
        // again after 'end' is a harmless no-op.
        res.on('error', resolve);
        res.on('close', () => resolve());
      }
    );
    // Node only emits 'timeout'; the request must be torn down explicitly.
    // destroy(err) then emits 'error', which resolves below.
    req.on('timeout', () => {
      req.destroy(new Error(`xseek request timed out after ${timeoutMs} ms`));
    });
    req.on('error', resolve);
    req.write(body);
    req.end();
  });
}
/**
 * Extract tracking fields from a CloudFront viewer-request object.
 *
 * Headers are read in the Lambda@Edge shape used throughout this function:
 * `{ 'header-name': [{ value }] }`.
 *
 * @param {object} request - The `cf.request` object of a viewer-request event.
 * @returns {{userAgent: string, url: string, ip: string, referer: (string|undefined)}}
 *   `url` is absolute (https) when a host header is present, otherwise just
 *   path + query. `ip` is the first `x-forwarded-for` entry, falling back to
 *   `request.clientIp`, then ''.
 */
function parseViewerRequest(request) {
  const headers = request.headers || {};
  const headerValue = (name) => headers[name]?.[0]?.value;

  const userAgent = headerValue('user-agent') || '';
  const host = headerValue('host') || headerValue('x-host-header') || '';

  // Default a missing uri to "/" (as parseLogRecord does) so the URL never
  // contains the literal text "undefined".
  const uri = request.uri || '/';
  const query = request.querystring ? `?${request.querystring}` : '';
  const path = `${uri}${query}`;
  const url = host ? `https://${host}${path}` : path;

  // NOTE(review): x-forwarded-for may be supplied by the client itself;
  // confirm that it should take precedence over clientIp.
  const forwardedFor = headerValue('x-forwarded-for')?.split(',')[0]?.trim();
  const ip = forwardedFor || request.clientIp || '';

  const referer = headerValue('referer') || undefined;
  return { userAgent, url, ip, referer };
}
/**
 * Extract tracking fields from a CloudFront log record keyed by log field
 * name (`cs(User-Agent)`, `cs-uri-stem`, ...).
 *
 * A field that is missing, empty, or holds the "-" placeholder is treated as
 * absent everywhere, so "-" never leaks into the reported URL or IP.
 *
 * @param {object} record - Log record keyed by CloudFront field names.
 * @returns {{userAgent: string, url: string, ip: string, referer: (string|undefined)}}
 *   `url` is absolute (https) when a host is known, otherwise path + query.
 *   `ip` is `c-ip`, falling back to the first `x-forwarded-for` entry, then ''.
 */
function parseLogRecord(record) {
  // Normalise "no value" ("-", '', undefined, null) to the empty string.
  const field = (name) => {
    const value = record[name];
    return value && value !== '-' ? value : '';
  };

  const userAgent = safeDecode(field('cs(User-Agent)'));

  const host = field('cs(Host)') || field('x-host-header');
  const uri = field('cs-uri-stem') || '/';
  const query = field('cs-uri-query');
  const path = query ? `${uri}?${query}` : uri;
  const url = host ? `https://${host}${path}` : path;

  const ip = field('c-ip') || field('x-forwarded-for').split(',')[0].trim();

  const refererRaw = field('cs(Referer)');
  const referer = refererRaw ? safeDecode(refererRaw) : undefined;

  return { userAgent, url, ip, referer };
}
/**
 * Normalise an incoming event into `{ userAgent, url, ip, referer }`.
 *
 * Two shapes are recognised:
 *  - a viewer-request event, identified by `Records[0].cf.request`;
 *  - a log record, taken from the first array element, from `Records[0]`,
 *    or from the event itself, and identified by a `cs(User-Agent)` or
 *    `cs-uri-stem` field.
 *
 * Only the first record of an event is inspected.
 *
 * @param {*} event - Raw Lambda event.
 * @returns {object|null} Parsed request info, or null when the event matches
 *   neither shape.
 */
function extractRequestInfo(event) {
  const cfRequest = event?.Records?.[0]?.cf?.request;
  if (cfRequest) return parseViewerRequest(cfRequest);

  let candidate;
  if (Array.isArray(event)) {
    candidate = event[0];
  } else {
    candidate = event?.Records?.[0] ?? event;
  }

  const looksLikeLogRecord =
    Boolean(candidate) &&
    typeof candidate === 'object' &&
    Boolean(candidate['cs(User-Agent)'] || candidate['cs-uri-stem']);

  return looksLikeLogRecord ? parseLogRecord(candidate) : null;
}
/**
 * Lambda entry point: report AI-bot traffic to xseek, then pass the event on.
 *
 * Tracking is strictly best effort. Any failure while parsing the event or
 * sending the report is logged and swallowed so the function always returns
 * normally and the viewer request is never blocked by tracking.
 *
 * @param {*} event - A CloudFront viewer-request event or a log-record event.
 * @returns {Promise<*>} The viewer request object when the event carries one
 *   (so the request continues to the origin), otherwise the event itself.
 */
export const handler = async (event) => {
  const request = event?.Records?.[0]?.cf?.request;

  try {
    const info = extractRequestInfo(event);
    const userAgent = info?.userAgent || '';

    // First matching entry wins; AI_BOTS is ordered accordingly.
    const detectedBot = info
      ? AI_BOTS.find((bot) => bot.pattern.test(userAgent))?.name
      : undefined;

    if (detectedBot) {
      await postToXseek({
        botName: detectedBot,
        userAgent,
        url: info.url,
        ip: info.ip || undefined,
        referer: info.referer,
        websiteId: 'PUT YOUR WEBSITE ID HERE',
      });
    }
  } catch (err) {
    // Never let a tracking failure turn into a failed viewer request.
    console.error('AI bot tracking failed:', err);
  }

  // Always continue to the origin (Viewer Request). For log events, return the event.
  return request || event;
};