// crawl-me-not/examples/nextjs-middleware.ts (TypeScript, 46 lines, 1.2 KiB)

import { shouldBlockCrawler, extractUserAgent } from 'crawl-me-not';
import { NextRequest, NextResponse } from 'next/server';
/**
 * Next.js middleware entry point that rejects requests whose user agent is
 * classified as a blocked crawler.
 *
 * The user agent is extracted from the request headers and handed to
 * `shouldBlockCrawler` with `blockAI` enabled, `blockSEO` disabled, two extra
 * custom patterns (`scrapy`, `curl`), and `debug` switched on.
 *
 * @param request - The incoming request supplied by Next.js.
 * @returns A 403 JSON response describing the block when the check reports
 *   `isBlocked`; otherwise `NextResponse.next()` so the request continues.
 */
export function middleware(request: NextRequest) {
  const verdict = shouldBlockCrawler(extractUserAgent(request.headers), {
    blockAI: true,
    blockSEO: false,
    // Extra user-agent patterns to block on top of the library's own lists.
    customBlocked: [/scrapy/i, /curl/i],
    debug: true
  });

  // Nothing to block: let the request continue through the chain.
  if (!verdict.isBlocked) {
    return NextResponse.next();
  }

  const { crawlerType, userAgent: blockedAgent } = verdict;
  console.log(`Blocked ${crawlerType} crawler: ${blockedAgent}`);

  // JSON payload returned to the blocked client.
  const body = {
    error: 'Access denied',
    reason: `${crawlerType} crawler detected`,
    userAgent: blockedAgent
  };
  const init = {
    status: 403,
    headers: {
      'X-Blocked-Reason': 'Automated traffic detected'
    }
  };

  return NextResponse.json(body, init);
}
// Middleware configuration: limits which request paths run the middleware.
// NOTE(review): Next.js documents that matcher values must be constants that
// can be analyzed at build time, so keep this as an inline literal — confirm
// against the Next.js version in use before refactoring.
export const config = {
matcher: [
/*
* Match all request paths except for the ones starting with:
* - api (API routes)
* - _next/static (static files)
* - _next/image (image optimization files)
* - favicon.ico (favicon file)
*
* The exclusions are written as a negative lookahead placed directly after
* the leading "/", so they are prefix tests: a path that merely begins with
* one of these strings (for example "/apiary") is excluded as well.
* The "." in "favicon.ico" is not escaped, so in the pattern it matches any
* single character rather than only a literal dot.
*/
'/((?!api|_next/static|_next/image|favicon.ico).*)',
],
};