Initial commit

This commit is contained in:
2025-05-31 12:58:55 -04:00
commit d13ea65209
18 changed files with 6436 additions and 0 deletions

39
examples/express.js Normal file
View File

@@ -0,0 +1,39 @@
const express = require('express');
const { shouldBlockCrawler, extractUserAgent } = require('crawl-me-not');

const app = express();

// Crawler-blocking middleware: runs before every route handler and
// short-circuits with a 403 when an AI crawler is detected.
app.use((req, res, next) => {
  const ua = extractUserAgent(req.headers);
  const verdict = shouldBlockCrawler(ua, {
    blockAI: true,
    blockSEO: false,
    debug: true,
  });

  if (!verdict.isBlocked) {
    next();
    return;
  }

  console.log(`Blocked ${verdict.crawlerType} crawler: ${verdict.userAgent}`);
  res.status(403).json({
    error: 'Access denied',
    reason: `${verdict.crawlerType} crawler detected`,
    userAgent: verdict.userAgent,
  });
});

// Routes below only run for requests the middleware let through.
app.get('/', (req, res) => {
  res.json({ message: 'Hello World! AI crawlers are blocked.' });
});

app.get('/api/data', (req, res) => {
  res.json({ data: 'This API is protected from AI crawlers' });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
  console.log(`Server running on port ${port}`);
  console.log('AI crawlers will receive a 403 response');
});

View File

@@ -0,0 +1,46 @@
import { shouldBlockCrawler, extractUserAgent } from 'crawl-me-not';
import { NextRequest, NextResponse } from 'next/server';
/**
 * Next.js edge middleware that rejects crawler traffic before it
 * reaches any page or API route. AI crawlers plus the extra custom
 * patterns get a 403 JSON response; everything else passes through.
 */
export function middleware(request: NextRequest) {
  const ua = extractUserAgent(request.headers);
  const verdict = shouldBlockCrawler(ua, {
    blockAI: true,
    blockSEO: false,
    customBlocked: [/scrapy/i, /curl/i], // Block additional patterns
    debug: true,
  });

  if (!verdict.isBlocked) {
    return NextResponse.next();
  }

  console.log(`Blocked ${verdict.crawlerType} crawler: ${verdict.userAgent}`);

  const body = {
    error: 'Access denied',
    reason: `${verdict.crawlerType} crawler detected`,
    userAgent: verdict.userAgent,
  };
  return NextResponse.json(body, {
    status: 403,
    headers: { 'X-Blocked-Reason': 'Automated traffic detected' },
  });
}
// Configure which paths to run middleware on
// Restrict the middleware to page routes only. Excluded prefixes:
//   - api           (API routes)
//   - _next/static  (static files)
//   - _next/image   (image optimization files)
//   - favicon.ico   (favicon file)
export const config = {
  matcher: ['/((?!api|_next/static|_next/image|favicon.ico).*)'],
};

View File

@@ -0,0 +1,32 @@
import { shouldBlockCrawler, extractUserAgent } from 'crawl-me-not';
import type { Handle } from '@sveltejs/kit';
/**
 * SvelteKit server hook that denies crawler requests with a 403 before
 * the route is resolved.
 *
 * Fix: the original sent the plain-text body 'Access denied' while
 * declaring `Content-Type: application/json` — a header/body mismatch.
 * The response now carries a JSON error object, matching the other
 * framework examples in this commit.
 */
export const handle: Handle = async ({ event, resolve }) => {
  const userAgent = extractUserAgent(event.request.headers);
  const result = shouldBlockCrawler(userAgent, {
    blockAI: true,
    blockSEO: false,
    debug: true
  });
  if (result.isBlocked) {
    console.log(`Blocked ${result.crawlerType} crawler: ${result.userAgent}`);
    // Serialize the same error shape the Express/Next examples return.
    return new Response(
      JSON.stringify({
        error: 'Access denied',
        reason: `${result.crawlerType} crawler detected`,
        userAgent: result.userAgent
      }),
      {
        status: 403,
        headers: {
          'Content-Type': 'application/json',
          'X-Blocked-Reason': `${result.crawlerType} crawler detected`
        }
      }
    );
  }
  // Not a blocked crawler: continue normal request resolution.
  return resolve(event);
};
// If you need to compose multiple handles:
// import { sequence } from '@sveltejs/kit/hooks';
//
// export const handle = sequence(
// crawlerBlockingHandle,
// // your other handles...
// );

39
examples/vanilla-node.js Normal file
View File

@@ -0,0 +1,39 @@
const http = require('node:http');
const { shouldBlockCrawler, extractUserAgent } = require('crawl-me-not');

// Plain Node HTTP server: AI crawlers get a 403 JSON error,
// everyone else gets the normal JSON greeting.
const server = http.createServer((req, res) => {
  const ua = extractUserAgent(req.headers);
  const verdict = shouldBlockCrawler(ua, {
    blockAI: true,
    blockSEO: false,
    debug: true,
  });

  if (verdict.isBlocked) {
    console.log(`Blocked ${verdict.crawlerType} crawler: ${verdict.userAgent}`);
    const payload = {
      error: 'Access denied',
      reason: `${verdict.crawlerType} crawler detected`,
      userAgent: verdict.userAgent,
    };
    res.statusCode = 403;
    res.setHeader('Content-Type', 'application/json');
    res.setHeader('X-Blocked-Reason', 'AI crawler detected');
    res.end(JSON.stringify(payload));
    return;
  }

  // Normal request handling
  res.statusCode = 200;
  res.setHeader('Content-Type', 'application/json');
  res.end(JSON.stringify({
    message: 'Hello World!',
    timestamp: new Date().toISOString(),
  }));
});

const port = process.env.PORT || 3000;
server.listen(port, () => {
  console.log(`Server running on port ${port}`);
  console.log('AI crawlers will receive a 403 response');
});