init commit
examples/express.js (new file, +39)
@@ -0,0 +1,39 @@
const express = require('express');
const { shouldBlockCrawler, extractUserAgent } = require('crawl-me-not');

const app = express();

// Middleware to block AI crawlers
app.use((req, res, next) => {
  const userAgent = extractUserAgent(req.headers);
  const result = shouldBlockCrawler(userAgent, {
    blockAI: true,
    blockSEO: false,
    debug: true
  });

  if (result.isBlocked) {
    console.log(`Blocked ${result.crawlerType} crawler: ${result.userAgent}`);
    return res.status(403).json({
      error: 'Access denied',
      reason: `${result.crawlerType} crawler detected`,
      userAgent: result.userAgent
    });
  }

  next();
});

app.get('/', (req, res) => {
  res.json({ message: 'Hello World! AI crawlers are blocked.' });
});

app.get('/api/data', (req, res) => {
  res.json({ data: 'This API is protected from AI crawlers' });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
  console.log(`Server running on port ${port}`);
  console.log('AI crawlers will receive a 403 response');
});
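Note: all four examples read result.isBlocked, result.crawlerType, and result.userAgent from shouldBlockCrawler. A minimal sketch of the result shape implied by that usage; anything beyond those three fields, and the exact crawlerType values, are assumptions to check against the crawl-me-not docs rather than the library's documented API:

interface CrawlerCheckResult {
  isBlocked: boolean;    // whether the user agent matched a blocked category
  crawlerType: string;   // category label used in the log lines above; exact values are an assumption
  userAgent: string;     // the raw user agent string that was evaluated
}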
examples/nextjs-middleware.ts (new file, +46)
@@ -0,0 +1,46 @@
import { shouldBlockCrawler, extractUserAgent } from 'crawl-me-not';
import { NextRequest, NextResponse } from 'next/server';

export function middleware(request: NextRequest) {
  const userAgent = extractUserAgent(request.headers);
  const result = shouldBlockCrawler(userAgent, {
    blockAI: true,
    blockSEO: false,
    customBlocked: [/scrapy/i, /curl/i], // Block additional patterns
    debug: true
  });

  if (result.isBlocked) {
    console.log(`Blocked ${result.crawlerType} crawler: ${result.userAgent}`);

    return NextResponse.json(
      {
        error: 'Access denied',
        reason: `${result.crawlerType} crawler detected`,
        userAgent: result.userAgent
      },
      {
        status: 403,
        headers: {
          'X-Blocked-Reason': 'Automated traffic detected'
        }
      }
    );
  }

  return NextResponse.next();
}

// Configure which paths to run middleware on
export const config = {
  matcher: [
    /*
     * Match all request paths except for the ones starting with:
     * - api (API routes)
     * - _next/static (static files)
     * - _next/image (image optimization files)
     * - favicon.ico (favicon file)
     */
    '/((?!api|_next/static|_next/image|favicon.ico).*)',
  ],
};
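The matcher above excludes paths at the routing layer; the same exemption can also be done inside the middleware body. A sketch, using a hypothetical /health endpoint as the exempt path (not part of the example app above):

import { NextRequest, NextResponse } from 'next/server';

export function middleware(request: NextRequest) {
  // Hypothetical exemption: always let an assumed /health endpoint through,
  // regardless of user agent.
  if (request.nextUrl.pathname === '/health') {
    return NextResponse.next();
  }

  // ...crawler check as in the file above...
  return NextResponse.next();
}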
examples/sveltekit-hooks.server.ts (new file, +32)
@@ -0,0 +1,32 @@
import { shouldBlockCrawler, extractUserAgent } from 'crawl-me-not';
import type { Handle } from '@sveltejs/kit';

export const handle: Handle = async ({ event, resolve }) => {
  const userAgent = extractUserAgent(event.request.headers);
  const result = shouldBlockCrawler(userAgent, {
    blockAI: true,
    blockSEO: false,
    debug: true
  });

  if (result.isBlocked) {
    console.log(`Blocked ${result.crawlerType} crawler: ${result.userAgent}`);
    // Body is JSON-encoded to match the application/json Content-Type below
    return new Response(JSON.stringify({ error: 'Access denied' }), {
      status: 403,
      headers: {
        'Content-Type': 'application/json',
        'X-Blocked-Reason': `${result.crawlerType} crawler detected`
      }
    });
  }

  return resolve(event);
};

// If you need to compose multiple handles:
// import { sequence } from '@sveltejs/kit/hooks';
//
// export const handle = sequence(
//   crawlerBlockingHandle,
//   // your other handles...
// );
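A fuller sketch of the composition hinted at in the trailing comment, assuming the blocking logic above is factored into a named crawlerBlockingHandle; the second handle is a purely illustrative placeholder:

import { sequence } from '@sveltejs/kit/hooks';
import type { Handle } from '@sveltejs/kit';

// The blocking handle from the file above, given a name so it can be composed.
const crawlerBlockingHandle: Handle = async ({ event, resolve }) => {
  // ...crawler check as shown in the file above...
  return resolve(event);
};

// Placeholder second handle, purely illustrative.
const loggingHandle: Handle = async ({ event, resolve }) => {
  console.log(`${event.request.method} ${event.url.pathname}`);
  return resolve(event);
};

// Handles run left to right, so blocking happens before logging.
export const handle = sequence(crawlerBlockingHandle, loggingHandle);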
examples/vanilla-node.js (new file, +39)
@@ -0,0 +1,39 @@
const http = require('node:http');
const { shouldBlockCrawler, extractUserAgent } = require('crawl-me-not');

const server = http.createServer((req, res) => {
  const userAgent = extractUserAgent(req.headers);
  const result = shouldBlockCrawler(userAgent, {
    blockAI: true,
    blockSEO: false,
    debug: true
  });

  if (result.isBlocked) {
    console.log(`Blocked ${result.crawlerType} crawler: ${result.userAgent}`);

    res.statusCode = 403;
    res.setHeader('Content-Type', 'application/json');
    res.setHeader('X-Blocked-Reason', 'AI crawler detected');
    res.end(JSON.stringify({
      error: 'Access denied',
      reason: `${result.crawlerType} crawler detected`,
      userAgent: result.userAgent
    }));
    return;
  }

  // Normal request handling
  res.statusCode = 200;
  res.setHeader('Content-Type', 'application/json');
  res.end(JSON.stringify({
    message: 'Hello World!',
    timestamp: new Date().toISOString()
  }));
});

const port = process.env.PORT || 3000;
server.listen(port, () => {
  console.log(`Server running on port ${port}`);
  console.log('AI crawlers will receive a 403 response');
});
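A quick smoke test for any of the servers above, sketched with the fetch built into Node 18+ and run as an ES module. It assumes the server is listening on port 3000 and that GPTBot, a well-known AI crawler user agent, is on the library's AI block list:

// Run with Node 18+ as an ES module; assumes the server above is on port 3000.
const res = await fetch('http://localhost:3000/', {
  headers: { 'User-Agent': 'GPTBot/1.0' }
});
console.log(res.status);       // expected: 403 if GPTBot matches the AI list
console.log(await res.json()); // expected body: { error: 'Access denied', ... }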