diff --git a/lib/hits.ts b/lib/hits.ts index 42ff447..96a21d9 100644 --- a/lib/hits.ts +++ b/lib/hits.ts @@ -2,14 +2,127 @@ import { getDb, hashIp } from "./db"; import { lookupCountry } from "./geo"; // Patterns we don't want polluting analytics -const BOT_UA = /bot|crawl|spider|slurp|curl|wget|httpclient|python-requests|axios|node-fetch|monitor|uptime|pingdom|datadog|prometheus|scanner|fetch|preview|whatsapp|telegrambot|facebookexternalhit|linkedinbot|twitterbot|discordbot|skypeuripreview|mastodon|matrix-bot|preconnect|dnsperf|sentry|newrelic|gtmetrix|lighthouse|headlesschrome|phantomjs|puppeteer|playwright|chrome-lighthouse/i; -const SKIP_PATHS = /^\/(favicon\.ico|robots\.txt|sitemap\.xml|apple-touch-icon[\w-]*\.png|browserconfig\.xml|\.well-known\/|ads\.txt)/i; +const BOT_UA = /bot|crawl|spider|slurp|curl|wget|httpclient|python-requests|axios|node-fetch|monitor|uptime|pingdom|datadog|prometheus|scanner|fetch|preview|whatsapp|telegrambot|facebookexternalhit|linkedinbot|twitterbot|discordbot|skypeuripreview|mastodon|matrix-bot|preconnect|dnsperf|sentry|newrelic|gtmetrix|lighthouse|headlesschrome|phantomjs|puppeteer|playwright|chrome-lighthouse|go-http-client|java\/|okhttp|libwww|mechanize|nikto|sqlmap|nmap|masscan|zgrab|nuclei|acunetix|netcraft|expanse|censys|shodan|fuzz|burp|arachni|w3af|nikto|wpscan|gobuster|ffuf|dirb|dirbuster/i; -export function shouldRecord(method: string, path: string | null, userAgent: string | null): boolean { +// Known scanner / non-user paths. Treat any match as bot and skip recording. 
+const SKIP_PATHS = new RegExp([ + // Standard browser/bot probes + "^/favicon\\.", + "^/apple-touch-icon", + "^/robots\\.txt$", + "^/sitemap[\\w.-]*\\.xml", + "^/ads\\.txt$", + "^/browserconfig\\.xml$", + "^/\\.well-known/", + // Source-control / config leaks + "^/\\.git", + "^/\\.env", + "^/\\.DS_Store", + "^/\\.svn", + "^/\\.hg", + "^/\\.vscode", + "^/\\.idea", + // Common admin / app paths scanners poke + "^/wp-(admin|login|content|includes|json)", + "^/xmlrpc\\.php", + "^/wordpress/", + "^/admin/", + "^/administrator/", + "^/phpmyadmin", + "^/pma/", + "^/myadmin", + "^/mysql/", + "^/server-status", + "^/server-info", + "^/server\\b", + "^/info\\.php", + "^/test\\.php", + "^/login\\.action", + "^/console/", + "^/manager/", + "^/jenkins", + "^/jolokia", + "^/actuator", + "^/telescope", + "^/horizon", + "^/debug", + "^/trace\\.axd", + "^/elmah\\.axd", + "^/cgi-bin", + "^/about$", + // API / docs probing + "^/swagger", + "^/api-docs", + "^/api/swagger", + "^/v2/api-docs", + "^/v3/api-docs", + "^/v2/_catalog", + "^/webjars/", + "^/graphql", + "^/api/graphql", + "^/api/gql", + "^/api/?$", + // Vite / Next.js source-map probes + "^/_next/", + "^/@vite", + "^/__webpack", + "^/sourcemaps?/", + // Exchange / cpanel / WHM enumeration + "^/ecp/", + "^/owa/", + "^/___proxy_subdomain", + // Random JS/CSS-files scanners reach for (phishing-kit recon) + "^/(js|assets/js|static)/", + "^/(css|assets/css)/", + "^/bot-connect\\.", + "^/config\\.json$", + "^/composer\\.(json|lock)", + "^/package\\.(json|lock)", + // Path traversal / encoded probes + "\\.\\./", + "%2e%2e", + "/s/[0-9a-f]{20,}", + // Generic scanner queries + "rest_route=", +].join("|"), "i"); + +// In-memory per-IP scan-detector. If the same hashed IP hits >SCAN_THRESHOLD distinct +// paths in SCAN_WINDOW_MS, treat further requests in that window as a scan. 
+const SCAN_THRESHOLD = 4; +const SCAN_WINDOW_MS = 30_000; +type Tracker = { paths: Set<string>; firstSeen: number }; +const ipTracker = new Map<string, Tracker>(); + +function ipScanCheck(ipHash: string, path: string): boolean { + const now = Date.now(); + const cur = ipTracker.get(ipHash); + if (!cur || now - cur.firstSeen > SCAN_WINDOW_MS) { + ipTracker.set(ipHash, { paths: new Set([path]), firstSeen: now }); + return false; + } + cur.paths.add(path); + if (cur.paths.size > SCAN_THRESHOLD) return true; + return false; +} + +// Periodically prune (best-effort) +if (typeof setInterval !== "undefined") { + setInterval(() => { + const cutoff = Date.now() - SCAN_WINDOW_MS; + for (const [k, v] of ipTracker.entries()) { + if (v.firstSeen < cutoff) ipTracker.delete(k); + } + }, 60_000).unref?.(); +} + +export function shouldRecord(method: string, path: string | null, userAgent: string | null, ipHash?: string): boolean { const m = (method || "GET").toUpperCase(); if (m === "HEAD" || m === "OPTIONS") return false; if (path && SKIP_PATHS.test(path)) return false; if (userAgent && BOT_UA.test(userAgent)) return false; + // Heuristic: missing / very short UA is likely a script + if (!userAgent || userAgent.length < 15) return false; + if (ipHash && path && ipScanCheck(ipHash, path)) return false; return true; } diff --git a/package.json b/package.json index 992a63c..805f909 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "corex-nexredirect", - "version": "0.1.17", + "version": "0.1.18", "license": "MIT", "overrides": { "postcss": "^8.5.13", diff --git a/server.ts b/server.ts index 41da0cc..e6c4659 100644 --- a/server.ts +++ b/server.ts @@ -32,7 +32,10 @@ app.prepare().then(() => { req.socket.remoteAddress || "unknown"; const ua = (req.headers["user-agent"] as string) || null; - if (shouldRecord(req.method || "GET", req.url || "/", ua)) { + // Hash IP early so we can use it for the scan-detector check + const { hashIp } = await import("./lib/db"); + const ipHash = hashIp(ip); +
if (shouldRecord(req.method || "GET", req.url || "/", ua, ipHash)) { recordHit({ domain_id: resolved.domain_id, ip,