v0.1.19 — browser-signal heuristic: require Sec-Fetch + Accept-Language + Accept html (2 von 3)
This commit is contained in:
parent
47690ff96d
commit
91bb41ed05
3 changed files with 41 additions and 3 deletions
33
lib/hits.ts
33
lib/hits.ts
|
|
@ -115,14 +115,45 @@ if (typeof setInterval !== "undefined") {
|
|||
}, 60_000).unref?.();
|
||||
}
|
||||
|
||||
export function shouldRecord(method: string, path: string | null, userAgent: string | null, ipHash?: string): boolean {
|
||||
export type RequestSignals = {
|
||||
accept?: string | null;
|
||||
acceptLanguage?: string | null;
|
||||
secFetchMode?: string | null;
|
||||
secFetchDest?: string | null;
|
||||
secFetchSite?: string | null;
|
||||
};
|
||||
|
||||
export function shouldRecord(
|
||||
method: string,
|
||||
path: string | null,
|
||||
userAgent: string | null,
|
||||
ipHash?: string,
|
||||
signals?: RequestSignals,
|
||||
): boolean {
|
||||
const m = (method || "GET").toUpperCase();
|
||||
if (m === "HEAD" || m === "OPTIONS") return false;
|
||||
if (path && SKIP_PATHS.test(path)) return false;
|
||||
if (userAgent && BOT_UA.test(userAgent)) return false;
|
||||
// Heuristic: missing / very short UA is likely a script
|
||||
if (!userAgent || userAgent.length < 15) return false;
|
||||
// Real browsers always send Mozilla/ prefix; bots that fake it usually still match BOT_UA
|
||||
if (!/^Mozilla\//i.test(userAgent)) return false;
|
||||
if (ipHash && path && ipScanCheck(ipHash, path)) return false;
|
||||
|
||||
// Browser-signal check: a navigation request from a real browser sends Sec-Fetch-Mode
|
||||
// (Chrome/FF/Safari/Edge since 2020) AND Accept-Language. Curl/scanners almost never send both.
|
||||
// For redirect-server use-case a "real" hit is always a top-level navigation, so this is
|
||||
// safe to require.
|
||||
if (signals) {
|
||||
const hasSecFetch = !!signals.secFetchMode;
|
||||
const hasAcceptLang = !!signals.acceptLanguage;
|
||||
const acceptHtml = !!signals.accept && /text\/html/i.test(signals.accept);
|
||||
// Need at least 2 of 3 to pass (real browsers send all 3; old browsers may miss Sec-Fetch)
|
||||
const score = (hasSecFetch ? 1 : 0) + (hasAcceptLang ? 1 : 0) + (acceptHtml ? 1 : 0);
|
||||
if (score < 2) return false;
|
||||
// Sec-Fetch-Dest should be document/empty for navigation; reject script/image probes
|
||||
if (signals.secFetchDest && !/^(document|empty|iframe)$/i.test(signals.secFetchDest)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "corex-nexredirect",
|
||||
"version": "0.1.18",
|
||||
"version": "0.1.19",
|
||||
"license": "MIT",
|
||||
"overrides": {
|
||||
"postcss": "^8.5.13",
|
||||
|
|
|
|||
|
|
@ -35,7 +35,14 @@ app.prepare().then(() => {
|
|||
// Hash IP early so we can use it for the scan-detector check
|
||||
const { hashIp } = await import("./lib/db");
|
||||
const ipHash = hashIp(ip);
|
||||
if (shouldRecord(req.method || "GET", req.url || "/", ua, ipHash)) {
|
||||
const signals = {
|
||||
accept: (req.headers["accept"] as string) || null,
|
||||
acceptLanguage: (req.headers["accept-language"] as string) || null,
|
||||
secFetchMode: (req.headers["sec-fetch-mode"] as string) || null,
|
||||
secFetchDest: (req.headers["sec-fetch-dest"] as string) || null,
|
||||
secFetchSite: (req.headers["sec-fetch-site"] as string) || null,
|
||||
};
|
||||
if (shouldRecord(req.method || "GET", req.url || "/", ua, ipHash, signals)) {
|
||||
recordHit({
|
||||
domain_id: resolved.domain_id,
|
||||
ip,
|
||||
|
|
|
|||
Loading…
Reference in a new issue