|
@@ -159,6 +159,98 @@ def parse_created_time(s):
|
|
|
return None
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def record_field_value(record, field_path):
    """Return the value found at *field_path* inside *record*.

    The lookup itself is delegated to ``get_by_json_path``. ``None`` is
    returned without performing a lookup when *record* is not a dict or
    when *field_path* is empty/falsy.
    """
    lookup_possible = isinstance(record, dict) and bool(field_path)
    return get_by_json_path(record, field_path) if lookup_possible else None
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _rule_predicate(rule):
    """Build a ``str -> bool`` predicate for *rule*, or ``None`` if unusable.

    Exactly one operator is honoured, checked in the fixed priority
    ``contains`` > ``equals`` > ``regex``; once an operator key is present
    the later ones are ignored, even when its operand is unusable.
    """
    case_sensitive = bool(rule.get("case_sensitive", False))

    def fold(text):
        # Case-insensitive comparison is done on lower-cased text.
        return text if case_sensitive else text.lower()

    if "contains" in rule:
        operand = rule["contains"]
        # A None operand (e.g. an empty config key) must not turn into the
        # literal needle "None"; treat it like the empty string.
        needle = "" if operand is None else fold(str(operand))
        if not needle:
            return None
        return lambda text: needle in fold(text)

    if "equals" in rule:
        operand = rule["equals"]
        if operand is None:
            # No target configured; never compare against the text "None".
            return None
        target = fold(str(operand))
        return lambda text: fold(text) == target

    if "regex" in rule:
        operand = rule["regex"]
        pattern = "" if operand is None else str(operand)
        if not pattern:
            return None
        try:
            compiled = re.compile(pattern, 0 if case_sensitive else re.IGNORECASE)
        except Exception:
            # Deliberately broad: a bad user-supplied pattern can raise more
            # than re.error (e.g. ValueError for an inline "(?L)" flag on a
            # str pattern). Any uncompilable pattern simply never matches.
            return None
        return lambda text: compiled.search(text) is not None

    return None


def rule_matches(value, rule):
    """Return True when *value* satisfies the match *rule*.

    *rule* is a dict holding one operator key (``contains``, ``equals`` or
    ``regex``) and an optional ``case_sensitive`` flag (default False).
    *value* is expanded with ``flatten_values``; when that yields nothing
    the value itself is tested. Every candidate is compared via ``str()``,
    and the rule matches as soon as any candidate matches.

    Returns False when *value* is None, *rule* is not a dict, no operator
    key is present, the operand is None/empty (``contains``/``regex``) or
    None (``equals``), or the regex does not compile.
    """
    if value is None or not isinstance(rule, dict):
        return False

    predicate = _rule_predicate(rule)
    if predicate is None:
        return False

    # Validate the rule before flattening so unusable rules cost nothing.
    candidates = flatten_values(value) or [value]
    return any(predicate(str(item)) for item in candidates)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def collect_excluded_domains(payload, record_filter_cfg, scoring_cfg):
    """Return the set of domains whose record matches any exclusion rule.

    Args:
        payload: Parsed response data; records are read from it with
            ``get_values_by_path``.
        record_filter_cfg: Filter configuration. Keys read here:
            ``enabled``, ``exclude_if_any`` (list of rule dicts, each with a
            ``field_path`` plus the operator keys understood by
            ``rule_matches``), ``records_path`` and ``domain_field``.
        scoring_cfg: Scoring configuration, used only for fallbacks:
            ``records_path`` (default ``"data.good[]"``) and ``ip_field``
            (default ``"ip"``) stand in for a missing ``records_path`` /
            ``domain_field`` in *record_filter_cfg*.

    Returns:
        A set of normalised domain strings (stripped, lower-cased, trailing
        dots removed). Empty when the filter is disabled, misconfigured, or
        nothing matched.
    """
    if not isinstance(record_filter_cfg, dict) or not record_filter_cfg.get("enabled", False):
        return set()

    # Normalise the rules once instead of once per record. Entries that are
    # not dicts or lack a field_path are skipped rather than raising.
    usable_rules = []
    for rule in record_filter_cfg.get("exclude_if_any") or []:
        if not isinstance(rule, dict):
            continue
        raw_path = rule.get("field_path")
        field_path = "" if raw_path is None else str(raw_path).strip()
        if field_path:
            usable_rules.append((field_path, rule))
    if not usable_rules:
        return set()

    records_path = record_filter_cfg.get(
        "records_path", scoring_cfg.get("records_path", "data.good[]")
    )
    domain_field = record_filter_cfg.get(
        "domain_field", scoring_cfg.get("ip_field", "ip")
    )

    blocked = set()
    for record in get_values_by_path(payload, records_path):
        if not isinstance(record, dict):
            continue

        domain_raw = record_field_value(record, domain_field)
        domain = str(domain_raw or "").strip().lower().rstrip(".")
        # Nothing to do for nameless records or domains already excluded.
        if not domain or domain in blocked:
            continue

        if any(
            rule_matches(record_field_value(record, field_path), rule)
            for field_path, rule in usable_rules
        ):
            blocked.add(domain)

    return blocked
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def parse_scored_records(payload, scoring_cfg):
|
|
def parse_scored_records(payload, scoring_cfg):
|
|
|
if not scoring_cfg.get("enabled", False):
|
|
if not scoring_cfg.get("enabled", False):
|
|
|
return []
|
|
return []
|
|
@@ -393,6 +485,11 @@ def main():
|
|
|
parsed = parse_domains(payload, cfg.get("parser", {}))
|
|
parsed = parse_domains(payload, cfg.get("parser", {}))
|
|
|
filtered = apply_filter(parsed, cfg.get("domain_filter", {}))
|
|
filtered = apply_filter(parsed, cfg.get("domain_filter", {}))
|
|
|
|
|
|
|
|
|
|
+ record_filter_cfg = cfg.get("record_filter", {})
|
|
|
|
|
+ blocked_domains = collect_excluded_domains(payload, record_filter_cfg, cfg.get("scoring", {}))
|
|
|
|
|
+ if blocked_domains:
|
|
|
|
|
+ filtered = [d for d in filtered if d not in blocked_domains]
|
|
|
|
|
+
|
|
|
scored_records = parse_scored_records(payload, cfg.get("scoring", {}))
|
|
scored_records = parse_scored_records(payload, cfg.get("scoring", {}))
|
|
|
scored_records = [r for r in scored_records if r["domain"] in set(filtered)]
|
|
scored_records = [r for r in scored_records if r["domain"] in set(filtered)]
|
|
|
ranked_scored = rank_scored_records(scored_records, cfg.get("scoring", {}))
|
|
ranked_scored = rank_scored_records(scored_records, cfg.get("scoring", {}))
|