|
@@ -789,32 +789,25 @@ def choose_top_candidate_domains(filtered_domains, top_n, ranked_scored):
|
|
|
return filtered_domains[:top_n]
|
|
return filtered_domains[:top_n]
|
|
|
|
|
|
|
|
|
|
|
|
|
-def blank_top_candidate(domain="", source_type=""):
|
|
|
|
|
- return {
|
|
|
|
|
- "domain": domain,
|
|
|
|
|
- "ip": domain if IPV4_RE.match(domain) else "",
|
|
|
|
|
- "source_type": source_type,
|
|
|
|
|
- "sent": "",
|
|
|
|
|
- "received": "",
|
|
|
|
|
- "loss_rate": "",
|
|
|
|
|
- "avg_latency": "",
|
|
|
|
|
- "download_speed": "",
|
|
|
|
|
- "region": "",
|
|
|
|
|
- "location_country": "",
|
|
|
|
|
- "location_city": "",
|
|
|
|
|
- "host_provider": "",
|
|
|
|
|
- "score_value": None,
|
|
|
|
|
- "scores": [],
|
|
|
|
|
- "created_raw": "",
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
def text_or_blank(value):
|
|
def text_or_blank(value):
|
|
|
if value is None:
|
|
if value is None:
|
|
|
return ""
|
|
return ""
|
|
|
return str(value).strip()
|
|
return str(value).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+def set_if_nonempty_text(obj, key, value):
|
|
|
|
|
+ text = text_or_blank(value)
|
|
|
|
|
+ if text:
|
|
|
|
|
+ obj[key] = text
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def base_top_candidate(domain, source_type):
|
|
|
|
|
+ candidate = {"domain": domain, "source_type": source_type}
|
|
|
|
|
+ if IPV4_RE.match(domain):
|
|
|
|
|
+ candidate["ip"] = domain
|
|
|
|
|
+ return candidate
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def maybe_resolve_field(record, field_name, field_map):
|
|
def maybe_resolve_field(record, field_name, field_map):
|
|
|
if not isinstance(record, dict):
|
|
if not isinstance(record, dict):
|
|
|
return None
|
|
return None
|
|
@@ -825,36 +818,36 @@ def maybe_resolve_field(record, field_name, field_map):
|
|
|
|
|
|
|
|
def build_cfst_candidate(row):
|
|
def build_cfst_candidate(row):
|
|
|
domain = row.get("domain", "")
|
|
domain = row.get("domain", "")
|
|
|
- candidate = blank_top_candidate(domain=domain, source_type="cfst_local")
|
|
|
|
|
- candidate["ip"] = text_or_blank(row.get("ip") or domain)
|
|
|
|
|
- candidate["sent"] = text_or_blank(row.get("sent"))
|
|
|
|
|
- candidate["received"] = text_or_blank(row.get("received"))
|
|
|
|
|
- candidate["loss_rate"] = text_or_blank(row.get("loss_rate"))
|
|
|
|
|
- candidate["avg_latency"] = text_or_blank(row.get("avg_latency"))
|
|
|
|
|
- candidate["download_speed"] = text_or_blank(row.get("download_speed"))
|
|
|
|
|
- candidate["region"] = text_or_blank(row.get("region"))
|
|
|
|
|
|
|
+ candidate = base_top_candidate(domain=domain, source_type="cfst_local")
|
|
|
|
|
+ set_if_nonempty_text(candidate, "ip", row.get("ip") or domain)
|
|
|
|
|
+ set_if_nonempty_text(candidate, "loss_rate", row.get("loss_rate"))
|
|
|
|
|
+ set_if_nonempty_text(candidate, "avg_latency", row.get("avg_latency"))
|
|
|
|
|
+ set_if_nonempty_text(candidate, "download_speed", row.get("download_speed"))
|
|
|
|
|
+ set_if_nonempty_text(candidate, "region", row.get("region"))
|
|
|
return candidate
|
|
return candidate
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_api_candidate(domain, record, field_map, scored_record=None):
|
|
def build_api_candidate(domain, record, field_map, scored_record=None):
|
|
|
- candidate = blank_top_candidate(domain=domain, source_type="api")
|
|
|
|
|
- candidate["ip"] = domain if IPV4_RE.match(domain) else ""
|
|
|
|
|
|
|
+ candidate = base_top_candidate(domain=domain, source_type="api")
|
|
|
|
|
|
|
|
if record:
|
|
if record:
|
|
|
- candidate["created_raw"] = text_or_blank(maybe_resolve_field(record, "created_at", field_map))
|
|
|
|
|
- candidate["avg_latency"] = text_or_blank(maybe_resolve_field(record, "avg_latency", field_map))
|
|
|
|
|
- candidate["loss_rate"] = text_or_blank(maybe_resolve_field(record, "avg_pkg_lost_rate", field_map))
|
|
|
|
|
- candidate["location_country"] = text_or_blank(maybe_resolve_field(record, "location_country", field_map))
|
|
|
|
|
- candidate["location_city"] = text_or_blank(maybe_resolve_field(record, "location_city", field_map))
|
|
|
|
|
- candidate["host_provider"] = text_or_blank(maybe_resolve_field(record, "host_provider", field_map))
|
|
|
|
|
- region_parts = [candidate["location_country"], candidate["location_city"]]
|
|
|
|
|
- candidate["region"] = "/".join([x for x in region_parts if x])
|
|
|
|
|
|
|
+ set_if_nonempty_text(candidate, "created_raw", maybe_resolve_field(record, "created_at", field_map))
|
|
|
|
|
+ set_if_nonempty_text(candidate, "avg_latency", maybe_resolve_field(record, "avg_latency", field_map))
|
|
|
|
|
+ set_if_nonempty_text(candidate, "loss_rate", maybe_resolve_field(record, "avg_pkg_lost_rate", field_map))
|
|
|
|
|
+ set_if_nonempty_text(candidate, "download_speed", maybe_resolve_field(record, "download_speed", field_map))
|
|
|
|
|
+ location_country = text_or_blank(maybe_resolve_field(record, "location_country", field_map))
|
|
|
|
|
+ location_city = text_or_blank(maybe_resolve_field(record, "location_city", field_map))
|
|
|
|
|
+ region = "/".join([x for x in [location_country, location_city] if x])
|
|
|
|
|
+ set_if_nonempty_text(candidate, "region", region)
|
|
|
|
|
|
|
|
if scored_record:
|
|
if scored_record:
|
|
|
- candidate["score_value"] = scored_record.get("score_value")
|
|
|
|
|
- candidate["scores"] = list(scored_record.get("scores", []))
|
|
|
|
|
- if not candidate["created_raw"]:
|
|
|
|
|
- candidate["created_raw"] = text_or_blank(scored_record.get("created_raw"))
|
|
|
|
|
|
|
+ if scored_record.get("score_value") is not None:
|
|
|
|
|
+ candidate["score_value"] = scored_record.get("score_value")
|
|
|
|
|
+ scores = list(scored_record.get("scores", []))
|
|
|
|
|
+ if scores:
|
|
|
|
|
+ candidate["scores"] = scores
|
|
|
|
|
+ if "created_raw" not in candidate:
|
|
|
|
|
+ set_if_nonempty_text(candidate, "created_raw", scored_record.get("created_raw"))
|
|
|
|
|
|
|
|
return candidate
|
|
return candidate
|
|
|
|
|
|