domain_updater.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
  1. #!/usr/bin/env python3
  2. import argparse
  3. import datetime as dt
  4. import json
  5. import os
  6. import re
  7. import socket
  8. import ssl
  9. import subprocess
  10. import sys
  11. import time
  12. import urllib.parse
  13. import urllib.request
# Syntactically valid DNS hostname: total length 1-253, labels of 1-63
# alphanumeric/hyphen chars that neither start nor end with a hyphen,
# and at least two labels (one dot is required).
DOMAIN_RE = re.compile(r"^(?=.{1,253}$)(?!-)[A-Za-z0-9-]{1,63}(?<!-)(\.(?!-)[A-Za-z0-9-]{1,63}(?<!-))+$")
# Dotted-quad IPv4 address with each octet constrained to 0-255.
IPV4_RE = re.compile(r"^(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}$")
  16. def utc_now_iso():
  17. return dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
  18. def read_json_file(path, default=None):
  19. if default is None:
  20. default = {}
  21. if not os.path.exists(path):
  22. return default
  23. with open(path, "r", encoding="utf-8") as f:
  24. return json.load(f)
  25. def write_json_file(path, data):
  26. os.makedirs(os.path.dirname(path), exist_ok=True)
  27. with open(path, "w", encoding="utf-8") as f:
  28. json.dump(data, f, ensure_ascii=True, indent=2)
  29. def write_text_file(path, data):
  30. os.makedirs(os.path.dirname(path), exist_ok=True)
  31. with open(path, "w", encoding="utf-8") as f:
  32. f.write(data)
  33. def build_url(base_url, params):
  34. if not params:
  35. return base_url
  36. parsed = urllib.parse.urlparse(base_url)
  37. current = urllib.parse.parse_qs(parsed.query)
  38. for k, v in params.items():
  39. current[k] = [str(v)]
  40. query = urllib.parse.urlencode(current, doseq=True)
  41. return urllib.parse.urlunparse(parsed._replace(query=query))
  42. def fetch_api_json(cfg):
  43. api = cfg["api"]
  44. url = build_url(api["url"], api.get("params", {}))
  45. method = api.get("method", "GET").upper()
  46. headers = api.get("headers", {})
  47. timeout = int(api.get("timeout_sec", 10))
  48. body_obj = api.get("body")
  49. body = None
  50. if body_obj is not None:
  51. body = json.dumps(body_obj).encode("utf-8")
  52. headers = {**headers, "Content-Type": "application/json"}
  53. req = urllib.request.Request(url=url, data=body, headers=headers, method=method)
  54. with urllib.request.urlopen(req, timeout=timeout) as resp:
  55. raw = resp.read().decode("utf-8", errors="replace")
  56. return json.loads(raw)
  57. def flatten_values(value):
  58. out = []
  59. if isinstance(value, str):
  60. out.append(value)
  61. elif isinstance(value, list):
  62. for item in value:
  63. out.extend(flatten_values(item))
  64. elif isinstance(value, dict):
  65. for item in value.values():
  66. out.extend(flatten_values(item))
  67. return out
  68. def get_by_json_path(data, path):
  69. cur = data
  70. for part in path.split("."):
  71. if isinstance(cur, dict) and part in cur:
  72. cur = cur[part]
  73. else:
  74. return None
  75. return cur
  76. def get_values_by_path(data, path):
  77. parts = path.split(".")
  78. def walk(cur, idx):
  79. if idx >= len(parts):
  80. return [cur]
  81. part = parts[idx]
  82. if part.endswith("[]"):
  83. key = part[:-2]
  84. if isinstance(cur, dict):
  85. arr = cur.get(key)
  86. else:
  87. arr = None
  88. if not isinstance(arr, list):
  89. return []
  90. out = []
  91. for item in arr:
  92. out.extend(walk(item, idx + 1))
  93. return out
  94. if isinstance(cur, dict) and part in cur:
  95. return walk(cur[part], idx + 1)
  96. return []
  97. return walk(data, 0)
  98. def parse_domains(payload, parser_cfg):
  99. domains = []
  100. for p in parser_cfg.get("field_paths", []):
  101. values = get_values_by_path(payload, p)
  102. domains.extend(flatten_values(values))
  103. for p in parser_cfg.get("json_paths", []):
  104. v = get_by_json_path(payload, p)
  105. if v is not None:
  106. domains.extend(flatten_values(v))
  107. if not domains:
  108. regex_s = parser_cfg.get("regex", r"[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
  109. text = json.dumps(payload, ensure_ascii=True)
  110. domains.extend(re.findall(regex_s, text))
  111. clean = []
  112. seen = set()
  113. for d in domains:
  114. d = d.strip().lower().rstrip(".")
  115. if (DOMAIN_RE.match(d) or IPV4_RE.match(d)) and d not in seen:
  116. seen.add(d)
  117. clean.append(d)
  118. return clean
  119. def parse_created_time(s):
  120. if not s:
  121. return None
  122. try:
  123. return dt.datetime.strptime(str(s).strip(), "%Y-%m-%d %H:%M:%S").replace(tzinfo=dt.timezone.utc)
  124. except Exception:
  125. return None
  126. def parse_scored_records(payload, scoring_cfg):
  127. if not scoring_cfg.get("enabled", False):
  128. return []
  129. records_path = scoring_cfg.get("records_path", "data.good[]")
  130. ip_field = scoring_cfg.get("ip_field", "ip")
  131. created_time_field = scoring_cfg.get("created_time_field", "createdTime")
  132. score_fields = scoring_cfg.get("score_fields", ["avgScore", "ydScore", "dxScore", "ltScore"])
  133. raw_records = get_values_by_path(payload, records_path)
  134. out = []
  135. for r in raw_records:
  136. if not isinstance(r, dict):
  137. continue
  138. domain = str(r.get(ip_field, "")).strip().lower().rstrip(".")
  139. if not domain:
  140. continue
  141. created = parse_created_time(r.get(created_time_field))
  142. scores = []
  143. for f in score_fields:
  144. v = r.get(f)
  145. try:
  146. scores.append(float(v))
  147. except Exception:
  148. scores.append(float("inf"))
  149. out.append(
  150. {
  151. "domain": domain,
  152. "created_at": created,
  153. "created_raw": r.get(created_time_field),
  154. "scores": scores,
  155. "raw": r,
  156. }
  157. )
  158. return out
  159. def rank_scored_records(records, scoring_cfg):
  160. if not records:
  161. return []
  162. within_hours = float(scoring_cfg.get("within_hours", 24))
  163. prefer_lower = bool(scoring_cfg.get("prefer_lower", True))
  164. use_api_order = bool(scoring_cfg.get("use_api_order", False))
  165. now = dt.datetime.now(dt.timezone.utc)
  166. cutoff = now - dt.timedelta(hours=within_hours)
  167. recent = [r for r in records if r.get("created_at") is not None and r["created_at"] >= cutoff]
  168. candidates = recent if recent else records
  169. if use_api_order:
  170. seen = set()
  171. ordered = []
  172. for r in candidates:
  173. d = r["domain"]
  174. if d in seen:
  175. continue
  176. seen.add(d)
  177. ordered.append(r)
  178. return ordered
  179. def key_lower(r):
  180. return tuple(r["scores"] + [r["domain"]])
  181. def key_higher(r):
  182. return tuple([-x if x != float("inf") else float("inf") for x in r["scores"]] + [r["domain"]])
  183. ranked = sorted(candidates, key=key_lower if prefer_lower else key_higher)
  184. return ranked
  185. def apply_filter(domains, filter_cfg):
  186. include_suffixes = [s.lower() for s in filter_cfg.get("include_suffixes", []) if s]
  187. exclude_regex = [re.compile(x) for x in filter_cfg.get("exclude_regex", []) if x]
  188. out = []
  189. for d in domains:
  190. if include_suffixes and not any(d.endswith(s) for s in include_suffixes):
  191. continue
  192. if any(rx.search(d) for rx in exclude_regex):
  193. continue
  194. out.append(d)
  195. return out
  196. def single_tls_check(domain, timeout_ms, port, tls_verify=True):
  197. start = time.perf_counter()
  198. timeout_sec = max(0.2, timeout_ms / 1000.0)
  199. try:
  200. infos = socket.getaddrinfo(domain, port, proto=socket.IPPROTO_TCP)
  201. if not infos:
  202. return False, None, "dns_empty"
  203. af, socktype, proto, _, sockaddr = infos[0]
  204. with socket.socket(af, socktype, proto) as sock:
  205. sock.settimeout(timeout_sec)
  206. sock.connect(sockaddr)
  207. if tls_verify:
  208. ctx = ssl.create_default_context()
  209. else:
  210. ctx = ssl._create_unverified_context()
  211. with ctx.wrap_socket(sock, server_hostname=domain) as ssock:
  212. ssock.do_handshake()
  213. elapsed = int((time.perf_counter() - start) * 1000)
  214. return True, elapsed, "ok"
  215. except Exception as e:
  216. return False, None, str(e)
  217. def check_domains(domains, hc_cfg):
  218. attempts = int(hc_cfg.get("attempts", 2))
  219. timeout_ms = int(hc_cfg.get("timeout_ms", 1800))
  220. port = int(hc_cfg.get("port", 443))
  221. tls_verify = bool(hc_cfg.get("tls_verify", True))
  222. results = []
  223. for d in domains:
  224. ok_count = 0
  225. latencies = []
  226. errors = []
  227. for _ in range(attempts):
  228. ok, latency, err = single_tls_check(d, timeout_ms, port, tls_verify=tls_verify)
  229. if ok:
  230. ok_count += 1
  231. latencies.append(latency)
  232. else:
  233. errors.append(err)
  234. success_ratio = ok_count / attempts if attempts else 0.0
  235. avg_latency = int(sum(latencies) / len(latencies)) if latencies else 999999
  236. results.append(
  237. {
  238. "domain": d,
  239. "success_ratio": success_ratio,
  240. "avg_latency_ms": avg_latency,
  241. "ok_count": ok_count,
  242. "attempts": attempts,
  243. "errors": errors[:3],
  244. }
  245. )
  246. results.sort(key=lambda x: (-x["success_ratio"], x["avg_latency_ms"], x["domain"]))
  247. return results
  248. def render_v2ray(template_file, output_file, token, domain):
  249. if not template_file or not output_file:
  250. return False
  251. if not os.path.exists(template_file):
  252. return False
  253. with open(template_file, "r", encoding="utf-8") as f:
  254. tpl = f.read()
  255. rendered = tpl.replace(token, domain)
  256. os.makedirs(os.path.dirname(output_file), exist_ok=True)
  257. with open(output_file, "w", encoding="utf-8") as f:
  258. f.write(rendered)
  259. return True
  260. def run_notify(cmd, domain, status):
  261. if not cmd:
  262. return
  263. env = os.environ.copy()
  264. env["AUTODOMAIN"] = domain
  265. env["AUTODOMAIN_STATUS"] = status
  266. subprocess.run(cmd, shell=True, check=False, env=env)
  267. def choose_domain(filtered_domains, check_results, top_n, ranked_scored):
  268. if ranked_scored:
  269. domains_by_score = [x["domain"] for x in ranked_scored]
  270. domain_set = set(domains_by_score)
  271. if check_results:
  272. check_map = {x["domain"]: x for x in check_results}
  273. top = []
  274. for d in domains_by_score:
  275. if d in check_map and check_map[d]["success_ratio"] > 0:
  276. top.append(check_map[d])
  277. if len(top) >= top_n:
  278. break
  279. if top:
  280. return top[0]["domain"], top
  281. score_only = [{"domain": x["domain"], "scores": x["scores"], "created_raw": x["created_raw"]} for x in ranked_scored[:top_n]]
  282. return score_only[0]["domain"], score_only
  283. top_scored = [{"domain": x["domain"], "scores": x["scores"], "created_raw": x["created_raw"]} for x in ranked_scored[:top_n]]
  284. if top_scored:
  285. return top_scored[0]["domain"], top_scored
  286. if check_results:
  287. top = [x for x in check_results if x["success_ratio"] > 0][:top_n]
  288. if top:
  289. return top[0]["domain"], top
  290. return None, check_results[:top_n]
  291. if filtered_domains:
  292. return filtered_domains[0], [{"domain": x} for x in filtered_domains[:top_n]]
  293. return None, []
def main():
    """Command-line entry point.

    Loads the config, fetches candidates from the API, filters/ranks/
    health-checks them, publishes the chosen domain to the runtime files,
    renders the v2ray config, and fires the notify hook. On any failure it
    records an error state and degrades to the last known-good domain when
    one is stored.
    """
    ap = argparse.ArgumentParser(description="Auto select VMess preferred domain")
    ap.add_argument("--config", default="config.json", help="Path to config JSON")
    args = ap.parse_args()
    config_path_abs = os.path.abspath(args.config)
    if not os.path.exists(config_path_abs):
        print(json.dumps({"status": "error", "error": f"config file not found: {config_path_abs}"}, ensure_ascii=True), file=sys.stderr)
        sys.exit(1)
    cfg = read_json_file(config_path_abs)
    output_cfg = cfg.get("output", {})
    runtime_dir_cfg = output_cfg.get("runtime_dir", "./runtime")
    # A relative runtime dir is resolved against the config file's directory,
    # not the process CWD, so cron/systemd invocations behave identically.
    if os.path.isabs(runtime_dir_cfg):
        runtime_dir = runtime_dir_cfg
    else:
        runtime_dir = os.path.normpath(os.path.join(os.path.dirname(config_path_abs), runtime_dir_cfg))
    v2_cfg = cfg.get("v2ray", {})
    notify_cfg = cfg.get("notify", {})
    current_domain_file = os.path.join(runtime_dir, output_cfg.get("current_domain_file", "current_domain.txt"))
    current_domain_json = os.path.join(runtime_dir, output_cfg.get("current_domain_json", "current_domain.json"))
    state_file = os.path.join(runtime_dir, output_cfg.get("state_file", "state.json"))
    substore_vars_file = os.path.join(runtime_dir, output_cfg.get("substore_vars_file", "substore_vars.json"))
    state = read_json_file(state_file, default={})
    # Previous successful selection; used as the fallback if this run fails.
    last_good = state.get("last_good_domain", "")
    try:
        payload = fetch_api_json(cfg)
        parsed = parse_domains(payload, cfg.get("parser", {}))
        filtered = apply_filter(parsed, cfg.get("domain_filter", {}))
        scored_records = parse_scored_records(payload, cfg.get("scoring", {}))
        # Only rank scored records whose domain also survived the filter.
        scored_records = [r for r in scored_records if r["domain"] in set(filtered)]
        ranked_scored = rank_scored_records(scored_records, cfg.get("scoring", {}))
        check_results = []
        if cfg.get("healthcheck", {}).get("enabled", True):
            check_results = check_domains(filtered, cfg.get("healthcheck", {}))
        top_n = int(cfg.get("selection", {}).get("top_n", 3))
        selected, top_candidates = choose_domain(filtered, check_results, top_n, ranked_scored)
        status = "ok"
        if not selected and last_good:
            selected = last_good
            status = "fallback_last_good"
        if not selected:
            raise RuntimeError("No valid domain available from API and no fallback in state")
        # Publish: plain-text domain file, JSON summary, and sub-store vars.
        write_text_file(current_domain_file, selected + "\n")
        current_json = {
            "domain": selected,
            "updated_at": utc_now_iso(),
            "status": status,
            "source_count": len(parsed),
            "checked_count": len(check_results),
            "top_candidates": top_candidates,
        }
        write_json_file(current_domain_json, current_json)
        write_json_file(
            substore_vars_file,
            {
                "AUTO_DOMAIN": selected,
                "UPDATED_AT": current_json["updated_at"],
                "STATUS": status,
            },
        )
        rendered = render_v2ray(
            template_file=v2_cfg.get("template_file", ""),
            output_file=v2_cfg.get("output_file", ""),
            token=v2_cfg.get("replace_token", "__AUTO_DOMAIN__"),
            domain=selected,
        )
        new_state = {
            "updated_at": current_json["updated_at"],
            "last_good_domain": selected,
            "status": status,
            "source_count": len(parsed),
            "checked_count": len(check_results),
            "rendered_v2ray": rendered,
        }
        write_json_file(state_file, new_state)
        run_notify(notify_cfg.get("command", ""), selected, status)
        print(json.dumps(current_json, ensure_ascii=True))
    except Exception as e:
        # Record the failure, then degrade to last-good if one is available.
        now = utc_now_iso()
        err_state = {
            "updated_at": now,
            "status": "error",
            "error": str(e),
            "last_good_domain": last_good,
        }
        write_json_file(state_file, err_state)
        if last_good:
            write_text_file(current_domain_file, last_good + "\n")
            write_json_file(
                current_domain_json,
                {
                    "domain": last_good,
                    "updated_at": now,
                    "status": "error_use_last_good",
                    "error": str(e),
                },
            )
            run_notify(notify_cfg.get("command", ""), last_good, "error_use_last_good")
            print(json.dumps({"status": "error_use_last_good", "error": str(e)}, ensure_ascii=True))
            return
        print(json.dumps({"status": "error", "error": str(e)}, ensure_ascii=True), file=sys.stderr)
        sys.exit(1)
if __name__ == "__main__":
    main()