|
|
@@ -9,6 +9,7 @@ import os
|
|
|
import re
|
|
|
import subprocess
|
|
|
import sys
|
|
|
+import tempfile
|
|
|
import urllib.parse
|
|
|
import urllib.request
|
|
|
|
|
|
@@ -33,20 +34,48 @@ def read_json_file(path, default=None):
|
|
|
return default
|
|
|
|
|
|
|
|
|
def _atomic_write(path, payload_bytes):
    """Write `payload_bytes` to `path` atomically.

    Strategy: write to a sibling temp file, fsync, then os.replace.
    Concurrent readers either see the previous version or the new
    one, never a truncated/empty file. Required for state.json,
    which is the sole fallback source on the next run.

    The temp file comes from ``tempfile.mkstemp`` and is therefore created
    owner-only (0600). Before the rename its mode is reset to the mode of
    the file being replaced, or to the umask-derived default for a brand
    new file, so atomic writes keep the permissions a plain
    ``open(path, "w")`` would have left behind.

    Args:
        path: Destination file path. Missing parent directories are created.
        payload_bytes: Exact bytes to store in the file.

    Raises:
        OSError: If the temp file cannot be created, written or renamed.
            The temp file is removed before the error propagates.
    """
    parent = os.path.dirname(path) or "."
    os.makedirs(parent, exist_ok=True)

    try:
        # Preserve the permissions of the file we are about to replace.
        target_mode = os.stat(path).st_mode & 0o777
    except OSError:
        # No existing file: mirror what open(path, "w") would produce.
        # os.umask can only be read by setting it, so restore it at once.
        current_umask = os.umask(0)
        os.umask(current_umask)
        target_mode = 0o666 & ~current_umask

    fd, tmp_path = tempfile.mkstemp(
        prefix=".{}.".format(os.path.basename(path)),
        suffix=".tmp",
        dir=parent,
    )
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(payload_bytes)
            f.flush()
            try:
                os.fsync(f.fileno())
            except OSError:
                # fsync may fail on some filesystems (e.g. tmpfs without
                # backing store); the rename below still gives atomicity
                # within a single filesystem.
                pass
        try:
            os.chmod(tmp_path, target_mode)
        except OSError:
            # Best effort: some filesystems do not support chmod. The
            # content is still written, just with mkstemp's 0600 mode.
            pass
        # The temp file is closed before the rename (needed on Windows).
        os.replace(tmp_path, path)
    except BaseException:
        # BaseException so an interrupt (Ctrl-C, SystemExit) does not leave
        # a stray temp file next to the target; the error is re-raised.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
|
|
+
|
|
|
+
|
|
|
def write_json_file(path, data):
    """Serialize `data` to JSON and store it at `path` via `_atomic_write`.

    The JSON text is ASCII-escaped and indented by two spaces, then encoded
    as UTF-8 before being handed to `_atomic_write`.
    """
    encoded = json.dumps(data, indent=2, ensure_ascii=True).encode("utf-8")
    _atomic_write(path, encoded)
|
|
|
|
|
|
|
|
|
def write_text_file(path, data):
    """Store the string `data` at `path` as UTF-8 bytes via `_atomic_write`."""
    encoded = data.encode("utf-8")
    _atomic_write(path, encoded)
|
|
|
|
|
|
|
|
|
def build_url(base_url, params):
|
|
|
@@ -908,13 +937,32 @@ def choose_domain(filtered_domains, top_n, ranked_scored):
|
|
|
|
|
|
|
|
|
def build_output_settings(output_cfg, config_path_abs):
|
|
|
+ # H3: legacy keys (current_domain_file/current_domain_json/substore_vars_file)
|
|
|
+ # were ambiguous and drifted between server/local configs. Refuse to start
|
|
|
+ # so operators migrate to the unified key set documented in CLAUDE.md.
|
|
|
+ legacy_key_map = {
|
|
|
+ "current_domain_file": "selected_value_file",
|
|
|
+ "current_domain_json": "selected_value_json",
|
|
|
+ "substore_vars_file": "export_vars_file",
|
|
|
+ }
|
|
|
+ legacy_in_use = [k for k in legacy_key_map if k in output_cfg]
|
|
|
+ if legacy_in_use:
|
|
|
+ renames = ", ".join(
|
|
|
+ "{} -> {}".format(k, legacy_key_map[k]) for k in legacy_in_use
|
|
|
+ )
|
|
|
+ raise ValueError(
|
|
|
+ "deprecated output keys in config: {}. Rename them: {}.".format(
|
|
|
+ ", ".join(legacy_in_use), renames
|
|
|
+ )
|
|
|
+ )
|
|
|
+
|
|
|
runtime_dir_cfg = output_cfg.get("runtime_dir", "./runtime")
|
|
|
runtime_dir = resolve_path(os.path.dirname(config_path_abs), runtime_dir_cfg)
|
|
|
|
|
|
- selected_text_name = output_cfg.get("selected_value_file", output_cfg.get("current_domain_file", "current_domain.txt"))
|
|
|
- selected_json_name = output_cfg.get("selected_value_json", output_cfg.get("current_domain_json", "current_domain.json"))
|
|
|
+ selected_text_name = output_cfg.get("selected_value_file", "current_domain.txt")
|
|
|
+ selected_json_name = output_cfg.get("selected_value_json", "current_domain.json")
|
|
|
state_name = output_cfg.get("state_file", "state.json")
|
|
|
- vars_name = output_cfg.get("export_vars_file", output_cfg.get("substore_vars_file", "substore_vars.json"))
|
|
|
+ vars_name = output_cfg.get("export_vars_file", "substore_vars.json")
|
|
|
|
|
|
return {
|
|
|
"runtime_dir": runtime_dir,
|