Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 38 additions & 16 deletions tooling/sync/docker_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,19 @@ def build_docker_image(profile: str, image_tag: str, ethrex_dir: str) -> bool:
check=True
)
print("✅ Docker image built successfully")
# Prune build cache to prevent unbounded disk growth across runs
try:
result = subprocess.run(
["docker", "builder", "prune", "-f"],
capture_output=True, text=True, timeout=120
)
if result.returncode == 0:
reclaimed = result.stdout.strip().split("\n")[-1] if result.stdout.strip() else ""
print(f"🧹 Build cache pruned. {reclaimed}")
else:
print(f"⚠️ Build cache prune failed: {result.stderr.strip()}")
except Exception as e:
print(f"⚠️ Build cache prune error: {e}")
return True
except subprocess.CalledProcessError as e:
print(f"❌ Failed to build Docker image: {e}")
Expand Down Expand Up @@ -662,15 +675,18 @@ def save_container_logs(container: str, run_id: str, suffix: str = ""):
def save_all_logs(instances: list[Instance], run_id: str, compose_file: str):
"""Save logs for all containers (ethrex + consensus)."""
print(f"\n📁 Saving logs for run {run_id}...")

for inst in instances:
# Save ethrex logs
save_container_logs(inst.container, run_id)
# Save consensus logs (convention: consensus-{network})
consensus_container = inst.container.replace("ethrex-", "consensus-")
save_container_logs(consensus_container, run_id)

print(f"📁 Logs saved to {LOGS_DIR}/run_{run_id}/\n")

try:
for inst in instances:
# Save ethrex logs
save_container_logs(inst.container, run_id)
# Save consensus logs (convention: consensus-{network})
consensus_container = inst.container.replace("ethrex-", "consensus-")
save_container_logs(consensus_container, run_id)

print(f"📁 Logs saved to {LOGS_DIR}/run_{run_id}/\n")
except OSError as e:
print(f"⚠️ Failed to save some logs (disk full?): {e}", flush=True)
Comment on lines +679 to +689
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 except OSError in save_all_logs has incomplete coverage

The OSError guard here only catches errors from log_file.parent.mkdir() in save_container_logs, because that call sits outside save_container_logs's inner try block. However, write-time ENOSPC from log_file.write_text(logs) is swallowed by save_container_logs's broad except Exception handler, so the user-friendly "disk full?" message in save_all_logs is never printed for the most common failure scenario. The monitor won't crash either way (both paths print a warning), but the coverage is inconsistent.

A straightforward fix is to re-raise OSError from inside save_container_logs so it can propagate to the caller:

# save_container_logs – change the broad handler to only silence non-OSError exceptions
except OSError:
    raise  # let the caller (save_all_logs) handle disk-full
except Exception as e:
    print(f"  ⚠️ Error saving logs for {container}: {e}")
    return False
Prompt To Fix With AI
This is a comment left during a code review.
Path: tooling/sync/docker_monitor.py
Line: 427-437

Comment:
**`except OSError` in `save_all_logs` has incomplete coverage**

The `OSError` guard here only catches errors from `log_file.parent.mkdir()` in `save_container_logs`, because that call sits *outside* `save_container_logs`'s inner `try` block. However, write-time `ENOSPC` from `log_file.write_text(logs)` is swallowed by `save_container_logs`'s broad `except Exception` handler, so the user-friendly `"disk full?"` message in `save_all_logs` is never printed for the most common failure scenario. The monitor won't crash either way (both paths print a warning), but the coverage is inconsistent.

A straightforward fix is to re-raise `OSError` from inside `save_container_logs` so it can propagate to the caller:

```python
# save_container_logs – change the broad handler to only silence non-OSError exceptions
except OSError:
    raise  # let the caller (save_all_logs) handle disk-full
except Exception as e:
    print(f"  ⚠️ Error saving logs for {container}: {e}")
    return False
```

How can I resolve this? If you propose a fix, please make it concise.

Comment on lines +678 to +689
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

save_container_logs already catches exceptions



def log_run_result(run_id: str, run_count: int, instances: list[Instance], hostname: str, branch: str, commit: str, build_profile: str = "", diagnostics_tracker: Optional['DiagnosticsTracker'] = None):
Expand Down Expand Up @@ -737,13 +753,19 @@ def log_run_result(run_id: str, run_count: int, instances: list[Instance], hostn

lines.append("")
# Append to log file
with open(RUN_LOG_FILE, "a") as f:
f.write("\n".join(lines) + "\n")
print(f"📝 Run logged to {RUN_LOG_FILE}")
# Also write summary to the run folder
summary_file = LOGS_DIR / f"run_{run_id}" / "summary.txt"
summary_file.parent.mkdir(parents=True, exist_ok=True)
summary_file.write_text("\n".join(lines))
text = "\n".join(lines) + "\n"
try:
with open(RUN_LOG_FILE, "a") as f:
f.write(text)
print(f"📝 Run logged to {RUN_LOG_FILE}")
# Also write summary to the run folder
summary_file = LOGS_DIR / f"run_{run_id}" / "summary.txt"
summary_file.parent.mkdir(parents=True, exist_ok=True)
summary_file.write_text(text)
except OSError as e:
print(f"⚠️ Failed to write run log (disk full?): {e}", flush=True)
# Print to stdout so the result isn't lost entirely
print(text, flush=True)


def generate_run_id() -> str:
Expand Down
Loading