From 0c36a011438585449ada2726c2a288bd59a73a97 Mon Sep 17 00:00:00 2001 From: qyli00 Date: Thu, 23 Apr 2026 13:22:54 -0700 Subject: [PATCH 1/7] fix: recursively download nested gym-environment directories The previous implementation stopped at one level of nesting, silently dropping any files in sub-subdirectories. Extract the subdirectory walk into a helper that recurses through the GitHub Contents API listing. Co-Authored-By: Claude Opus 4.7 (1M context) --- cli.py | 56 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/cli.py b/cli.py index ebc1c78..ed3cbc9 100644 --- a/cli.py +++ b/cli.py @@ -84,6 +84,35 @@ def _warn(msg: str) -> None: _GYM_ENV_SKIP = {"README.md"} +def _download_directory(api_url: str, dest_dir: Path) -> None: + """Recursively download all files from a GitHub API directory listing.""" + try: + req = urllib.request.Request(api_url, headers={"Accept": "application/vnd.github.v3+json"}) + with urllib.request.urlopen(req, timeout=15) as r: + entries = json.loads(r.read()) + except Exception as e: + _warn(f"Failed to list directory {dest_dir.name}: {e}") + return + + dest_dir.mkdir(parents=True, exist_ok=True) + for entry in entries: + name = entry.get("name", "") + etype = entry.get("type") + if etype == "file": + url = entry.get("download_url") + if not url: + continue + try: + with urllib.request.urlopen(url, timeout=15) as r: + (dest_dir / name).write_bytes(r.read()) + except Exception as e: + _warn(f"Failed to download {dest_dir.name}/{name}: {e}") + elif etype == "dir": + sub_url = entry.get("url") + if sub_url: + _download_directory(sub_url, dest_dir / name) + + def _install_gym_environment(dest: Path) -> None: """Download gym-environment files into dest and add them to .gitignore.""" try: @@ -114,30 +143,9 @@ def _install_gym_environment(dest: Path) -> None: _warn(f"Failed to download {name}: {e}") elif etype == "dir": - # Fetch subdirectory contents recursively (one level 
deep) - try: - sub_req = urllib.request.Request( - f"{_GYM_ENV_API}/{name}", - headers={"Accept": "application/vnd.github.v3+json"}, - ) - with urllib.request.urlopen(sub_req, timeout=15) as r: - sub_entries = json.loads(r.read()) - except Exception as e: - _warn(f"Failed to list directory {name}: {e}") - continue - - sub_dir = dest / name - sub_dir.mkdir(parents=True, exist_ok=True) - for sub in sub_entries: - sub_name = sub.get("name", "") - sub_url = sub.get("download_url") - if sub.get("type") != "file" or not sub_url: - continue - try: - with urllib.request.urlopen(sub_url, timeout=15) as r: - (sub_dir / sub_name).write_bytes(r.read()) - except Exception as e: - _warn(f"Failed to download {name}/{sub_name}: {e}") + sub_url = entry.get("url") + if sub_url: + _download_directory(sub_url, dest / name) downloaded.append(name) if not downloaded: From a5fd023d7cdb9d084861a3d93614255d094f9bc1 Mon Sep 17 00:00:00 2001 From: qyli00 Date: Wed, 3 Jun 2026 13:42:47 -0700 Subject: [PATCH 2/7] feat: integrate Entire (entire.io) to log AI workflows with consent-gated upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture AI agent sessions (prompts, responses, tool calls, files touched) via the Entire CLI and, only with user consent, upload them for research. - entire_logging.py: best-effort wrapper around the `entire` binary — setup (enable hooks + detected agents), ensure_git_repo (init non-git MLE workspaces), commit_workspace + push_branch (MLE code push), flush (CR checkpoint), has_sessions, and upload (inject aicodinggym-meta.json via git plumbing, push entire/checkpoints/v1 to a per-problem branch). - config.py: persist upload consent (entire_logging_consent) and the writable submission_repo_url; get_logging_consent/set_logging_consent helpers. - cli.py: - configure: offers to install Entire, captures submission repo URL, --upload-logs/--no-upload-logs to pre-set consent. 
- fetch/download (swe/cr/mle): set up local capture. If Entire isn't installed, point the user at `aicodinggym configure` (which offers to install it) instead of silently skipping. MLE inits a git repo. - submit (swe/cr/mle): consent-gated upload. First submit prompts once (research-only, de-identified). Non-interactive sessions never upload without recorded consent. - One repo, many branches: all three benchmarks log to the user's single repo (recorded from SWE fetch / configure), identified by branch aicodinggym-logs/<benchmark>/<problem_id> + an aicodinggym-meta.json file. CR's cloned PR repo (read-only) is never used as a target. - MLE also pushes the user's solution code (data/ excluded) to a branch, gated by the same log-upload consent. - README: document the logging feature, consent flow, unified repo, MLE code push, and privacy. - bump version 0.5.1 -> 0.6.0 Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 62 +++++++- __init__.py | 2 +- cli.py | 332 ++++++++++++++++++++++++++++++++++++++- config.py | 26 +++- entire_logging.py | 389 ++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 6 files changed, 801 insertions(+), 12 deletions(-) create mode 100644 entire_logging.py diff --git a/README.md b/README.md index f64b306..863273b 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,9 @@ aicodinggym configure --user-id USER_ID [--workspace-dir DIR] |---|---|---| | `--user-id` | Yes | Your AI Coding Gym user ID | | `--workspace-dir` | No | Default workspace directory (default: cwd) | +| `--upload-logs` / `--no-upload-logs` | No | Pre-set consent for uploading de-identified AI session logs for research. If unset, you're asked once on your first submit. | + +On first run, if the [Entire](https://entire.io) CLI isn't installed, `configure` offers to install it (used for [AI workflow logging](#ai-workflow-logging-entire)).
--- @@ -72,6 +75,8 @@ aicodinggym swe submit PROBLEM_ID [--message MSG] [--force] [--user-id ID] [--wo |---|---| | `--message, -m` | Commit message (auto-generated if omitted) | | `--force` | Force push with `--force-with-lease` | +| `--upload-logs` / `--no-upload-logs` | Override consent for uploading de-identified AI session logs | +| `--logs-remote` | Git URL to push AI session logs to (defaults to this problem's repo) | #### `aicodinggym swe test PROBLEM_ID` @@ -137,6 +142,8 @@ aicodinggym mle submit COMPETITION_ID -F FILE [--user-id ID] [--message MSG] |---|---|---| | `-F` | Yes | Path to prediction CSV file | | `--message, -m` | No | Submission description | +| `--upload-logs` / `--no-upload-logs` | No | Override consent for uploading de-identified AI session logs | +| `--logs-remote` | No | Git URL to push AI session logs to (defaults to your submission repo) | --- @@ -169,6 +176,58 @@ echo "My review" | aicodinggym cr submit PROBLEM_ID | `-f, --file` | Path to a file containing your review (e.g. `review.md`) | | `-m, --message` | Inline review text | | stdin | Pipe review text from stdin | +| `--upload-logs` / `--no-upload-logs` | Override consent for uploading de-identified AI session logs | +| `--logs-remote` | Git URL to push AI session logs to (defaults to your submission repo) | + +--- + +## AI Workflow Logging (Entire) + +AI Coding Gym can optionally capture **how** a solution was produced — the AI +agent prompts, responses, tool calls and files touched — for research. This is +powered by the [Entire](https://entire.io) CLI, which hooks into git and stores +sessions on a separate `entire/checkpoints/v1` branch (it never adds commits to +your working branch). + +**Consent first.** Capture is **local only** until you opt in. The first time +you `submit` (unless already configured), you're asked once: + +> AI Coding Gym can upload this AI coding session (prompts, responses, files +> changed) for research only. Data is de-identified/anonymized before use. 
+ +Your choice is saved in `~/.aicodinggym/config.json`. Set it ahead of time with +`aicodinggym configure --upload-logs` / `--no-upload-logs`, or override per +submit with `--upload-logs` / `--no-upload-logs`. In non-interactive sessions +with no recorded choice, nothing is uploaded. + +**One repo, many branches.** All three benchmarks log to your **single** repo +(the writable per-user repo — one repo, many branches), so a log is identified +by its branch name even though the repo holds many problems. SWE `fetch` records +that repo URL (`submission_repo_url`) so CR/MLE reuse it; the cloned CR PR repo +is read-only and is never used as a target. Override with `--logs-remote` or the +`AICODINGGYM_LOGS_REMOTE` env var. + +**How it works** + +1. `swe fetch` / `cr fetch` / `mle download` installs Entire's hooks so your + session is captured locally as you work. (MLE workspaces aren't git repos, so + a lightweight one is initialised — this also lets your solution code be + pushed on submit. The dataset under `data/` is gitignored.) + If the `entire` CLI isn't installed, you're pointed at `aicodinggym configure` + (which offers to install it) instead of being silently skipped. +2. On `submit`, after consent, the captured `entire/checkpoints/v1` branch is + pushed to your repo on a per-problem branch + `aicodinggym-logs/<benchmark>/<problem_id>`, with an `aicodinggym-meta.json` + file at the tip (problem id, benchmark, user, tool, timestamp). +3. **MLE also pushes your solution code** (notebooks/scripts/CSV; `data/` + excluded) to a branch named after the competition (e.g. `spaceship-titanic`), + gated by the same consent. The prediction CSV still goes to the scoring API as + before. + +**Privacy.** Uploaded data is used solely for research and is +de-identified/anonymized before use. Entire also redacts detected secrets when +writing to the checkpoints branch (best-effort). Logging is entirely optional — +if the `entire` binary isn't installed, fetch/submit work unchanged.
--- @@ -181,6 +240,7 @@ aicodinggym-cli/ ├── config.py # Config + credentials persistence (~/.aicodinggym/) ├── api.py # HTTP client for aicodinggym.com/api ├── git_ops.py # SSH key generation, git clone/commit/push/reset +├── entire_logging.py # Optional AI-session capture/upload via the Entire CLI └── pyproject.toml # Package metadata and build config ``` @@ -188,7 +248,7 @@ aicodinggym-cli/ | File | Purpose | |---|---| -| `~/.aicodinggym/config.json` | Global config (user_id, repo_name, key path, workspace) | +| `~/.aicodinggym/config.json` | Global config (user_id, repo_name, key path, workspace, log-upload consent, submission repo URL) | | `~/.aicodinggym/credentials.json` | Per-problem credentials (repo_url, branch, cached after fetch) | | `~/.aicodinggym/{user_id}_id_rsa` | SSH private key | | `~/.aicodinggym/{user_id}_id_rsa.pub` | SSH public key | diff --git a/__init__.py b/__init__.py index c5f223c..cf9a855 100644 --- a/__init__.py +++ b/__init__.py @@ -1,3 +1,3 @@ """AI Coding Gym CLI.""" -__version__ = "0.3.0" +__version__ = "0.6.0" diff --git a/cli.py b/cli.py index ed3cbc9..3cdc169 100644 --- a/cli.py +++ b/cli.py @@ -35,6 +35,7 @@ import click from . import __version__ +from . import entire_logging from .api import ( APIError, configure as api_configure, @@ -47,10 +48,12 @@ submit_notification, ) from .config import ( + get_logging_consent, load_config, load_credentials, save_config, save_credentials, + set_logging_consent, ) from .git_ops import ( add_commit_push, @@ -314,6 +317,227 @@ def _resolve_key_path(config: dict, creds: dict | None = None) -> Path: return key_path +# ── AI-session logging (Entire integration) ────────────────────────────────── + +_CONSENT_PROMPT = ( + "AI Coding Gym can upload this AI coding session (prompts, responses, files " + "changed) for research only. Data is de-identified/anonymized before use.\n" + "Upload your session logs on submit?" 
+) + + +def _configure_hint(user_id: str | None) -> str: + """A copy-pasteable 'configure' command (where Entire install is offered).""" + return f"aicodinggym configure --user-id {user_id}" if user_id else "aicodinggym configure" + + +def _setup_logging(problem_dir: Path, *, init_git: bool = False, + user_id: str | None = None) -> None: + """Best-effort: install Entire hooks so the session is captured locally. + + Capture is local-only; nothing is uploaded until the user consents at + submit. If Entire isn't installed, point the user at ``configure`` (which + offers to install it) rather than silently skipping — unless they've already + opted out of logging. + """ + if not entire_logging.is_available(): + if get_logging_consent() is not False: # not explicitly opted out + click.echo( + " Logging: Not set up — the 'entire' CLI isn't installed.\n" + f" Run '{_configure_hint(user_id)}' to enable AI workflow logging." + ) + return + ok, msg = entire_logging.setup(problem_dir, init_git=init_git) + if ok: + click.echo(f" Logging: {msg} (uploaded only with your consent on submit)") + # On setup failure (Entire present but enable errored) we stay quiet. + + +def _safe_key_path(config: dict, creds: dict | None = None) -> Path | None: + """Resolve the SSH key for a log push without exiting if it's missing. + + Unlike :func:`_resolve_key_path`, this returns None instead of aborting, so + an optional log upload never kills a submit that already succeeded. + """ + path_str = (creds or {}).get("private_key_path") or config.get("private_key_path") + if not path_str: + return None + key_path = Path(path_str) + return key_path if key_path.exists() else None + + +def _resolve_log_upload_consent(flag: bool | None) -> bool: + """Resolve whether to upload session logs, prompting once if needed. + + Precedence: explicit --upload-logs/--no-upload-logs flag > stored consent > + first-time prompt. 
In non-interactive sessions with no recorded choice we + default to NOT uploading (privacy-safe) and print how to opt in. + """ + if flag is not None: + set_logging_consent(flag) + return flag + stored = get_logging_consent() + if stored is not None: + return stored + if not sys.stdin.isatty(): + click.echo( + "AI session captured locally; upload skipped (no consent on record).\n" + " Opt in for research (de-identified): aicodinggym configure --upload-logs" + ) + return False + answer = click.confirm("\n" + _CONSENT_PROMPT, default=False) + set_logging_consent(answer) + return answer + + +def _resolve_logs_remote(benchmark: str, creds: dict | None, + config: dict, override: str | None) -> str | None: + """Resolve the writable git URL to push session logs to. + + All benchmarks log to the user's single repo (one repo, many branches), + distinguished by the per-problem log branch name. SWE may fall back to its + own writable clone URL when the submission repo isn't recorded yet; CR's + cloned repo is the read-only PR and must never be used. + """ + if override: + return override + env = os.environ.get("AICODINGGYM_LOGS_REMOTE") + if env: + return env + if config.get("submission_repo_url"): + return config["submission_repo_url"] + if benchmark == "swe": + return (creds or {}).get("repo_url") + return None + + +def _maybe_upload_logs(problem_dir: Path, *, benchmark: str, problem_id: str, + user_id: str, key_path: Path | None, config: dict, + creds: dict | None, upload_flag: bool | None, + logs_remote_override: str | None, tool: str | None = None, + flush: bool = False) -> None: + """Consent-gated upload of the captured AI session at submit time.""" + if not entire_logging.is_available(): + if get_logging_consent() is not False: # not explicitly opted out + click.echo( + " Logs: Not captured — the 'entire' CLI isn't installed.\n" + f" Run '{_configure_hint(user_id)}' to enable logging for next time." 
+ ) + return + if not entire_logging.is_enabled(problem_dir): + return # repo wasn't set up for capture (e.g. fetched before install) + if flush: + entire_logging.flush(problem_dir) + if not entire_logging.has_sessions(problem_dir): + return # nothing was captured — stay quiet + + if not _resolve_log_upload_consent(upload_flag): + click.echo(" Logs: AI session captured locally; upload skipped.") + return + + remote = _resolve_logs_remote(benchmark, creds, config, logs_remote_override) + if not remote: + click.echo( + " Logs: consented, but no logs repository is configured.\n" + " Re-run 'aicodinggym configure' or pass --logs-remote URL." + ) + return + + ok, info = entire_logging.upload( + problem_dir, remote_url=remote, benchmark=benchmark, problem_id=problem_id, + user_id=user_id, key_path=key_path, tool=tool, cli_version=__version__, + ) + if ok: + click.echo(f" Logs: uploaded for research (branch {info})") + else: + _warn(f"AI session log upload failed: {info}") + + +def _maybe_submit_mle_artifacts(workspace_repo: Path, *, competition_id: str, + user_id: str, config: dict, key_path: Path | None, + upload_flag: bool | None, + logs_remote_override: str | None) -> None: + """MLE submit: push the solution code to a branch and (when + captured) the AI session logs — both to the user's own repo, gated by the + single log-upload consent. The CSV itself already went to the API. + """ + workspace_repo = Path(workspace_repo) + if not (workspace_repo / ".git").exists(): + return # workspace was never initialised (e.g. downloaded pre-upgrade) + + # Commit the solution code locally. With Entire enabled this commit also + # materialises the AI session checkpoint, so no separate flush is needed. 
+ entire_logging.commit_workspace(workspace_repo, f"MLE submission: {competition_id}") + + if not _resolve_log_upload_consent(upload_flag): + click.echo(" Logs: code + AI session captured locally; upload skipped.") + return + + remote = _resolve_logs_remote("mle", None, config, logs_remote_override) + if not remote: + click.echo( + " Logs: consented, but no repository is configured to push to.\n" + " Re-run 'aicodinggym configure' or pass --logs-remote URL." + ) + return + + code_ok, code_info = entire_logging.push_branch( + workspace_repo, remote_url=remote, dest_branch=competition_id, key_path=key_path, + ) + if code_ok: + click.echo(f" Code: pushed to branch '{code_info}'") + else: + _warn(f"MLE code push failed: {code_info}") + + if entire_logging.is_enabled(workspace_repo) and entire_logging.has_sessions(workspace_repo): + ok, info = entire_logging.upload( + workspace_repo, remote_url=remote, benchmark="mle", problem_id=competition_id, + user_id=user_id, key_path=key_path, cli_version=__version__, + ) + if ok: + click.echo(f" Logs: uploaded for research (branch {info})") + else: + _warn(f"AI session log upload failed: {info}") + elif not entire_logging.is_available(): + click.echo( + f" Logs: AI session not captured ('entire' not installed).\n" + f" Run '{_configure_hint(user_id)}' to enable logging next time." + ) + + +def _configure_logging(upload_logs_flag: bool | None) -> None: + """During configure: record consent (if given) and offer to install Entire.""" + if upload_logs_flag is not None: + set_logging_consent(upload_logs_flag) + + if entire_logging.is_available(): + ver = entire_logging.version() + click.echo(f" Logging: Entire detected ({ver or 'installed'})") + return + + click.echo( + "\nOptional — AI workflow logging:\n" + " AI Coding Gym can capture your AI coding sessions (via Entire,\n" + " https://entire.io) and, only with your consent at submit, upload them\n" + " for research. Uploaded data is de-identified/anonymized. 
Needs the\n" + " 'entire' CLI." + ) + if not sys.stdin.isatty(): + click.echo(f" Install later with:\n {entire_logging.INSTALL_COMMAND}") + return + if click.confirm("Install the Entire CLI now?", default=True): + ok, msg = entire_logging.install() + if ok: + click.echo(f" Entire: {msg}") + else: + _warn( + f"Could not install Entire automatically: {msg}\n" + f" Install manually: {entire_logging.INSTALL_COMMAND}" + ) + else: + click.echo(f" Skipped. Install later with:\n {entire_logging.INSTALL_COMMAND}") + + # ── Top-level group ────────────────────────────────────────────────────────── @@ -369,7 +593,12 @@ def main(): help="Default workspace directory for cloning repositories. " "Defaults to the current working directory.", ) -def configure(user_id: str, workspace_dir: str | None): +@click.option( + "--upload-logs/--no-upload-logs", "upload_logs", default=None, + help="Pre-set consent for uploading de-identified AI session logs for " + "research (otherwise you're asked once, on your first submit).", +) +def configure(user_id: str, workspace_dir: str | None, upload_logs: bool | None): """Configure credentials and register SSH key with aicodinggym.com. Generates an SSH key pair locally (stored in ~/.aicodinggym/), @@ -396,15 +625,18 @@ def configure(user_id: str, workspace_dir: str | None): private_key_path, public_key = generate_ssh_key_pair(user_id) click.echo("Registering public key with aicodinggym.com...") + existing = load_config() + submission_repo_url = existing.get("submission_repo_url") try: data = api_configure(user_id, public_key) repo_name = data.get("repo_name") if not repo_name: _error("Server did not return a repository name. Please try again or contact support.") + # Writable repo we can push AI-session logs to (CR/MLE upload target). 
+ submission_repo_url = data.get("repo_url") or submission_repo_url except APIError as e: if "409" in str(e): click.echo("Key already registered, reusing existing configuration.") - existing = load_config() repo_name = existing.get("repo_name", f"submission-{user_id}") else: raise @@ -417,6 +649,8 @@ def configure(user_id: str, workspace_dir: str | None): "private_key_path": str(private_key_path), "workspace_dir": resolved_workspace, } + if submission_repo_url: + config["submission_repo_url"] = submission_repo_url save_config(config) _install_gym_environment(Path(resolved_workspace)) @@ -428,9 +662,13 @@ def configure(user_id: str, workspace_dir: str | None): f" Repository: {repo_name}\n" f" Workspace: {resolved_workspace}\n" f" SSH Key: {private_key_path}\n" - f" Config: ~/.aicodinggym/config.json\n" - f"\n" - f"You can now use 'aicodinggym swe', 'aicodinggym mle', and 'aicodinggym cr' commands." + f" Config: ~/.aicodinggym/config.json" + ) + + _configure_logging(upload_logs) + + click.echo( + "\nYou can now use 'aicodinggym swe', 'aicodinggym mle', and 'aicodinggym cr' commands." ) except APIError as e: _error(str(e)) @@ -536,7 +774,14 @@ def swe_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): if not success: _error(msg) + # Record the user's writable repo (one repo, many branches) so CR/MLE logs + # can target it too. Only set if not already configured. + if repo_url and not config.get("submission_repo_url"): + config["submission_repo_url"] = repo_url + save_config(config) + _install_gym_environment(workspace / problem_id) + _setup_logging(workspace / problem_id, user_id=uid) click.echo( f"\nSuccessfully fetched problem: {problem_id}\n" @@ -563,8 +808,17 @@ def swe_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): "--workspace-dir", default=None, type=click.Path(), help="Workspace directory. 
Overrides configured/cached value.", ) +@click.option( + "--upload-logs/--no-upload-logs", "upload_logs", default=None, + help="Override consent for uploading de-identified AI session logs.", +) +@click.option( + "--logs-remote", default=None, + help="Git URL to push AI session logs to (defaults to this problem's repo).", +) def swe_submit(problem_id: str, user_id: str | None, message: str | None, - force: bool, workspace_dir: str | None): + force: bool, workspace_dir: str | None, + upload_logs: bool | None, logs_remote: str | None): """Submit your SWE-bench solution by committing and pushing changes. Stages all changes, commits them, pushes to the remote, and notifies @@ -657,6 +911,14 @@ def swe_submit(problem_id: str, user_id: str | None, message: str | None, f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/swe/{problem_id}')}" ) + # The solution commit above already triggered Entire's checkpoint, so no + # flush is needed here — just upload (with consent). + _maybe_upload_logs( + problem_dir, benchmark="swe", problem_id=problem_id, user_id=uid, + key_path=key_path, config=config, creds=creds, upload_flag=upload_logs, + logs_remote_override=logs_remote, + ) + @swe.command("reset") @click.argument("problem_id") @@ -1030,6 +1292,7 @@ def cr_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): _error(msg) _install_gym_environment(workspace / problem_id) + _setup_logging(workspace / problem_id, user_id=uid) problem_dir = workspace / problem_id @@ -1080,8 +1343,21 @@ def cr_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): "-m", "--message", "review_text", help="Inline review text.", ) +@click.option( + "--workspace-dir", default=None, type=click.Path(), + help="Workspace directory. 
Overrides configured/cached value.", +) +@click.option( + "--upload-logs/--no-upload-logs", "upload_logs", default=None, + help="Override consent for uploading de-identified AI session logs.", +) +@click.option( + "--logs-remote", default=None, + help="Git URL to push AI session logs to (defaults to your submission repo).", +) def cr_submit(problem_id: str, user_id: str | None, review_file: str | None, - review_text: str | None): + review_text: str | None, workspace_dir: str | None, + upload_logs: bool | None, logs_remote: str | None): """Submit a code review for a Code Review challenge. Reads your review from a file (-f), inline text (-m), or piped stdin, @@ -1133,6 +1409,18 @@ def cr_submit(problem_id: str, user_id: str | None, review_file: str | None, f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/cr/{problem_id}')}" ) + # CR clones a read-only PR, so logs upload to the user's own submission repo. + # The CR flow makes no commit, so flush=True materialises the checkpoint. + creds = load_credentials().get(problem_id) + workspace = _resolve_workspace(config, workspace_dir or (creds or {}).get("workspace_dir")) + problem_dir = workspace / problem_id + if problem_dir.exists(): + _maybe_upload_logs( + problem_dir, benchmark="cr", problem_id=problem_id, user_id=uid, + key_path=_safe_key_path(config, creds), config=config, creds=creds, + upload_flag=upload_logs, logs_remote_override=logs_remote, flush=True, + ) + # ── mle group ──────────────────────────────────────────────────────────────── @@ -1191,6 +1479,10 @@ def mle_download(competition_id: str, user_id: str | None, workspace_dir: str | _error(str(e)) _install_gym_environment(workspace / competition_id) + # MLE workspaces aren't git repos: init one so the solution code can be + # pushed on submit and Entire can attach for session capture. 
+ entire_logging.ensure_git_repo(workspace / competition_id) + _setup_logging(workspace / competition_id, user_id=uid) click.echo( f"\nDataset downloaded to: {dest_path}\n" @@ -1210,8 +1502,21 @@ def mle_download(competition_id: str, user_id: str | None, workspace_dir: str | "--message", "-m", default=None, help="Description of your submission (optional).", ) +@click.option( + "--workspace-dir", default=None, type=click.Path(), + help="Workspace directory. Overrides configured value.", +) +@click.option( + "--upload-logs/--no-upload-logs", "upload_logs", default=None, + help="Override consent for uploading de-identified AI session logs.", +) +@click.option( + "--logs-remote", default=None, + help="Git URL to push AI session logs to (defaults to your submission repo).", +) def mle_submit(competition_id: str, csv_path: str, user_id: str | None, - message: str | None): + message: str | None, workspace_dir: str | None, + upload_logs: bool | None, logs_remote: str | None): """Submit a prediction CSV for an MLE-bench competition. Uploads your prediction CSV directly to the AI Coding Gym server @@ -1266,3 +1571,14 @@ def mle_submit(competition_id: str, csv_path: str, user_id: str | None, if score is not None: click.echo(f" Score: {score}\n") click.echo(f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/mle/{competition_id}')}") + + # Push the user's solution code to a branch and (with + # consent) the AI session logs, both to the user's own repo. 
+ workspace = _resolve_workspace(config, workspace_dir) + competition_dir = workspace / competition_id + if competition_dir.exists(): + _maybe_submit_mle_artifacts( + competition_dir, competition_id=competition_id, user_id=uid, config=config, + key_path=_safe_key_path(config), upload_flag=upload_logs, + logs_remote_override=logs_remote, + ) diff --git a/config.py b/config.py index 7c5065e..5555adf 100644 --- a/config.py +++ b/config.py @@ -18,7 +18,14 @@ CREDENTIALS_PATH = CONFIG_DIR / "credentials.json" # Fields persisted in config.json -_CONFIG_FIELDS = ("user_id", "repo_name", "private_key_path", "workspace_dir") +_CONFIG_FIELDS = ( + "user_id", "repo_name", "private_key_path", "workspace_dir", + # Writable repo to push AI-session logs to (Entire integration). Used for + # CR/MLE where the cloned repo is read-only or absent. + "submission_repo_url", + # AI-session upload consent: "granted" | "declined" (absent = not yet asked). + "entire_logging_consent", +) def ensure_config_dir() -> Path: @@ -80,6 +87,23 @@ def save_credentials(credentials: dict[str, dict[str, Any]]) -> None: CREDENTIALS_PATH.write_text(json.dumps(credentials, indent=2) + "\n") +def get_logging_consent() -> bool | None: + """Return AI-session upload consent: True/False, or None if never asked.""" + value = load_config().get("entire_logging_consent") + if value == "granted": + return True + if value == "declined": + return False + return None + + +def set_logging_consent(granted: bool) -> None: + """Persist the user's AI-session upload consent choice.""" + config = load_config() + config["entire_logging_consent"] = "granted" if granted else "declined" + save_config(config) + + def require_config(config: dict[str, str], field: str, label: str) -> str: """Get a required config field or raise a descriptive error.""" value = config.get(field) diff --git a/entire_logging.py b/entire_logging.py new file mode 100644 index 0000000..88f119c --- /dev/null +++ b/entire_logging.py @@ -0,0 +1,389 @@ 
+"""Optional AI-workflow logging via the Entire CLI (https://entire.io). + +Entire hooks into the local git workflow to capture AI agent sessions +(prompts, responses, tool calls, files touched) and stores them on a separate +``entire/checkpoints/v1`` git branch — it never adds commits to your working +branch. AI Coding Gym uses this to study *how* solutions are produced. + +This module is a thin, best-effort wrapper around the ``entire`` binary: + +* :func:`setup` is called at fetch/download time to install Entire's hooks so + capture happens locally as the user works. Nothing leaves the machine here. +* :func:`upload` is called at submit time *after the user consents*. It pushes + the captured ``entire/checkpoints/v1`` branch to a writable git remote, under + a per-problem branch name so each upload is identifiable, with an + ``aicodinggym-meta.json`` metadata file injected at the tip. + +Every function degrades gracefully: if the ``entire`` binary is missing or a +command fails, AI Coding Gym's core fetch/submit flow is never blocked. + +Privacy: capture is local-only until the user opts in. Uploaded data is used +solely for research and is de-identified/anonymized before use. Entire also +redacts detected secrets when writing to the checkpoints branch (best-effort). +""" + +import json +import os +import shutil +import subprocess +import sys +from datetime import datetime, timezone +from pathlib import Path + +from . import git_ops + + +# Branch Entire writes captured sessions to (fixed by Entire). +CHECKPOINT_BRANCH = "entire/checkpoints/v1" + +# Metadata file injected at the tip of each uploaded log branch. +METADATA_FILENAME = "aicodinggym-meta.json" + +# Primary agent to capture (this CLI is commonly driven by Claude Code), plus +# extra agents we enable when their binary is detected on PATH. Keys are the +# names Entire expects for `entire agent add`; values are the PATH binary. 
+PRIMARY_AGENT = "claude-code" +OPTIONAL_AGENTS = { + "codex": "codex", + "gemini": "gemini", + "cursor": "cursor-agent", + "opencode": "opencode", +} + +# Identity used for the synthetic flush/metadata commits so we never depend on +# (or alter) the user's configured git identity. +_LOG_IDENT = { + "GIT_AUTHOR_NAME": "AI Coding Gym", + "GIT_AUTHOR_EMAIL": "logs@aicodinggym.com", + "GIT_COMMITTER_NAME": "AI Coding Gym", + "GIT_COMMITTER_EMAIL": "logs@aicodinggym.com", +} + +_FLUSH_MESSAGE = "AI Coding Gym: capture AI session checkpoint" + +if sys.platform == "win32": + INSTALL_COMMAND = ( + "scoop bucket add entire https://github.com/entireio/scoop-bucket.git; " + "scoop install entire/cli" + ) +else: + INSTALL_COMMAND = "curl -fsSL https://entire.io/install.sh | bash" + + +# ── binary discovery / install ─────────────────────────────────────────────── + + +def is_available() -> bool: + """True if the ``entire`` binary is on PATH.""" + return shutil.which("entire") is not None + + +def version() -> str | None: + """Return Entire's reported version, or None if unavailable.""" + if not is_available(): + return None + res = _entire(["version"], cwd=Path.cwd()) + if res.returncode == 0: + return res.stdout.strip() or None + return None + + +def install() -> tuple[bool, str]: + """Run Entire's official installer. Returns (installed_now, message). + + Best-effort: on failure the caller should fall back to printing + :data:`INSTALL_COMMAND` for the user to run manually. + """ + try: + if sys.platform == "win32": + # Scoop lives behind PowerShell; auto-driving it reliably is brittle, + # so we defer to the documented manual command. 
+ return False, "automatic install is not supported on Windows" + res = subprocess.run( + ["bash", "-c", INSTALL_COMMAND], + capture_output=True, text=True, timeout=300, + ) + if res.returncode != 0: + return False, (res.stderr or res.stdout or "installer exited non-zero").strip() + # shutil.which caches nothing, but the new binary may have landed in a + # dir not yet on this process's PATH (e.g. ~/.local/bin). + if is_available(): + return True, "installed" + return True, "installed (you may need to open a new shell for it to appear on PATH)" + except FileNotFoundError: + return False, "bash not found" + except subprocess.TimeoutExpired: + return False, "installer timed out" + except Exception as e: # noqa: BLE001 - never let install crash configure + return False, str(e) + + +# ── repo-level helpers ─────────────────────────────────────────────────────── + + +def is_enabled(repo_dir: Path) -> bool: + """True if Entire has been set up in this repo (``.entire/`` present).""" + return (Path(repo_dir) / ".entire").is_dir() + + +def has_sessions(repo_dir: Path) -> bool: + """True if a captured-session checkpoint branch exists locally.""" + res = _git(["rev-parse", "--verify", "--quiet", f"refs/heads/{CHECKPOINT_BRANCH}"], + cwd=repo_dir) + return res.returncode == 0 and bool(res.stdout.strip()) + + +def setup(repo_dir: Path, *, init_git: bool = False) -> tuple[bool, str]: + """Install Entire hooks so this repo captures AI sessions locally. + + Captures only — sessions are not pushed here (``--skip-push-sessions``); + upload happens later, with consent, in :func:`upload`. + + Set ``init_git=True`` for non-git workspaces (e.g. MLE competition dirs): + a lightweight repo is initialised first so Entire has something to attach + to. Returns (ok, message); never raises. 
+ """ + if not is_available(): + return False, "entire not installed" + + repo_dir = Path(repo_dir) + try: + if init_git and not (repo_dir / ".git").exists(): + ok, msg = ensure_git_repo(repo_dir) + if not ok: + return False, msg + if not (repo_dir / ".git").exists(): + return False, "not a git repository" + + enable = _entire( + ["enable", "--agent", PRIMARY_AGENT, "--skip-push-sessions", "--telemetry=false"], + cwd=repo_dir, + ) + if enable.returncode != 0 and not is_enabled(repo_dir): + return False, (enable.stderr or enable.stdout or "entire enable failed").strip() + + enabled_agents = [PRIMARY_AGENT] + for agent_name, binary in OPTIONAL_AGENTS.items(): + if shutil.which(binary): + add = _entire(["agent", "add", agent_name], cwd=repo_dir) + if add.returncode == 0: + enabled_agents.append(agent_name) + + return True, "capturing AI sessions for: " + ", ".join(enabled_agents) + except Exception as e: # noqa: BLE001 - logging must never break fetch + return False, str(e) + + +def flush(repo_dir: Path) -> None: + """Materialise a checkpoint from the active session, best-effort. + + Entire writes checkpoints on commit. Flows that already commit (SWE submit) + don't need this; flows that don't (CR submit) call this to trigger Entire's + post-commit hook via an empty commit. The empty commit stays local — only + the resulting ``entire/checkpoints/v1`` branch is ever pushed. + """ + if not is_available() or not is_enabled(repo_dir): + return + try: + _git( + ["-c", "user.name=AI Coding Gym", "-c", "user.email=logs@aicodinggym.com", + "commit", "--allow-empty", "-m", _FLUSH_MESSAGE], + cwd=repo_dir, env_extra=_LOG_IDENT, + ) + except Exception: # noqa: BLE001 + pass + + +def commit_workspace(repo_dir: Path, message: str) -> bool: + """Stage and commit the whole working tree (gitignored files excluded). 
+ + Used by MLE submit to record the user's solution code; the commit also + triggers Entire's post-commit hook (so it doubles as a checkpoint flush + when Entire is enabled). Returns True if a commit was made. + """ + if not (Path(repo_dir) / ".git").exists(): + return False + try: + _git(["add", "-A"], cwd=repo_dir) + res = _git( + ["-c", "user.name=AI Coding Gym", "-c", "user.email=logs@aicodinggym.com", + "commit", "--allow-empty", "-m", message], + cwd=repo_dir, env_extra=_LOG_IDENT, + ) + return res.returncode == 0 + except Exception: # noqa: BLE001 + return False + + +def push_branch(repo_dir: Path, *, remote_url: str, dest_branch: str, + key_path: Path | None = None) -> tuple[bool, str]: + """Push the current HEAD to ``dest_branch`` on ``remote_url`` (force). + + Used for MLE: pushes the user's solution code to a competition-named branch + in their own repo. Returns (ok, branch_or_error). Never raises. + """ + try: + safe = dest_branch.replace(" ", "_") + refspec = f"HEAD:refs/heads/{safe}" + res = git_ops.run_git_command( + ["git", "push", "--force", remote_url, refspec], str(repo_dir), key_path, + ) + if res.returncode != 0: + return False, (res.stderr or "git push failed").strip() + return True, safe + except Exception as e: # noqa: BLE001 + return False, str(e) + + +def logs_branch(benchmark: str, problem_id: str) -> str: + """Remote branch name that identifies which problem a log belongs to.""" + safe = problem_id.replace(" ", "_") + return f"aicodinggym-logs/{benchmark}/{safe}" + + +def upload(repo_dir: Path, *, remote_url: str, benchmark: str, problem_id: str, + user_id: str, key_path: Path | None = None, tool: str | None = None, + cli_version: str | None = None) -> tuple[bool, str]: + """Push the captured session branch to ``remote_url`` for research. 
+ + Pushes ``entire/checkpoints/v1`` to a per-problem branch + (:func:`logs_branch`) on the given writable remote, after injecting an + ``aicodinggym-meta.json`` metadata file at the tip so each upload is + self-describing. Returns (ok, branch_or_error). Never raises. + """ + repo_dir = Path(repo_dir) + try: + tip = _git(["rev-parse", "--verify", f"refs/heads/{CHECKPOINT_BRANCH}"], cwd=repo_dir) + if tip.returncode != 0 or not tip.stdout.strip(): + return False, "no captured sessions to upload" + parent = tip.stdout.strip() + + meta = { + "problem_id": problem_id, + "benchmark": benchmark, + "user_id": user_id, + "tool": tool, + "cli_version": cli_version, + "captured_by": "aicodinggym-cli", + "uploaded_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + } + # Inject metadata as an extra commit on top of the checkpoint tip, + # without disturbing the working tree or Entire's branch. Falls back to + # the raw tip if plumbing fails — the branch name still identifies it. + push_sha = _commit_with_metadata(repo_dir, parent, meta) or parent + + dest = logs_branch(benchmark, problem_id) + refspec = f"{push_sha}:refs/heads/{dest}" + # Force: re-submitting the same problem replaces its log branch. Entire's + # checkpoint branch accumulates history, so the latest tip is lossless. + push = git_ops.run_git_command( + ["git", "push", "--force", remote_url, refspec], str(repo_dir), key_path, + ) + if push.returncode != 0: + return False, (push.stderr or "git push failed").strip() + return True, dest + except Exception as e: # noqa: BLE001 + return False, str(e) + + +# ── internals ──────────────────────────────────────────────────────────────── + + +def _entire(args: list[str], cwd: Path) -> subprocess.CompletedProcess: + """Run the ``entire`` binary, capturing output. 
Non-interactive.""" + env = os.environ.copy() + env.setdefault("ACCESSIBLE", "1") # avoid interactive TUI elements + return subprocess.run( + ["entire", *args], cwd=str(cwd), capture_output=True, text=True, env=env, + ) + + +def _git(args: list[str], cwd: Path, *, env_extra: dict | None = None, + input_text: str | None = None) -> subprocess.CompletedProcess: + """Run a local git command (no network) with optional extra env.""" + env = os.environ.copy() + if env_extra: + env.update(env_extra) + return subprocess.run( + ["git", *args], cwd=str(cwd), capture_output=True, text=True, + env=env, input=input_text, + ) + + +def ensure_git_repo(repo_dir: Path) -> tuple[bool, str]: + """Initialise a minimal git repo so Entire can attach and MLE code can be + pushed (MLE workspaces aren't git repos by default). No-op if already a repo.""" + if (Path(repo_dir) / ".git").exists(): + return True, "already a git repo" + init = _git(["init", "-q"], cwd=repo_dir) + if init.returncode != 0: + return False, (init.stderr or "git init failed").strip() + + # Keep heavy/derived files out of the repo; we never commit the working + # tree anyway, but this keeps Entire's "files touched" view sane. 
+ gitignore = repo_dir / ".gitignore" + existing = gitignore.read_text(encoding="utf-8") if gitignore.exists() else "" + wanted = ["data/", "*.zip", ".entire/"] + missing = [w for w in wanted if w not in existing.splitlines()] + if missing: + block = ("" if existing.endswith("\n") or not existing else "\n") + \ + "\n# aicodinggym logging\n" + "\n".join(missing) + "\n" + with open(gitignore, "a", encoding="utf-8", newline="\n") as fh: + fh.write(block) + + _git(["add", ".gitignore"], cwd=repo_dir) + commit = _git( + ["-c", "user.name=AI Coding Gym", "-c", "user.email=logs@aicodinggym.com", + "commit", "--allow-empty", "-m", "AI Coding Gym: initialize logging workspace"], + cwd=repo_dir, env_extra=_LOG_IDENT, + ) + if commit.returncode != 0: + return False, (commit.stderr or "initial commit failed").strip() + return True, "initialized git repo" + + +def _commit_with_metadata(repo_dir: Path, parent_sha: str, meta: dict) -> str | None: + """Return a new commit SHA = parent_sha + an aicodinggym-meta.json file. + + Uses git plumbing with a throwaway index so neither the working tree nor + Entire's checkpoint branch is touched. Returns None on any failure. 
+ """ + try: + blob = _git(["hash-object", "-w", "--stdin"], cwd=repo_dir, + input_text=json.dumps(meta, indent=2) + "\n") + if blob.returncode != 0 or not blob.stdout.strip(): + return None + blob_sha = blob.stdout.strip() + + index_path = Path(repo_dir) / ".git" / "aicodinggym_meta.index" + index_env = {"GIT_INDEX_FILE": str(index_path)} + try: + if _git(["read-tree", parent_sha], cwd=repo_dir, env_extra=index_env).returncode != 0: + return None + added = _git( + ["update-index", "--add", "--cacheinfo", + f"100644,{blob_sha},{METADATA_FILENAME}"], + cwd=repo_dir, env_extra=index_env, + ) + if added.returncode != 0: + return None + tree = _git(["write-tree"], cwd=repo_dir, env_extra=index_env) + if tree.returncode != 0 or not tree.stdout.strip(): + return None + tree_sha = tree.stdout.strip() + finally: + try: + index_path.unlink() + except OSError: + pass + + message = f"AI Coding Gym session log: {meta.get('benchmark')}/{meta.get('problem_id')}" + commit = _git( + ["commit-tree", tree_sha, "-p", parent_sha, "-m", message], + cwd=repo_dir, env_extra=_LOG_IDENT, + ) + if commit.returncode != 0 or not commit.stdout.strip(): + return None + return commit.stdout.strip() + except Exception: # noqa: BLE001 + return None diff --git a/pyproject.toml b/pyproject.toml index 9f749e9..1d1f7d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "aicodinggym-cli" -version = "0.5.1" +version = "0.6.0" description = "CLI tool for AI Coding Gym platform" readme = "README.md" requires-python = ">=3.10" From a0fcf7aa6022c84a01706a422c799168369086a3 Mon Sep 17 00:00:00 2001 From: qyli00 Date: Wed, 3 Jun 2026 14:23:18 -0700 Subject: [PATCH 3/7] fix(logging): unique non-overwriting log branches, reorder consent, expand MLE ignores, add tests Addresses review feedback on PR #4: - Ordering: submit commands now resolve logging (incl. the interactive consent prompt) BEFORE printing the success banner, and embed the Logs/Code status line into the summary. 
The helpers return status text instead of echoing. - No overwrites: every submission pushes to a unique per-submission branch `aicodinggym-logs/<benchmark>/<problem_id>/<stamp>` (UTC timestamp + random), so re-submissions and submissions from different directories/machines never clobber previous logs. MLE code goes to `<competition_id>/<stamp>`; code and logs share one submission id. Dropped the force-push. - Expanded MLE .gitignore (model weights, checkpoints, caches, archives, venvs) so the pushed code branch stays small. - Added pytest suite (tests/): entire_logging git behaviour (metadata injection, unique branches, ignore list, has_sessions), config consent round-trip + allowlist persistence, and CLI remote/consent resolution. 27 tests. - pyproject: dev extra (pytest) + pytest testpaths. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 31 ++++-- cli.py | 151 +++++++++++++++------------ entire_logging.py | 87 ++++++++++++---- pyproject.toml | 6 ++ tests/test_cli_logging.py | 86 +++++++++++++++ tests/test_config_consent.py | 55 ++++++++++ tests/test_entire_logging.py | 196 +++++++++++++++++++++++++++++++++++ 7 files changed, 519 insertions(+), 93 deletions(-) create mode 100644 tests/test_cli_logging.py create mode 100644 tests/test_config_consent.py create mode 100644 tests/test_entire_logging.py diff --git a/README.md b/README.md index 863273b..1dccf48 100644 --- a/README.md +++ b/README.md @@ -207,22 +207,29 @@ that repo URL (`submission_repo_url`) so CR/MLE reuse it; the cloned CR PR repo is read-only and is never used as a target. Override with `--logs-remote` or the `AICODINGGYM_LOGS_REMOTE` env var. +**Nothing is overwritten.** Every submission gets its own unique branch +(`…/<stamp>`, a UTC timestamp + random suffix). Re-submitting the same +problem — or submitting it from a different directory or machine — adds a new +branch and never deletes or clobbers a previous one. + **How it works** 1.
`swe fetch` / `cr fetch` / `mle download` installs Entire's hooks so your session is captured locally as you work. (MLE workspaces aren't git repos, so a lightweight one is initialised — this also lets your solution code be - pushed on submit. The dataset under `data/` is gitignored.) + pushed on submit. The dataset under `data/` plus common model/checkpoint/cache + artifacts are gitignored.) If the `entire` CLI isn't installed, you're pointed at `aicodinggym configure` (which offers to install it) instead of being silently skipped. 2. On `submit`, after consent, the captured `entire/checkpoints/v1` branch is - pushed to your repo on a per-problem branch - `aicodinggym-logs/<benchmark>/<problem_id>`, with an `aicodinggym-meta.json` - file at the tip (problem id, benchmark, user, tool, timestamp). -3. **MLE also pushes your solution code** (notebooks/scripts/CSV; `data/` - excluded) to a branch named after the competition (e.g. `spaceship-titanic`), - gated by the same consent. The prediction CSV still goes to the scoring API as - before. + pushed to your repo on a unique per-submission branch + `aicodinggym-logs/<benchmark>/<problem_id>/<stamp>`, with an + `aicodinggym-meta.json` file at the tip (problem id, benchmark, user, tool, + submission id, timestamp). +3. **MLE also pushes your solution code** (notebooks/scripts/CSV; dataset and + heavy artifacts excluded) to `<competition_id>/<stamp>`, gated by the + same consent. The two share a submission id so code and logs correlate. The + prediction CSV still goes to the scoring API as before. **Privacy.** Uploaded data is used solely for research and is de-identified/anonymized before use.
Entire also redacts detected secrets when @@ -241,9 +248,17 @@ aicodinggym-cli/ ├── api.py # HTTP client for aicodinggym.com/api ├── git_ops.py # SSH key generation, git clone/commit/push/reset ├── entire_logging.py # Optional AI-session capture/upload via the Entire CLI +├── tests/ # pytest suite (logging, consent, remote resolution) └── pyproject.toml # Package metadata and build config ``` +## Development + +```bash +pip install -e ".[dev]" # installs pytest +pytest # run the test suite +``` + ## Configuration Files | File | Purpose | diff --git a/cli.py b/cli.py index 3cdc169..c0b9cbf 100644 --- a/cli.py +++ b/cli.py @@ -415,77 +415,85 @@ def _maybe_upload_logs(problem_dir: Path, *, benchmark: str, problem_id: str, user_id: str, key_path: Path | None, config: dict, creds: dict | None, upload_flag: bool | None, logs_remote_override: str | None, tool: str | None = None, - flush: bool = False) -> None: - """Consent-gated upload of the captured AI session at submit time.""" + flush: bool = False) -> str: + """Consent-gated upload of the captured AI session at submit time. + + Returns a status line (or "") to embed in the caller's success summary. Any + interactive consent prompt happens here, so callers should invoke this + *before* printing their banner. Hard failures are warned to stderr. + """ if not entire_logging.is_available(): if get_logging_consent() is not False: # not explicitly opted out - click.echo( + return ( " Logs: Not captured — the 'entire' CLI isn't installed.\n" - f" Run '{_configure_hint(user_id)}' to enable logging for next time." + f" Run '{_configure_hint(user_id)}' to enable logging next time." ) - return + return "" if not entire_logging.is_enabled(problem_dir): - return # repo wasn't set up for capture (e.g. fetched before install) + return "" # repo wasn't set up for capture (e.g. 
fetched before install) if flush: entire_logging.flush(problem_dir) if not entire_logging.has_sessions(problem_dir): - return # nothing was captured — stay quiet + return "" # nothing was captured — stay quiet if not _resolve_log_upload_consent(upload_flag): - click.echo(" Logs: AI session captured locally; upload skipped.") - return + return " Logs: AI session captured locally; upload skipped." remote = _resolve_logs_remote(benchmark, creds, config, logs_remote_override) if not remote: - click.echo( + return ( " Logs: consented, but no logs repository is configured.\n" " Re-run 'aicodinggym configure' or pass --logs-remote URL." ) - return ok, info = entire_logging.upload( problem_dir, remote_url=remote, benchmark=benchmark, problem_id=problem_id, user_id=user_id, key_path=key_path, tool=tool, cli_version=__version__, ) if ok: - click.echo(f" Logs: uploaded for research (branch {info})") - else: - _warn(f"AI session log upload failed: {info}") + return f" Logs: uploaded for research (branch {info})" + _warn(f"AI session log upload failed: {info}") + return "" def _maybe_submit_mle_artifacts(workspace_repo: Path, *, competition_id: str, user_id: str, config: dict, key_path: Path | None, upload_flag: bool | None, - logs_remote_override: str | None) -> None: - """MLE submit: push the solution code to a branch and (when - captured) the AI session logs — both to the user's own repo, gated by the - single log-upload consent. The CSV itself already went to the API. + logs_remote_override: str | None) -> str: + """MLE submit: push the solution code and (when captured) the AI session logs + to the user's own repo, gated by the single log-upload consent. Each goes to + a unique per-submission branch so prior submissions are never overwritten. + The CSV itself already went to the API. Returns a status block (or ""). """ workspace_repo = Path(workspace_repo) if not (workspace_repo / ".git").exists(): - return # workspace was never initialised (e.g. 
downloaded pre-upgrade) + return "" # workspace was never initialised (e.g. downloaded pre-upgrade) # Commit the solution code locally. With Entire enabled this commit also # materialises the AI session checkpoint, so no separate flush is needed. entire_logging.commit_workspace(workspace_repo, f"MLE submission: {competition_id}") if not _resolve_log_upload_consent(upload_flag): - click.echo(" Logs: code + AI session captured locally; upload skipped.") - return + return " Logs: code + AI session captured locally; upload skipped." remote = _resolve_logs_remote("mle", None, config, logs_remote_override) if not remote: - click.echo( + return ( " Logs: consented, but no repository is configured to push to.\n" " Re-run 'aicodinggym configure' or pass --logs-remote URL." ) - return + + # One stamp ties this submission's code branch and log branch together, and + # makes both unique so re-submissions never overwrite earlier ones. + stamp = entire_logging.new_stamp() + lines: list[str] = [] code_ok, code_info = entire_logging.push_branch( - workspace_repo, remote_url=remote, dest_branch=competition_id, key_path=key_path, + workspace_repo, remote_url=remote, + dest_branch=f"{competition_id}/{stamp}", key_path=key_path, ) if code_ok: - click.echo(f" Code: pushed to branch '{code_info}'") + lines.append(f" Code: pushed to branch '{code_info}'") else: _warn(f"MLE code push failed: {code_info}") @@ -493,16 +501,17 @@ def _maybe_submit_mle_artifacts(workspace_repo: Path, *, competition_id: str, ok, info = entire_logging.upload( workspace_repo, remote_url=remote, benchmark="mle", problem_id=competition_id, user_id=user_id, key_path=key_path, cli_version=__version__, + submission_stamp=stamp, ) if ok: - click.echo(f" Logs: uploaded for research (branch {info})") + lines.append(f" Logs: uploaded for research (branch {info})") else: _warn(f"AI session log upload failed: {info}") elif not entire_logging.is_available(): - click.echo( - f" Logs: AI session not captured ('entire' not 
installed).\n" - f" Run '{_configure_hint(user_id)}' to enable logging next time." - ) + lines.append(" Logs: AI session not captured ('entire' not installed).") + lines.append(f" Run '{_configure_hint(user_id)}' to enable logging next time.") + + return "\n".join(lines) def _configure_logging(upload_logs_flag: bool | None) -> None: @@ -901,24 +910,27 @@ def swe_submit(problem_id: str, user_id: str | None, message: str | None, except APIError as e: _warn(f"Changes pushed, but failed to notify backend: {e}") - click.echo( - f"\nSuccessfully submitted solution for {problem_id}\n" - f"\n" - f" Commit: {commit_hash[:8]}\n" - f" Branch: {branch}\n" - f" Status: Pushed and backend notified\n" - f"\n" - f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/swe/{problem_id}')}" - ) - - # The solution commit above already triggered Entire's checkpoint, so no - # flush is needed here — just upload (with consent). - _maybe_upload_logs( + # Resolve logging (incl. any consent prompt) before the summary so the + # prompt never interrupts the success banner. The solution commit above + # already triggered Entire's checkpoint, so no flush is needed. 
+ log_status = _maybe_upload_logs( problem_dir, benchmark="swe", problem_id=problem_id, user_id=uid, key_path=key_path, config=config, creds=creds, upload_flag=upload_logs, logs_remote_override=logs_remote, ) + summary = [ + f"\nSuccessfully submitted solution for {problem_id}\n", + f" Commit: {commit_hash[:8]}", + f" Branch: {branch}", + f" Status: Pushed and backend notified", + ] + if log_status: + summary.append(log_status) + summary.append("") + summary.append(f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/swe/{problem_id}')}") + click.echo("\n".join(summary)) + @swe.command("reset") @click.argument("problem_id") @@ -1401,26 +1413,30 @@ def cr_submit(problem_id: str, user_id: str | None, review_file: str | None, except APIError as e: _error(str(e)) - click.echo( - f"\nSuccessfully submitted code review for {problem_id}\n" - f"\n" - f" Status: {result.get('status', 'COMPLETED')}\n" - f"\n" - f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/cr/{problem_id}')}" - ) - # CR clones a read-only PR, so logs upload to the user's own submission repo. # The CR flow makes no commit, so flush=True materialises the checkpoint. + # Resolve before the banner so any consent prompt comes first. 
creds = load_credentials().get(problem_id) workspace = _resolve_workspace(config, workspace_dir or (creds or {}).get("workspace_dir")) problem_dir = workspace / problem_id + log_status = "" if problem_dir.exists(): - _maybe_upload_logs( + log_status = _maybe_upload_logs( problem_dir, benchmark="cr", problem_id=problem_id, user_id=uid, key_path=_safe_key_path(config, creds), config=config, creds=creds, upload_flag=upload_logs, logs_remote_override=logs_remote, flush=True, ) + summary = [ + f"\nSuccessfully submitted code review for {problem_id}\n", + f" Status: {result.get('status', 'COMPLETED')}", + ] + if log_status: + summary.append(log_status) + summary.append("") + summary.append(f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/cr/{problem_id}')}") + click.echo("\n".join(summary)) + # ── mle group ──────────────────────────────────────────────────────────────── @@ -1562,23 +1578,28 @@ def mle_submit(competition_id: str, csv_path: str, user_id: str | None, score_msg = result.get("message", "Submission received for scoring.") score = result.get("score") - click.echo( - f"\nSuccessfully submitted prediction for {competition_id}\n" - f"\n" - f" CSV: {csv_src.name}\n" - f" Status: {score_msg}\n" - ) - if score is not None: - click.echo(f" Score: {score}\n") - click.echo(f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/mle/{competition_id}')}") - - # Push the user's solution code to a branch and (with - # consent) the AI session logs, both to the user's own repo. + # Push the user's solution code + (with consent) the AI session logs to the + # user's own repo, each on a unique per-submission branch. Resolve before + # the banner so any consent prompt comes first. 
workspace = _resolve_workspace(config, workspace_dir) competition_dir = workspace / competition_id + artifacts_status = "" if competition_dir.exists(): - _maybe_submit_mle_artifacts( + artifacts_status = _maybe_submit_mle_artifacts( competition_dir, competition_id=competition_id, user_id=uid, config=config, key_path=_safe_key_path(config), upload_flag=upload_logs, logs_remote_override=logs_remote, ) + + summary = [ + f"\nSuccessfully submitted prediction for {competition_id}\n", + f" CSV: {csv_src.name}", + f" Status: {score_msg}", + ] + if score is not None: + summary.append(f" Score: {score}") + if artifacts_status: + summary.append(artifacts_status) + summary.append("") + summary.append(f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/mle/{competition_id}')}") + click.echo("\n".join(summary)) diff --git a/entire_logging.py b/entire_logging.py index 88f119c..edf75e5 100644 --- a/entire_logging.py +++ b/entire_logging.py @@ -24,9 +24,11 @@ import json import os +import re import shutil import subprocess import sys +import uuid from datetime import datetime, timezone from pathlib import Path @@ -217,16 +219,18 @@ def commit_workspace(repo_dir: Path, message: str) -> bool: def push_branch(repo_dir: Path, *, remote_url: str, dest_branch: str, key_path: Path | None = None) -> tuple[bool, str]: - """Push the current HEAD to ``dest_branch`` on ``remote_url`` (force). + """Push the current HEAD to a fresh ``dest_branch`` on ``remote_url``. - Used for MLE: pushes the user's solution code to a competition-named branch - in their own repo. Returns (ok, branch_or_error). Never raises. + Used for MLE to push the user's solution code. The caller passes a unique, + per-submission branch name (see :func:`new_stamp`), so we do NOT force-push: + each submission lands on its own branch and previous ones are preserved. + Returns (ok, branch_or_error). Never raises. 
""" try: - safe = dest_branch.replace(" ", "_") + safe = _safe_ref(dest_branch) refspec = f"HEAD:refs/heads/{safe}" res = git_ops.run_git_command( - ["git", "push", "--force", remote_url, refspec], str(repo_dir), key_path, + ["git", "push", remote_url, refspec], str(repo_dir), key_path, ) if res.returncode != 0: return False, (res.stderr or "git push failed").strip() @@ -235,21 +239,40 @@ def push_branch(repo_dir: Path, *, remote_url: str, dest_branch: str, return False, str(e) -def logs_branch(benchmark: str, problem_id: str) -> str: - """Remote branch name that identifies which problem a log belongs to.""" - safe = problem_id.replace(" ", "_") - return f"aicodinggym-logs/{benchmark}/{safe}" +def new_stamp() -> str: + """A unique, sortable per-submission id: ``-``. + + Used to give every upload its own branch so re-submissions — and submissions + of the same problem from different directories/machines — never overwrite + each other's logs. + """ + return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + "-" + uuid.uuid4().hex[:8] + + +def logs_branch(benchmark: str, problem_id: str, suffix: str | None = None) -> str: + """Remote branch identifying which problem (and submission) a log belongs to. + + ``aicodinggym-logs//`` — and when ``suffix`` is given + (the per-submission stamp), ``...//`` so each upload is a + distinct, non-overwriting branch. + """ + parts = ["aicodinggym-logs", benchmark, _safe_ref(problem_id)] + if suffix: + parts.append(_safe_ref(suffix)) + return "/".join(parts) def upload(repo_dir: Path, *, remote_url: str, benchmark: str, problem_id: str, user_id: str, key_path: Path | None = None, tool: str | None = None, - cli_version: str | None = None) -> tuple[bool, str]: + cli_version: str | None = None, + submission_stamp: str | None = None) -> tuple[bool, str]: """Push the captured session branch to ``remote_url`` for research. 
- Pushes ``entire/checkpoints/v1`` to a per-problem branch - (:func:`logs_branch`) on the given writable remote, after injecting an + Pushes ``entire/checkpoints/v1`` to a unique per-submission branch + (:func:`logs_branch` with a stamp), after injecting an ``aicodinggym-meta.json`` metadata file at the tip so each upload is - self-describing. Returns (ok, branch_or_error). Never raises. + self-describing. The unique branch means previous logs are never + overwritten. Returns (ok, branch_or_error). Never raises. """ repo_dir = Path(repo_dir) try: @@ -258,12 +281,14 @@ def upload(repo_dir: Path, *, remote_url: str, benchmark: str, problem_id: str, return False, "no captured sessions to upload" parent = tip.stdout.strip() + stamp = submission_stamp or new_stamp() meta = { "problem_id": problem_id, "benchmark": benchmark, "user_id": user_id, "tool": tool, "cli_version": cli_version, + "submission_id": stamp, "captured_by": "aicodinggym-cli", "uploaded_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), } @@ -272,12 +297,11 @@ def upload(repo_dir: Path, *, remote_url: str, benchmark: str, problem_id: str, # the raw tip if plumbing fails — the branch name still identifies it. push_sha = _commit_with_metadata(repo_dir, parent, meta) or parent - dest = logs_branch(benchmark, problem_id) + dest = logs_branch(benchmark, problem_id, stamp) refspec = f"{push_sha}:refs/heads/{dest}" - # Force: re-submitting the same problem replaces its log branch. Entire's - # checkpoint branch accumulates history, so the latest tip is lossless. + # No force: a fresh per-submission branch, so nothing is overwritten. 
push = git_ops.run_git_command( - ["git", "push", "--force", remote_url, refspec], str(repo_dir), key_path, + ["git", "push", remote_url, refspec], str(repo_dir), key_path, ) if push.returncode != 0: return False, (push.stderr or "git push failed").strip() @@ -289,6 +313,13 @@ def upload(repo_dir: Path, *, remote_url: str, benchmark: str, problem_id: str, # ── internals ──────────────────────────────────────────────────────────────── +def _safe_ref(name: str) -> str: + """Sanitise a string into a valid git ref path component.""" + safe = re.sub(r"[ \t~^:?*\[\]\\\x00-\x1f\x7f]", "_", name) + safe = safe.replace("..", "_").strip("/") + return safe or "_" + + def _entire(args: list[str], cwd: Path) -> subprocess.CompletedProcess: """Run the ``entire`` binary, capturing output. Non-interactive.""" env = os.environ.copy() @@ -319,11 +350,27 @@ def ensure_git_repo(repo_dir: Path) -> tuple[bool, str]: if init.returncode != 0: return False, (init.stderr or "git init failed").strip() - # Keep heavy/derived files out of the repo; we never commit the working - # tree anyway, but this keeps Entire's "files touched" view sane. + # Keep heavy/derived ML artifacts out of the repo so the code branch we push + # on submit stays small. The dataset and common model/checkpoint/cache files + # are excluded; the user's notebooks/scripts and submission CSV are kept. 
gitignore = repo_dir / ".gitignore" existing = gitignore.read_text(encoding="utf-8") if gitignore.exists() else "" - wanted = ["data/", "*.zip", ".entire/"] + wanted = [ + # dataset & archives + "data/", "*.zip", "*.tar", "*.tar.gz", "*.tgz", "*.7z", + # python / tooling caches + "__pycache__/", "*.py[cod]", ".ipynb_checkpoints/", + ".venv/", "venv/", "env/", ".env", + ".entire/", + # model weights / serialized artifacts + "*.pkl", "*.pickle", "*.joblib", "*.npy", "*.npz", + "*.h5", "*.hdf5", "*.pt", "*.pth", "*.ckpt", "*.onnx", "*.pb", "*.bin", "*.safetensors", + # experiment-tracking / output dirs + "wandb/", "mlruns/", "lightning_logs/", "runs/", + "checkpoints/", "outputs/", "artifacts/", "models/", + # logs + "*.log", + ] missing = [w for w in wanted if w not in existing.splitlines()] if missing: block = ("" if existing.endswith("\n") or not existing else "\n") + \ diff --git a/pyproject.toml b/pyproject.toml index 1d1f7d5..865e0cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,9 @@ dependencies = [ "requests>=2.31.0", ] +[project.optional-dependencies] +dev = ["pytest>=7.0"] + [project.urls] Homepage = "https://aicodinggym.com" @@ -37,3 +40,6 @@ packages = ["aicodinggym"] [tool.setuptools.package-dir] aicodinggym = "." 
+ +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/tests/test_cli_logging.py b/tests/test_cli_logging.py new file mode 100644 index 0000000..c079104 --- /dev/null +++ b/tests/test_cli_logging.py @@ -0,0 +1,86 @@ +"""Tests for the CLI-level logging helpers: remote resolution and consent.""" + +import pytest + +from aicodinggym import cli + + +# ── _resolve_logs_remote: one repo for all, SWE-only clone fallback ────────── + +def test_swe_falls_back_to_problem_repo_when_no_submission_repo(): + assert cli._resolve_logs_remote("swe", {"repo_url": "git@h:u/r.git"}, {}, None) \ + == "git@h:u/r.git" + + +def test_submission_repo_takes_precedence_for_all_benchmarks(): + cfg = {"submission_repo_url": "git@h:u/own.git"} + assert cli._resolve_logs_remote("swe", {"repo_url": "git@h:u/prob.git"}, cfg, None) \ + == "git@h:u/own.git" + assert cli._resolve_logs_remote("cr", None, cfg, None) == "git@h:u/own.git" + assert cli._resolve_logs_remote("mle", None, cfg, None) == "git@h:u/own.git" + + +def test_cr_never_uses_the_readonly_clone(): + # CR creds carry the read-only PR repo_url; it must NOT become the target. 
+ assert cli._resolve_logs_remote("cr", {"repo_url": "git@h:upstream/pr.git"}, {}, None) \ + is None + + +def test_mle_without_submission_repo_is_none(): + assert cli._resolve_logs_remote("mle", None, {}, None) is None + + +def test_override_wins(): + cfg = {"submission_repo_url": "git@h:u/own.git"} + assert cli._resolve_logs_remote("swe", {"repo_url": "x"}, cfg, "git@h:u/override.git") \ + == "git@h:u/override.git" + + +def test_env_var_used_before_config(monkeypatch): + monkeypatch.setenv("AICODINGGYM_LOGS_REMOTE", "git@h:u/env.git") + assert cli._resolve_logs_remote("swe", {"repo_url": "x"}, {}, None) == "git@h:u/env.git" + + +# ── _resolve_log_upload_consent: flag > stored > prompt; non-tty is safe ────── + +@pytest.fixture(autouse=True) +def _isolate_consent(monkeypatch): + """Keep consent in memory so the prompt logic is tested without touching disk.""" + store = {"value": None} + monkeypatch.setattr(cli, "get_logging_consent", lambda: store["value"]) + monkeypatch.setattr(cli, "set_logging_consent", lambda v: store.__setitem__("value", v)) + return store + + +def test_explicit_flag_is_persisted_and_returned(_isolate_consent): + assert cli._resolve_log_upload_consent(True) is True + assert _isolate_consent["value"] is True + assert cli._resolve_log_upload_consent(False) is False + assert _isolate_consent["value"] is False + + +def test_stored_consent_is_used_without_prompting(_isolate_consent, monkeypatch): + _isolate_consent["value"] = True + # If this tried to prompt, confirm() would raise in a non-tty; ensure it doesn't. 
+ monkeypatch.setattr("sys.stdin.isatty", lambda: False) + assert cli._resolve_log_upload_consent(None) is True + + +def test_non_tty_without_record_defaults_to_no_upload(_isolate_consent, monkeypatch): + monkeypatch.setattr("sys.stdin.isatty", lambda: False) + assert cli._resolve_log_upload_consent(None) is False + assert _isolate_consent["value"] is None # nothing recorded + + +def test_interactive_prompt_is_recorded(_isolate_consent, monkeypatch): + monkeypatch.setattr("sys.stdin.isatty", lambda: True) + monkeypatch.setattr(cli.click, "confirm", lambda *a, **k: True) + assert cli._resolve_log_upload_consent(None) is True + assert _isolate_consent["value"] is True + + +# ── _configure_hint ────────────────────────────────────────────────────────── + +def test_configure_hint_includes_user_id(): + assert cli._configure_hint("alice") == "aicodinggym configure --user-id alice" + assert cli._configure_hint(None) == "aicodinggym configure" diff --git a/tests/test_config_consent.py b/tests/test_config_consent.py new file mode 100644 index 0000000..1711e5c --- /dev/null +++ b/tests/test_config_consent.py @@ -0,0 +1,55 @@ +"""Tests for logging-consent storage and the submission-repo allowlist field.""" + +import json + +import pytest + +from aicodinggym import config + + +@pytest.fixture +def isolated_config(tmp_path, monkeypatch): + """Point config at a throwaway dir so we never touch the real ~/.aicodinggym.""" + cfg_dir = tmp_path / ".aicodinggym" + monkeypatch.setattr(config, "CONFIG_DIR", cfg_dir) + monkeypatch.setattr(config, "CONFIG_PATH", cfg_dir / "config.json") + monkeypatch.setattr(config, "CREDENTIALS_PATH", cfg_dir / "credentials.json") + return cfg_dir + + +def test_consent_roundtrip(isolated_config): + assert config.get_logging_consent() is None # never asked + config.set_logging_consent(True) + assert config.get_logging_consent() is True + config.set_logging_consent(False) + assert config.get_logging_consent() is False + + +def 
test_consent_persists_as_string_in_allowlist(isolated_config): + config.set_logging_consent(True) + raw = json.loads(config.CONFIG_PATH.read_text()) + assert raw["entire_logging_consent"] == "granted" + + +def test_submission_repo_url_survives_save(isolated_config): + cfg = config.load_config() + cfg["user_id"] = "alice" + cfg["submission_repo_url"] = "git@aicodinggym.com:alice/sub.git" + config.save_config(cfg) + + reloaded = config.load_config() + assert reloaded["submission_repo_url"] == "git@aicodinggym.com:alice/sub.git" + + +def test_consent_and_submission_repo_coexist(isolated_config): + cfg = config.load_config() + cfg["submission_repo_url"] = "git@h:u/r.git" + config.save_config(cfg) + config.set_logging_consent(True) # separate load+save must not drop the URL + assert config.load_config().get("submission_repo_url") == "git@h:u/r.git" + assert config.get_logging_consent() is True + + +def test_unknown_fields_are_filtered(isolated_config): + config.save_config({"user_id": "a", "bogus": "x"}) + assert "bogus" not in config.load_config() diff --git a/tests/test_entire_logging.py b/tests/test_entire_logging.py new file mode 100644 index 0000000..dc14c9b --- /dev/null +++ b/tests/test_entire_logging.py @@ -0,0 +1,196 @@ +"""Tests for the Entire logging integration (entire_logging.py). + +These exercise the git-level behaviour directly and do NOT require the `entire` +binary: we simulate Entire's `entire/checkpoints/v1` branch by hand, then verify +metadata injection, unique non-overwriting branches, and the MLE code push. 
+""" + +import json +import subprocess + +import pytest + +from aicodinggym import entire_logging as el + + +def _git(args, cwd): + return subprocess.run( + ["git", "-c", "user.name=Test", "-c", "user.email=t@t", *args], + cwd=str(cwd), capture_output=True, text=True, + ) + + +def _init_repo_with_session(repo): + """A git repo whose entire/checkpoints/v1 branch holds a fake session.""" + _git(["init", "-q", "-b", "main", "."], repo) + (repo / "code.py").write_text("print('hi')\n") + _git(["add", "."], repo) + _git(["commit", "-q", "-m", "work"], repo) + _git(["checkout", "-q", "-b", el.CHECKPOINT_BRANCH], repo) + (repo / "session.json").write_text('{"prompt": "fix the bug"}') + _git(["add", "."], repo) + _git(["commit", "-q", "-m", "checkpoint"], repo) + _git(["checkout", "-q", "main"], repo) + + +def _bare_remote(tmp_path, name="remote.git"): + bare = tmp_path / name + _git(["init", "--bare", "-q", str(bare)], tmp_path) + return bare + + +def _remote_branches(bare): + out = _git(["for-each-ref", "--format=%(refname:short)", "refs/heads"], bare) + return set(out.stdout.split()) + + +# ── pure helpers ───────────────────────────────────────────────────────────── + +def test_safe_ref_sanitizes_illegal_characters(): + assert el._safe_ref("spaceship titanic") == "spaceship_titanic" + assert el._safe_ref("a..b") == "a_b" + assert el._safe_ref("we:ird~name^") == "we_ird_name_" + assert el._safe_ref("/leading/") == "leading" + + +def test_logs_branch_with_and_without_suffix(): + assert el.logs_branch("swe", "django__django-10097") == \ + "aicodinggym-logs/swe/django__django-10097" + assert el.logs_branch("mle", "spaceship-titanic", "20260603T000000Z-abc123") == \ + "aicodinggym-logs/mle/spaceship-titanic/20260603T000000Z-abc123" + + +def test_new_stamp_is_unique(): + stamps = {el.new_stamp() for _ in range(50)} + assert len(stamps) == 50 + + +# ── ensure_git_repo / commit_workspace ─────────────────────────────────────── + +def 
test_ensure_git_repo_creates_repo_and_ignores(tmp_path): + ws = tmp_path / "comp" + ws.mkdir() + ok, _ = el.ensure_git_repo(ws) + assert ok + assert (ws / ".git").is_dir() + ignore = (ws / ".gitignore").read_text() + for pat in ("data/", "*.pkl", "*.pt", "__pycache__/", "checkpoints/", "*.zip"): + assert pat in ignore + # has an initial commit (HEAD resolves) + assert _git(["rev-parse", "HEAD"], ws).returncode == 0 + + +def test_ensure_git_repo_is_noop_when_already_a_repo(tmp_path): + ws = tmp_path / "comp" + ws.mkdir() + _git(["init", "-q", "."], ws) + ok, msg = el.ensure_git_repo(ws) + assert ok and "already" in msg + + +def test_commit_workspace_excludes_heavy_artifacts(tmp_path): + ws = tmp_path / "comp" + ws.mkdir() + el.ensure_git_repo(ws) + (ws / "data").mkdir() + (ws / "data" / "train.csv").write_text("x,y\n1,2\n") # dataset -> ignored + (ws / "model.pkl").write_bytes(b"\x00" * 100) # weights -> ignored + (ws / "solution.py").write_text("print('model')\n") # code -> kept + (ws / "submission.csv").write_text("id,pred\n1,0\n") # prediction -> kept + + assert el.commit_workspace(ws, "MLE submission: comp") is True + tracked = _git(["ls-tree", "-r", "--name-only", "HEAD"], ws).stdout.split() + assert "solution.py" in tracked + assert "submission.csv" in tracked + assert "model.pkl" not in tracked + assert not any(f.startswith("data/") for f in tracked) + + +# ── push_branch (MLE code) ─────────────────────────────────────────────────── + +def test_push_branch_pushes_to_unique_branch_without_overwrite(tmp_path): + ws = tmp_path / "comp" + ws.mkdir() + el.ensure_git_repo(ws) + (ws / "solution.py").write_text("v1\n") + el.commit_workspace(ws, "submit 1") + bare = _bare_remote(tmp_path) + + ok1, b1 = el.push_branch(ws, remote_url=str(bare), + dest_branch="spaceship-titanic/stamp1", key_path=None) + (ws / "solution.py").write_text("v2\n") + el.commit_workspace(ws, "submit 2") + ok2, b2 = el.push_branch(ws, remote_url=str(bare), + 
dest_branch="spaceship-titanic/stamp2", key_path=None) + + assert ok1 and ok2 and b1 != b2 + # Both submissions are preserved on the remote — neither overwrote the other. + assert {"spaceship-titanic/stamp1", "spaceship-titanic/stamp2"} <= _remote_branches(bare) + + +# ── upload (AI logs + metadata) ────────────────────────────────────────────── + +def test_upload_pushes_unique_branch_with_metadata(tmp_path): + repo = tmp_path / "repo" + repo.mkdir() + _init_repo_with_session(repo) + bare = _bare_remote(tmp_path) + + ok, branch = el.upload( + repo, remote_url=str(bare), benchmark="swe", + problem_id="django__django-10097", user_id="alice", + key_path=None, tool="claude-code", cli_version="0.6.0", + submission_stamp="20260603T000000Z-aaaa1111", + ) + assert ok + assert branch == "aicodinggym-logs/swe/django__django-10097/20260603T000000Z-aaaa1111" + + files = _git(["ls-tree", "-r", "--name-only", branch], bare).stdout.split() + assert el.METADATA_FILENAME in files + assert "session.json" in files # original captured session carried over + + meta = json.loads(_git(["show", f"{branch}:{el.METADATA_FILENAME}"], bare).stdout) + assert meta["problem_id"] == "django__django-10097" + assert meta["benchmark"] == "swe" + assert meta["user_id"] == "alice" + assert meta["submission_id"] == "20260603T000000Z-aaaa1111" + + +def test_upload_twice_does_not_overwrite_previous_logs(tmp_path): + repo = tmp_path / "repo" + repo.mkdir() + _init_repo_with_session(repo) + bare = _bare_remote(tmp_path) + + _, b1 = el.upload(repo, remote_url=str(bare), benchmark="swe", + problem_id="p1", user_id="u", submission_stamp="s1") + _, b2 = el.upload(repo, remote_url=str(bare), benchmark="swe", + problem_id="p1", user_id="u", submission_stamp="s2") + branches = _remote_branches(bare) + assert b1 in branches and b2 in branches and b1 != b2 + + +def test_upload_returns_false_without_sessions(tmp_path): + repo = tmp_path / "repo" + repo.mkdir() + _git(["init", "-q", "-b", "main", "."], repo) + (repo 
/ "f").write_text("x") + _git(["add", "."], repo) + _git(["commit", "-q", "-m", "c"], repo) # no checkpoint branch + bare = _bare_remote(tmp_path) + + ok, msg = el.upload(repo, remote_url=str(bare), benchmark="swe", + problem_id="p", user_id="u") + assert not ok and "no captured sessions" in msg + + +def test_has_sessions_reflects_checkpoint_branch(tmp_path): + repo = tmp_path / "repo" + repo.mkdir() + _git(["init", "-q", "-b", "main", "."], repo) + (repo / "f").write_text("x") + _git(["add", "."], repo) + _git(["commit", "-q", "-m", "c"], repo) + assert el.has_sessions(repo) is False + _git(["branch", el.CHECKPOINT_BRANCH], repo) + assert el.has_sessions(repo) is True From 7b98781fd77ef1adffffe08cfde2861b983cca6b Mon Sep 17 00:00:00 2001 From: qyli00 Date: Wed, 3 Jun 2026 14:33:17 -0700 Subject: [PATCH 4/7] fix(logging): guard success banner from logging errors; make CLI tests hermetic - Reordering logging before the success banner meant an unexpected error in the logging path could suppress the "Successfully submitted" summary. Wrap the pre-banner logging call in _logging_status() so it degrades to a warning and the banner always prints. - tests/test_cli_logging.py: autouse fixture clears ambient AICODINGGYM_LOGS_REMOTE (resolution reads it first) so the resolver tests are hermetic; add tests for _logging_status. Co-Authored-By: Claude Opus 4.8 (1M context) --- cli.py | 25 +++++++++++++++++++------ tests/test_cli_logging.py | 18 ++++++++++++++++++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/cli.py b/cli.py index c0b9cbf..96cc19e 100644 --- a/cli.py +++ b/cli.py @@ -411,6 +411,19 @@ def _resolve_logs_remote(benchmark: str, creds: dict | None, return None +def _logging_status(thunk) -> str: + """Run a logging step before the success banner, never suppressing it. + + The submission already succeeded by the time logging runs, so any unexpected + error here must not hide the banner — degrade to a warning + empty status. 
+ """ + try: + return thunk() + except Exception as e: # noqa: BLE001 + _warn(f"AI session logging skipped due to an error: {e}") + return "" + + def _maybe_upload_logs(problem_dir: Path, *, benchmark: str, problem_id: str, user_id: str, key_path: Path | None, config: dict, creds: dict | None, upload_flag: bool | None, @@ -913,11 +926,11 @@ def swe_submit(problem_id: str, user_id: str | None, message: str | None, # Resolve logging (incl. any consent prompt) before the summary so the # prompt never interrupts the success banner. The solution commit above # already triggered Entire's checkpoint, so no flush is needed. - log_status = _maybe_upload_logs( + log_status = _logging_status(lambda: _maybe_upload_logs( problem_dir, benchmark="swe", problem_id=problem_id, user_id=uid, key_path=key_path, config=config, creds=creds, upload_flag=upload_logs, logs_remote_override=logs_remote, - ) + )) summary = [ f"\nSuccessfully submitted solution for {problem_id}\n", @@ -1421,11 +1434,11 @@ def cr_submit(problem_id: str, user_id: str | None, review_file: str | None, problem_dir = workspace / problem_id log_status = "" if problem_dir.exists(): - log_status = _maybe_upload_logs( + log_status = _logging_status(lambda: _maybe_upload_logs( problem_dir, benchmark="cr", problem_id=problem_id, user_id=uid, key_path=_safe_key_path(config, creds), config=config, creds=creds, upload_flag=upload_logs, logs_remote_override=logs_remote, flush=True, - ) + )) summary = [ f"\nSuccessfully submitted code review for {problem_id}\n", @@ -1585,11 +1598,11 @@ def mle_submit(competition_id: str, csv_path: str, user_id: str | None, competition_dir = workspace / competition_id artifacts_status = "" if competition_dir.exists(): - artifacts_status = _maybe_submit_mle_artifacts( + artifacts_status = _logging_status(lambda: _maybe_submit_mle_artifacts( competition_dir, competition_id=competition_id, user_id=uid, config=config, key_path=_safe_key_path(config), upload_flag=upload_logs, 
logs_remote_override=logs_remote, - ) + )) summary = [ f"\nSuccessfully submitted prediction for {competition_id}\n", diff --git a/tests/test_cli_logging.py b/tests/test_cli_logging.py index c079104..17638fc 100644 --- a/tests/test_cli_logging.py +++ b/tests/test_cli_logging.py @@ -5,6 +5,12 @@ from aicodinggym import cli +@pytest.fixture(autouse=True) +def _clear_logs_remote_env(monkeypatch): + """Resolution reads AICODINGGYM_LOGS_REMOTE first; keep tests hermetic.""" + monkeypatch.delenv("AICODINGGYM_LOGS_REMOTE", raising=False) + + # ── _resolve_logs_remote: one repo for all, SWE-only clone fallback ────────── def test_swe_falls_back_to_problem_repo_when_no_submission_repo(): @@ -84,3 +90,15 @@ def test_interactive_prompt_is_recorded(_isolate_consent, monkeypatch): def test_configure_hint_includes_user_id(): assert cli._configure_hint("alice") == "aicodinggym configure --user-id alice" assert cli._configure_hint(None) == "aicodinggym configure" + + +# ── _logging_status: must never let logging suppress the success banner ────── + +def test_logging_status_swallows_errors(): + def boom(): + raise RuntimeError("kaboom") + assert cli._logging_status(boom) == "" + + +def test_logging_status_passes_value_through(): + assert cli._logging_status(lambda: " Logs: uploaded") == " Logs: uploaded" From fe3b9dde21465425d0dd8ac960f468d5ee73e356 Mon Sep 17 00:00:00 2001 From: qyli00 Date: Wed, 3 Jun 2026 14:51:48 -0700 Subject: [PATCH 5/7] feat(mle): progress bar on dataset download; stream Entire installer output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - mle download: api.mlebench_download_open() exposes Content-Length + a chunk iterator so the CLI drives a click.progressbar (falls back to a running MB counter when the server omits Content-Length). Replaces the silent mlebench_download_info(). 
- configure: the Entire auto-installer no longer captures output — it streams, and we print "Installing Entire (downloading...)" first, so it no longer looks frozen during the download (the likely cause of the "stuck on configure" report). Co-Authored-By: Claude Opus 4.8 (1M context) --- api.py | 14 +++++++++----- cli.py | 20 ++++++++++++++++++-- entire_logging.py | 8 +++----- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/api.py b/api.py index 08c6e85..00263ea 100644 --- a/api.py +++ b/api.py @@ -102,12 +102,16 @@ def cr_submit_review(user_id: str, problem_id: str, review: str) -> dict: }) -def mlebench_download_info(user_id: str, competition_id: str, dest_path: str) -> None: - """Download dataset for an MLE-bench competition directly to dest_path.""" +def mlebench_download_open(competition_id: str, chunk_size: int = 1 << 16): + """Open a streaming download for an MLE-bench dataset. + + Returns (total_bytes, chunk_iterator). ``total_bytes`` is the server's + Content-Length, or 0 if it isn't advertised. Lets the caller drive a + progress bar while writing chunks to disk. 
+ """ resp = _get(f"competitions/{competition_id}/download", stream=True) - with open(dest_path, "wb") as f: - for chunk in resp.iter_content(chunk_size=8192): - f.write(chunk) + total = int(resp.headers.get("Content-Length") or 0) + return total, resp.iter_content(chunk_size=chunk_size) def mlebench_download_file(url: str, dest_path: str, timeout: int = 300) -> None: diff --git a/cli.py b/cli.py index 96cc19e..e90c7bb 100644 --- a/cli.py +++ b/cli.py @@ -43,7 +43,7 @@ fetch_pr as api_fetch_pr, fetch_problem as api_fetch_problem, mlebench_download_file, - mlebench_download_info, + mlebench_download_open, mlebench_submit_csv, submit_notification, ) @@ -548,6 +548,7 @@ def _configure_logging(upload_logs_flag: bool | None) -> None: click.echo(f" Install later with:\n {entire_logging.INSTALL_COMMAND}") return if click.confirm("Install the Entire CLI now?", default=True): + click.echo("Installing Entire (downloading a binary; this may take a minute)...") ok, msg = entire_logging.install() if ok: click.echo(f" Entire: {msg}") @@ -1503,7 +1504,22 @@ def mle_download(competition_id: str, user_id: str | None, workspace_dir: str | try: click.echo(f"Downloading dataset for '{competition_id}'...") - mlebench_download_info(uid, competition_id, str(dest_path)) + total, chunks = mlebench_download_open(competition_id) + with open(dest_path, "wb") as f: + if total: + with click.progressbar(length=total, label=" Downloading", + show_percent=True) as bar: + for chunk in chunks: + f.write(chunk) + bar.update(len(chunk)) + else: + # Server didn't advertise a size — show a running byte counter. 
+ downloaded = 0 + for chunk in chunks: + f.write(chunk) + downloaded += len(chunk) + click.echo(f"\r {downloaded / 1_048_576:.1f} MB downloaded", nl=False) + click.echo() except APIError as e: _error(str(e)) diff --git a/entire_logging.py b/entire_logging.py index edf75e5..43e4ba3 100644 --- a/entire_logging.py +++ b/entire_logging.py @@ -101,12 +101,10 @@ def install() -> tuple[bool, str]: # Scoop lives behind PowerShell; auto-driving it reliably is brittle, # so we defer to the documented manual command. return False, "automatic install is not supported on Windows" - res = subprocess.run( - ["bash", "-c", INSTALL_COMMAND], - capture_output=True, text=True, timeout=300, - ) + # Stream the installer's output so the download doesn't look frozen. + res = subprocess.run(["bash", "-c", INSTALL_COMMAND], timeout=300) if res.returncode != 0: - return False, (res.stderr or res.stdout or "installer exited non-zero").strip() + return False, "installer exited non-zero (see output above)" # shutil.which caches nothing, but the new binary may have landed in a # dir not yet on this process's PATH (e.g. ~/.local/bin). if is_available(): From de82bcf11d2ba958e2607f881329e34916c0eb48 Mon Sep 17 00:00:00 2001 From: qyli00 Date: Wed, 3 Jun 2026 15:49:23 -0700 Subject: [PATCH 6/7] feat(cli): guide users to launch their agent inside the problem dir; preserve workspace on re-configure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - fetch/download now print an instruction (when Entire capture is active) to start the AI agent INSIDE the fetched directory, because Claude Code/Codex load capture hooks from the launch dir and fix them for the session — cd-ing in later does not activate them, so the session wouldn't be captured. 
- configure: only change workspace_dir when --workspace-dir is explicitly given; on re-configure preserve the existing workspace instead of silently resetting it to the current directory (fall back to cwd only on first-time setup). Co-Authored-By: Claude Opus 4.8 (1M context) --- cli.py | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/cli.py b/cli.py index e90c7bb..c317150 100644 --- a/cli.py +++ b/cli.py @@ -332,13 +332,13 @@ def _configure_hint(user_id: str | None) -> str: def _setup_logging(problem_dir: Path, *, init_git: bool = False, - user_id: str | None = None) -> None: + user_id: str | None = None) -> bool: """Best-effort: install Entire hooks so the session is captured locally. Capture is local-only; nothing is uploaded until the user consents at submit. If Entire isn't installed, point the user at ``configure`` (which offers to install it) rather than silently skipping — unless they've already - opted out of logging. + opted out of logging. Returns True if AI-session capture is now active. """ if not entire_logging.is_available(): if get_logging_consent() is not False: # not explicitly opted out @@ -346,11 +346,28 @@ def _setup_logging(problem_dir: Path, *, init_git: bool = False, " Logging: Not set up — the 'entire' CLI isn't installed.\n" f" Run '{_configure_hint(user_id)}' to enable AI workflow logging." ) - return + return False ok, msg = entire_logging.setup(problem_dir, init_git=init_git) if ok: click.echo(f" Logging: {msg} (uploaded only with your consent on submit)") + return True # On setup failure (Entire present but enable errored) we stay quiet. + return False + + +def _launch_instruction(problem_dir: Path) -> str: + """Reminder that AI-session capture only works for an agent launched here. + + Claude Code (and Codex etc.) load their capture hooks from the directory + they're started in, fixed for the whole session — cd-ing in later does not + activate them. 
So the agent must be launched inside the problem folder. + """ + return ( + f"Please start your agent inside {problem_dir}:\n" + f" cd {problem_dir}\n" + " claude # or codex / your AI agent\n" + "(Capture only works for an agent launched here — not one you cd into later.)" + ) def _safe_key_path(config: dict, creds: dict | None = None) -> Path | None: @@ -664,7 +681,14 @@ def configure(user_id: str, workspace_dir: str | None, upload_logs: bool | None) else: raise - resolved_workspace = str(Path(workspace_dir).resolve()) if workspace_dir else str(Path.cwd().resolve()) + # Only change the workspace when --workspace-dir is explicitly given. + # On re-configure, preserve the previously configured workspace instead + # of silently relocating it to wherever 'configure' happened to run; + # fall back to the current directory only on first-time setup. + if workspace_dir: + resolved_workspace = str(Path(workspace_dir).resolve()) + else: + resolved_workspace = existing.get("workspace_dir") or str(Path.cwd().resolve()) config = { "user_id": user_id, @@ -804,7 +828,7 @@ def swe_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): save_config(config) _install_gym_environment(workspace / problem_id) - _setup_logging(workspace / problem_id, user_id=uid) + capture_active = _setup_logging(workspace / problem_id, user_id=uid) click.echo( f"\nSuccessfully fetched problem: {problem_id}\n" @@ -814,6 +838,8 @@ def swe_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): if server_msg: click.echo(f" Server: {server_msg}\n") click.echo("You can now start working on the solution!") + if capture_active: + click.echo("\n" + _launch_instruction(workspace / problem_id)) @swe.command("submit") @@ -1318,7 +1344,7 @@ def cr_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): _error(msg) _install_gym_environment(workspace / problem_id) - _setup_logging(workspace / problem_id, user_id=uid) + capture_active = _setup_logging(workspace / 
problem_id, user_id=uid) problem_dir = workspace / problem_id @@ -1356,6 +1382,8 @@ def cr_fetch(problem_id: str, user_id: str | None, workspace_dir: str | None): f" 2. Write your review in {review_path}\n" f" 3. Submit: aicodinggym cr submit {problem_id} -f review.md\n" ) + if capture_active: + click.echo("\n" + _launch_instruction(problem_dir)) @cr.command("submit") @@ -1527,13 +1555,15 @@ def mle_download(competition_id: str, user_id: str | None, workspace_dir: str | # MLE workspaces aren't git repos: init one so the solution code can be # pushed on submit and Entire can attach for session capture. entire_logging.ensure_git_repo(workspace / competition_id) - _setup_logging(workspace / competition_id, user_id=uid) + capture_active = _setup_logging(workspace / competition_id, user_id=uid) click.echo( f"\nDataset downloaded to: {dest_path}\n" f"\nNext step: train your model and submit predictions with:\n" f" aicodinggym mle submit {competition_id} -F your_predictions.csv" ) + if capture_active: + click.echo("\n" + _launch_instruction(workspace / competition_id)) @mle.command("submit") From 7c1ddc320ace61097a1e8d257b1f1417e4fd29d5 Mon Sep 17 00:00:00 2001 From: qyli00 Date: Mon, 8 Jun 2026 14:53:08 -0700 Subject: [PATCH 7/7] feat(mle): always push solution code on submit; add `mle restore` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously `mle submit` gated the solution-code push and the AI session log upload behind the same consent check. A non-interactive submit (e.g. driven by an agent, where stdin is not a TTY) hit the privacy-safe "no consent on record" branch and returned early, silently pushing neither the code nor the logs to the user's submission repo. Decouple the two: - Always push the solution code to the user's own submission repo — it's their repo and the whole point of `submit`. Consent now gates only the de-identified AI-session log upload. 
- Resolve upload consent once at `configure` (where a human is reliably present) so later non-interactive submits have a recorded answer; keep the submit-time prompt as a fallback. - Always set Entire's `commit_linking=always` (in setup/commit/flush) so its per-commit "link this session?" prompt never fires — the user's single upload-consent question is the only one, and it governs only the push to the logs branch. - Name the code branch after the competition (force-pushed, latest wins) instead of `/`, so it has a predictable name to pull. - Add `aicodinggym mle restore COMPETITION_ID`: fetch the code branch back into the workspace, preserving the gitignored dataset and guarding uncommitted changes (override with --force). Tests: ensure_commit_linking, force-push overwrite, and restore_branch round-trip / dirty-guard / missing-branch. Co-Authored-By: Claude Opus 4.8 (1M context) --- cli.py | 183 +++++++++++++++++++++++++--------- entire_logging.py | 60 +++++++++-- git_ops.py | 50 ++++++++++ tests/test_entire_logging.py | 43 ++++++++ tests/test_git_ops_restore.py | 67 +++++++++++++ 5 files changed, 347 insertions(+), 56 deletions(-) create mode 100644 tests/test_git_ops_restore.py diff --git a/cli.py b/cli.py index c317150..2d9a711 100644 --- a/cli.py +++ b/cli.py @@ -62,6 +62,7 @@ clone_repo_cr, generate_ssh_key_pair, reset_to_setup_commit, + restore_branch, run_git_command, ) @@ -490,92 +491,117 @@ def _maybe_submit_mle_artifacts(workspace_repo: Path, *, competition_id: str, user_id: str, config: dict, key_path: Path | None, upload_flag: bool | None, logs_remote_override: str | None) -> str: - """MLE submit: push the solution code and (when captured) the AI session logs - to the user's own repo, gated by the single log-upload consent. Each goes to - a unique per-submission branch so prior submissions are never overwritten. - The CSV itself already went to the API. Returns a status block (or ""). 
+ """MLE submit: always push the user's solution code to their submission repo, + and — only with consent — also upload the captured AI session logs. + + Pushing the code is the whole point of ``submit`` and it goes to the user's + *own* repo, so it is NOT gated on research-log consent; only the + de-identified AI session upload is. The code lands on a stable branch named + after the competition (overwritten each submit, so ``mle restore`` has a + predictable name to pull); logs land on a unique per-submission branch so + prior sessions are never overwritten. The CSV itself already went to the + API. Returns a status block (or ""). """ workspace_repo = Path(workspace_repo) if not (workspace_repo / ".git").exists(): return "" # workspace was never initialised (e.g. downloaded pre-upgrade) - # Commit the solution code locally. With Entire enabled this commit also - # materialises the AI session checkpoint, so no separate flush is needed. + # Commit the solution code locally. With Entire enabled, this commit also + # captures the AI session checkpoint (commit_linking is always on) — no + # separate flush needed. The session is uploaded only with consent, below. entire_logging.commit_workspace(workspace_repo, f"MLE submission: {competition_id}") - if not _resolve_log_upload_consent(upload_flag): - return " Logs: code + AI session captured locally; upload skipped." - remote = _resolve_logs_remote("mle", None, config, logs_remote_override) if not remote: return ( - " Logs: consented, but no repository is configured to push to.\n" + " Code: committed locally, but no submission repository is configured.\n" " Re-run 'aicodinggym configure' or pass --logs-remote URL." ) - # One stamp ties this submission's code branch and log branch together, and - # makes both unique so re-submissions never overwrite earlier ones. 
- stamp = entire_logging.new_stamp() lines: list[str] = [] + # 1) Always push the solution code — it's the user's own repo and the point + # of submitting; independent of research-log consent. code_ok, code_info = entire_logging.push_branch( - workspace_repo, remote_url=remote, - dest_branch=f"{competition_id}/{stamp}", key_path=key_path, + workspace_repo, remote_url=remote, dest_branch=competition_id, + key_path=key_path, force=True, ) if code_ok: lines.append(f" Code: pushed to branch '{code_info}'") + lines.append(f" Restore later with: aicodinggym mle restore {competition_id}") else: _warn(f"MLE code push failed: {code_info}") - if entire_logging.is_enabled(workspace_repo) and entire_logging.has_sessions(workspace_repo): + # 2) Upload the de-identified AI session logs only with consent. + if not entire_logging.is_available(): + lines.append(" Logs: AI session not captured ('entire' not installed).") + lines.append(f" Run '{_configure_hint(user_id)}' to enable logging next time.") + elif not (entire_logging.is_enabled(workspace_repo) + and entire_logging.has_sessions(workspace_repo)): + pass # no AI session was captured — stay quiet + elif not _resolve_log_upload_consent(upload_flag): + lines.append(" Logs: AI session captured locally; upload skipped.") + else: ok, info = entire_logging.upload( workspace_repo, remote_url=remote, benchmark="mle", problem_id=competition_id, user_id=user_id, key_path=key_path, cli_version=__version__, - submission_stamp=stamp, + submission_stamp=entire_logging.new_stamp(), ) if ok: lines.append(f" Logs: uploaded for research (branch {info})") else: _warn(f"AI session log upload failed: {info}") - elif not entire_logging.is_available(): - lines.append(" Logs: AI session not captured ('entire' not installed).") - lines.append(f" Run '{_configure_hint(user_id)}' to enable logging next time.") return "\n".join(lines) def _configure_logging(upload_logs_flag: bool | None) -> None: - """During configure: record consent (if given) and 
offer to install Entire.""" - if upload_logs_flag is not None: - set_logging_consent(upload_logs_flag) + """During configure: offer to install Entire, then record upload consent. + Consent is resolved here — where a human is reliably at the keyboard — + rather than only at submit. Later submits are often non-interactive (e.g. + driven by an AI agent, where stdin is not a TTY); recording the choice now + gives them an answer to act on instead of silently skipping the upload. + """ if entire_logging.is_available(): ver = entire_logging.version() click.echo(f" Logging: Entire detected ({ver or 'installed'})") - return - - click.echo( - "\nOptional — AI workflow logging:\n" - " AI Coding Gym can capture your AI coding sessions (via Entire,\n" - " https://entire.io) and, only with your consent at submit, upload them\n" - " for research. Uploaded data is de-identified/anonymized. Needs the\n" - " 'entire' CLI." - ) - if not sys.stdin.isatty(): - click.echo(f" Install later with:\n {entire_logging.INSTALL_COMMAND}") - return - if click.confirm("Install the Entire CLI now?", default=True): - click.echo("Installing Entire (downloading a binary; this may take a minute)...") - ok, msg = entire_logging.install() - if ok: - click.echo(f" Entire: {msg}") - else: - _warn( - f"Could not install Entire automatically: {msg}\n" - f" Install manually: {entire_logging.INSTALL_COMMAND}" - ) else: - click.echo(f" Skipped. Install later with:\n {entire_logging.INSTALL_COMMAND}") + click.echo( + "\nOptional — AI workflow logging:\n" + " AI Coding Gym can capture your AI coding sessions (via Entire,\n" + " https://entire.io) and, only with your consent, upload them for\n" + " research. Uploaded data is de-identified/anonymized. Needs the\n" + " 'entire' CLI." 
+ ) + if not sys.stdin.isatty(): + click.echo(f" Install later with:\n {entire_logging.INSTALL_COMMAND}") + elif click.confirm("Install the Entire CLI now?", default=True): + click.echo("Installing Entire (downloading a binary; this may take a minute)...") + ok, msg = entire_logging.install() + if ok: + click.echo(f" Entire: {msg}") + else: + _warn( + f"Could not install Entire automatically: {msg}\n" + f" Install manually: {entire_logging.INSTALL_COMMAND}" + ) + else: + click.echo(f" Skipped. Install later with:\n {entire_logging.INSTALL_COMMAND}") + + # Record the standing upload consent now. An explicit flag wins; otherwise + # ask once (only with a TTY and no prior choice) so non-interactive submits + # have an answer. The submit-time prompt remains as a fallback for anyone who + # configured non-interactively. + if upload_logs_flag is not None: + set_logging_consent(upload_logs_flag) + elif get_logging_consent() is None and sys.stdin.isatty(): + granted = click.confirm("\n" + _CONSENT_PROMPT, default=False) + set_logging_consent(granted) + click.echo( + " Consent recorded — change it anytime with\n" + " 'aicodinggym configure --upload-logs' (or --no-upload-logs)." + ) # ── Top-level group ────────────────────────────────────────────────────────── @@ -1662,3 +1688,68 @@ def mle_submit(competition_id: str, csv_path: str, user_id: str | None, summary.append("") summary.append(f"View results at: {_hyperlink(f'https://aicodinggym.com/challenges/mle/{competition_id}')}") click.echo("\n".join(summary)) + + +@mle.command("restore") +@click.argument("competition_id") +@click.option("--user-id", default=None, help="Override configured user ID.") +@click.option( + "--workspace-dir", default=None, type=click.Path(), + help="Workspace directory. Overrides configured value.", +) +@click.option( + "--branch", default=None, + help="Branch to restore (defaults to the competition name). 
Pass a full " + "'/' branch to restore an older submission.", +) +@click.option( + "--remote", default=None, + help="Git URL to restore from (defaults to your submission repo).", +) +@click.option( + "--force", is_flag=True, default=False, + help="Overwrite uncommitted local changes in the workspace.", +) +def mle_restore(competition_id: str, user_id: str | None, workspace_dir: str | None, + branch: str | None, remote: str | None, force: bool): + """Restore a competition workspace from your submission repo. + + Pulls the solution code you pushed on a previous 'mle submit' back into your + workspace — e.g. to pick up on another machine or recover after a reset. Your + downloaded dataset and other gitignored files are left untouched. + + \b + EXAMPLE: + aicodinggym mle restore spaceship-titanic + aicodinggym mle restore spaceship-titanic --branch spaceship-titanic/20260608T213116Z-5252df01 + """ + config = load_config() + uid = _resolve_user_id(config, user_id) + + remote_url = _resolve_logs_remote("mle", None, config, remote) + if not remote_url: + _error( + "No submission repository is configured to restore from.\n" + "Run 'aicodinggym configure --user-id YOUR_USER_ID' first, or pass --remote URL." + ) + + workspace = _resolve_workspace(config, workspace_dir) + target = workspace / competition_id + branch_name = branch or competition_id + + click.echo(f"Restoring '{branch_name}' from your submission repo into {target}...") + ok, msg = restore_branch( + remote_url, branch_name, str(target), + key_path=_safe_key_path(config), force=force, + ) + if not ok: + _error(msg) + + # Re-arm local AI-session capture for continued work in the restored repo. 
+ _setup_logging(target, user_id=uid) + + click.echo( + f"\n{msg}\n" + f"\nNext step: continue working, then submit with:\n" + f" aicodinggym mle submit {competition_id} -F your_predictions.csv" + ) diff --git a/entire_logging.py b/entire_logging.py index 43e4ba3..8238f92 100644 --- a/entire_logging.py +++ b/entire_logging.py @@ -169,11 +169,44 @@ def setup(repo_dir: Path, *, init_git: bool = False) -> tuple[bool, str]: if add.returncode == 0: enabled_agents.append(agent_name) + # Suppress Entire's per-commit "link this session?" prompt up front, so + # the user's own agent commits during the session aren't interrupted. + ensure_commit_linking(repo_dir) return True, "capturing AI sessions for: " + ", ".join(enabled_agents) except Exception as e: # noqa: BLE001 - logging must never break fetch return False, str(e) +def ensure_commit_linking(repo_dir: Path) -> None: + """Make Entire link every commit to the active AI session without asking. + + Entire's ``prepare-commit-msg`` hook otherwise interactively prompts ("link + this commit to session context?") on every commit unless ``commit_linking`` + is set in ``.entire/settings.local.json``. We always set it to ``"always"`` + so Entire never asks: capture stays local-only, and the single consent + question (ours) gates only whether the captured session is *uploaded* at + submit. Never raises. 
+ """ + try: + settings = Path(repo_dir) / ".entire" / "settings.local.json" + if not settings.parent.is_dir(): + return # Entire isn't set up in this repo + data: dict = {} + if settings.exists(): + try: + loaded = json.loads(settings.read_text()) + if isinstance(loaded, dict): + data = loaded + except (json.JSONDecodeError, OSError): + data = {} + if data.get("commit_linking") == "always": + return + data["commit_linking"] = "always" + settings.write_text(json.dumps(data, indent=2) + "\n") + except Exception: # noqa: BLE001 - never let this break setup/submit + pass + + def flush(repo_dir: Path) -> None: """Materialise a checkpoint from the active session, best-effort. @@ -185,6 +218,7 @@ def flush(repo_dir: Path) -> None: if not is_available() or not is_enabled(repo_dir): return try: + ensure_commit_linking(repo_dir) # this commit must not trigger a prompt _git( ["-c", "user.name=AI Coding Gym", "-c", "user.email=logs@aicodinggym.com", "commit", "--allow-empty", "-m", _FLUSH_MESSAGE], @@ -204,6 +238,7 @@ def commit_workspace(repo_dir: Path, message: str) -> bool: if not (Path(repo_dir) / ".git").exists(): return False try: + ensure_commit_linking(repo_dir) # this commit must not trigger a prompt _git(["add", "-A"], cwd=repo_dir) res = _git( ["-c", "user.name=AI Coding Gym", "-c", "user.email=logs@aicodinggym.com", @@ -216,20 +251,25 @@ def commit_workspace(repo_dir: Path, message: str) -> bool: def push_branch(repo_dir: Path, *, remote_url: str, dest_branch: str, - key_path: Path | None = None) -> tuple[bool, str]: - """Push the current HEAD to a fresh ``dest_branch`` on ``remote_url``. - - Used for MLE to push the user's solution code. The caller passes a unique, - per-submission branch name (see :func:`new_stamp`), so we do NOT force-push: - each submission lands on its own branch and previous ones are preserved. - Returns (ok, branch_or_error). Never raises. 
+ key_path: Path | None = None, force: bool = False) -> tuple[bool, str]: + """Push the current HEAD to ``dest_branch`` on ``remote_url``. + + Used for MLE to push the user's solution code. ``force=True`` overwrites the + branch — used for the stable, competition-named code branch so the latest + submission wins and ``mle restore`` has a predictable name to pull. The new + tip still has every prior submission commit as an ancestor, so nothing is + truly lost. ``force=False`` refuses to clobber an existing branch (used for + unique, per-submission log branches). Returns (ok, branch_or_error). Never + raises. """ try: safe = _safe_ref(dest_branch) refspec = f"HEAD:refs/heads/{safe}" - res = git_ops.run_git_command( - ["git", "push", remote_url, refspec], str(repo_dir), key_path, - ) + cmd = ["git", "push"] + if force: + cmd.append("--force") + cmd += [remote_url, refspec] + res = git_ops.run_git_command(cmd, str(repo_dir), key_path) if res.returncode != 0: return False, (res.stderr or "git push failed").strip() return True, safe diff --git a/git_ops.py b/git_ops.py index db742df..5a6c5df 100644 --- a/git_ops.py +++ b/git_ops.py @@ -308,6 +308,56 @@ def reset_to_setup_commit(problem_dir: str) -> tuple[bool, str]: return True, f"Reset to setup commit {setup_commit[:8]}.\nLocal changes discarded and untracked files removed." +def restore_branch(remote_url: str, branch: str, target_dir: str, + key_path: Optional[Path] = None, + force: bool = False) -> tuple[bool, str]: + """Restore a competition workspace's tracked files from a remote ``branch``. + + Fetches ``branch`` from ``remote_url`` and hard-resets ``target_dir`` to it. + Gitignored files already present (e.g. the downloaded dataset) are left + untouched — only tracked files are restored. Initialises a repo in place if + the directory isn't one yet. Refuses to discard uncommitted tracked changes + unless ``force`` is set. Returns (success, message). 
+ """ + _validate_git_ref(branch, "branch") + target = Path(target_dir) + target.mkdir(parents=True, exist_ok=True) + + if not (target / ".git").exists(): + init = run_git_command(["git", "init", "-q"], str(target)) + if init.returncode != 0: + return False, f"git init failed:\n{init.stderr}" + + # Don't clobber real uncommitted work; gitignored files (the dataset) don't + # count. Only meaningful once the repo already has a commit history. + has_head = run_git_command( + ["git", "rev-parse", "--verify", "-q", "HEAD"], str(target) + ).returncode == 0 + if has_head and not force: + status = run_git_command(["git", "status", "--porcelain"], str(target)) + if status.stdout.strip(): + return False, ( + "Workspace has uncommitted changes. Commit them first, or re-run " + "with --force to overwrite them with the restored version." + ) + + fetch = run_git_command(["git", "fetch", remote_url, branch], str(target), key_path) + if fetch.returncode != 0: + return False, ( + f"Could not fetch branch '{branch}':\n{fetch.stderr.strip()}\n" + "Make sure you've submitted this competition at least once." + ) + + reset = run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(target)) + if reset.returncode != 0: + return False, f"git reset failed:\n{reset.stderr}" + + short = run_git_command( + ["git", "rev-parse", "--short", "HEAD"], str(target) + ).stdout.strip() + return True, f"Restored '{branch}' into {target} (at commit {short})." 
+ + def check_tool_installed(tool_name: str) -> bool: """Check if a CLI tool is available on PATH.""" return shutil.which(tool_name) is not None diff --git a/tests/test_entire_logging.py b/tests/test_entire_logging.py index dc14c9b..dd046c6 100644 --- a/tests/test_entire_logging.py +++ b/tests/test_entire_logging.py @@ -108,6 +108,49 @@ def test_commit_workspace_excludes_heavy_artifacts(tmp_path): # ── push_branch (MLE code) ─────────────────────────────────────────────────── +def test_push_branch_force_overwrites_stable_branch(tmp_path): + # The MLE code branch is named after the competition and force-pushed, so the + # latest submission wins (and `mle restore` has a predictable name to pull). + ws = tmp_path / "comp" + ws.mkdir() + el.ensure_git_repo(ws) + (ws / "solution.py").write_text("v1\n") + el.commit_workspace(ws, "submit 1") + bare = _bare_remote(tmp_path) + + ok1, b1 = el.push_branch(ws, remote_url=str(bare), + dest_branch="spaceship-titanic", key_path=None, force=True) + (ws / "solution.py").write_text("v2\n") + el.commit_workspace(ws, "submit 2") + ok2, b2 = el.push_branch(ws, remote_url=str(bare), + dest_branch="spaceship-titanic", key_path=None, force=True) + + assert ok1 and ok2 and b1 == b2 == "spaceship-titanic" + assert _remote_branches(bare) == {"spaceship-titanic"} # one stable branch + assert _git(["show", "spaceship-titanic:solution.py"], bare).stdout == "v2\n" + + +# ── ensure_commit_linking (suppresses Entire's per-commit prompt) ───────────── + +def test_ensure_commit_linking_sets_always_and_merges(tmp_path): + repo = tmp_path / "r" + (repo / ".entire").mkdir(parents=True) + el.ensure_commit_linking(repo) + settings = repo / ".entire" / "settings.local.json" + assert json.loads(settings.read_text())["commit_linking"] == "always" + + # Preserves unrelated keys and is idempotent. 
+ settings.write_text(json.dumps({"telemetry": False})) + el.ensure_commit_linking(repo) + data = json.loads(settings.read_text()) + assert data == {"telemetry": False, "commit_linking": "always"} + + +def test_ensure_commit_linking_noop_without_entire_dir(tmp_path): + el.ensure_commit_linking(tmp_path) # no .entire -> must not create anything + assert not (tmp_path / ".entire").exists() + + def test_push_branch_pushes_to_unique_branch_without_overwrite(tmp_path): ws = tmp_path / "comp" ws.mkdir() diff --git a/tests/test_git_ops_restore.py b/tests/test_git_ops_restore.py new file mode 100644 index 0000000..f614710 --- /dev/null +++ b/tests/test_git_ops_restore.py @@ -0,0 +1,67 @@ +"""Tests for git_ops.restore_branch — `aicodinggym mle restore`'s workhorse. + +Verifies it pulls tracked code back from a remote branch while leaving the +already-present (gitignored) dataset untouched, and guards uncommitted work. +""" + +import subprocess + +from aicodinggym import git_ops + + +def _git(args, cwd): + return subprocess.run( + ["git", "-c", "user.name=T", "-c", "user.email=t@t", *args], + cwd=str(cwd), capture_output=True, text=True, + ) + + +def _seed_remote_with_code(tmp_path): + """A bare remote holding branch 'comp' with solution.py at v1 (data/ ignored).""" + src = tmp_path / "src" + src.mkdir() + _git(["init", "-q", "-b", "comp", "."], src) + (src / ".gitignore").write_text("data/\n") + (src / "solution.py").write_text("v1\n") + _git(["add", "-A"], src) + _git(["commit", "-q", "-m", "submit"], src) + + bare = tmp_path / "remote.git" + _git(["init", "--bare", "-q", str(bare)], tmp_path) + _git(["push", "-q", str(bare), "comp"], src) + return bare + + +def test_restore_into_new_dir_with_existing_dataset(tmp_path): + bare = _seed_remote_with_code(tmp_path) + target = tmp_path / "ws" / "comp" + target.mkdir(parents=True) + (target / "data").mkdir() + (target / "data" / "train.csv").write_text("keep me\n") # gitignored dataset + + ok, msg = 
git_ops.restore_branch(str(bare), "comp", str(target), key_path=None) + + assert ok, msg + assert (target / "solution.py").read_text() == "v1\n" # code restored + assert (target / "data" / "train.csv").read_text() == "keep me\n" # dataset kept + + +def test_restore_refuses_to_clobber_uncommitted_without_force(tmp_path): + bare = _seed_remote_with_code(tmp_path) + target = tmp_path / "comp" + git_ops.restore_branch(str(bare), "comp", str(target), key_path=None) + + (target / "solution.py").write_text("LOCAL WORK\n") # uncommitted change + ok, msg = git_ops.restore_branch(str(bare), "comp", str(target), key_path=None) + assert not ok and "uncommitted" in msg + assert (target / "solution.py").read_text() == "LOCAL WORK\n" # untouched + + ok2, _ = git_ops.restore_branch(str(bare), "comp", str(target), key_path=None, force=True) + assert ok2 and (target / "solution.py").read_text() == "v1\n" # overwritten + + +def test_restore_reports_missing_branch(tmp_path): + bare = _seed_remote_with_code(tmp_path) + target = tmp_path / "comp" + ok, msg = git_ops.restore_branch(str(bare), "does-not-exist", str(target), key_path=None) + assert not ok and "fetch" in msg.lower()