diff --git a/.github/workflows/test-error-code-hints.yaml b/.github/workflows/test-error-code-hints.yaml new file mode 100644 index 000000000..b2066cc51 --- /dev/null +++ b/.github/workflows/test-error-code-hints.yaml @@ -0,0 +1,49 @@ +name: Test error code hints + +on: + push: + branches: ["*"] + pull_request: + branches: [dev, main] + +jobs: + test-error-code-hints: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install mlcflow + run: pip install -e . + + - name: Test exit code hint helper + run: | + python -c " + from mlc.script_action import _get_exit_code_hint, ScriptExecutionError + + # Known codes return specific hints + assert 'Segmentation fault' in _get_exit_code_hint(139) + assert 'disk space' in _get_exit_code_hint(28).lower() + assert 'Network error' in _get_exit_code_hint(6) + assert 'Network error' in _get_exit_code_hint(7) + assert 'PATH' in _get_exit_code_hint(127) + + # Unknown code returns generic fallback with the code number + hint = _get_exit_code_hint(999) + assert '999' in hint + + # ScriptExecutionError embeds hint in message + err = ScriptExecutionError('test failed', return_code=139) + assert 'Segmentation fault' in str(err) + assert err.return_code == 139 + + # return_code=-1 means no hint appended + err2 = ScriptExecutionError('test failed') + assert str(err2) == 'test failed' + + print('All assertions passed.') + " \ No newline at end of file diff --git a/.gitignore b/.gitignore index a988bc06a..10748638c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,5 @@ build dist *egg-info __pycache__ - +.vscode .mlc-log.txt diff --git a/README.md b/README.md index 449c972bb..eb5e3eb64 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ On February 9, 2025, MLCFlow released its first stable version, 1.0.0. 
### Key Features Building upon the core idea of CMind—wrapping native scripts with Python wrappers and YAML metadata—MLCFlow focuses exclusively on key automation components: **Scripts**, along with its complementary modules: **Cache**, **Docker**, and **Experiments**. This targeted design simplifies both implementation and interface, enabling a more user-friendly experience. +- **Smart Error Messages**: When a script fails, MLCFlow now detects the exit code and displays a human-readable suggestion — for example, flagging network issues, disk space problems, permission errors, or segmentation faults — so you spend less time debugging. + --- ### Status diff --git a/mlc/script_action.py b/mlc/script_action.py index 3bdc02f83..ae58979ff 100644 --- a/mlc/script_action.py +++ b/mlc/script_action.py @@ -9,6 +9,37 @@ from . import utils from .logger import logger +_SCRIPT_EXIT_CODE_HINTS: dict[int, str] = { + 1: "General error. Review the script output above for details.", + 2: "Shell misuse or invalid argument passed to the script.", + 13: "Permission denied. Check file or directory permissions.", + 28: "No space left on device. Free up disk space and retry.", + 126: "Command cannot execute. Check execute permissions on the script.", + 127: "Command not found. Ensure all required dependencies are installed and on PATH.", + 130: "Script interrupted by user (Ctrl+C / SIGINT).", + 137: "Process killed (SIGKILL). Possible out-of-memory condition.", + 139: "Segmentation fault in native run. Check your binary or native library.", + 143: "Script terminated externally (SIGTERM).", +} + + +def _get_exit_code_hint(return_code: int) -> str: + """Return a human-readable hint for a subprocess exit code. + Falls back to a generic message for unknown codes. + Network-error heuristic: codes 6/7 are curl exit codes for DNS / connect + failures, often surfaced as return_code=6 or 7 inside scripts. 
+ """ + if return_code in (6, 7): + return ( + "Network error detected (DNS resolution or connection failure). " + "Check your internet connection and proxy settings." + ) + return _SCRIPT_EXIT_CODE_HINTS.get( + return_code, + f"Script exited with code {return_code}. " + "See the output above for more details.", + ) + class ScriptAction(Action): """ @@ -312,10 +343,14 @@ def call_script_module_function(self, function_name, run_args): _repo_alias = _repo_match.group(1) if _repo_match else None _script_name = run_args.get('tags', run_args.get('details')) raise ScriptExecutionError( - f"Script {function_name} execution failed in {module_path}." + - "\nError : " + f"{type(exc).__name__}: {exc}", - script_name=_script_name, repo_alias=_repo_alias, module_path=module_path, - run_args=run_args) from exc + f"Script {function_name} execution failed in { + module_path}. \nError : {error}", + script_name=_script_name, + repo_alias=_repo_alias, + module_path=module_path, + run_args=run_args, + version_info_file=_version_info_file, + return_code=result.get("return", -1), ) if result['return'] > 0: error = result.get('error', "") @@ -336,7 +371,8 @@ def call_script_module_function(self, function_name, run_args): except Exception: _version_info_file = None raise ScriptExecutionError( - f"Script {function_name} execution failed in {module_path}. \nError : {error}", + f"Script {function_name} execution failed in { + module_path}. 
class ScriptExecutionError(Exception):
    """Raised when a script (or one of its module functions) fails.

    When a real exit code is supplied, the exception message is extended
    with an ``[Exit code N]`` line carrying the hint returned by
    ``_get_exit_code_hint``. The context passed in is kept on the instance
    for callers that want to report it.

    Args:
        message: Base error message.
        script_name: Name or tags of the failing script, if known.
        repo_alias: Alias of the repository the script belongs to, if known.
        module_path: Path of the script module that failed, if known.
        run_args: Arguments the script was run with; stored as ``{}`` when
            omitted or falsy.
        version_info_file: Path to a version-info file, if one was found.
        return_code: Exit code of the failed script. ``-1`` (the default)
            or ``None`` means "no exit code available", in which case the
            message is left untouched.
    """

    def __init__(
        self,
        message,
        script_name=None,
        repo_alias=None,
        module_path=None,
        run_args=None,
        version_info_file=None,
        return_code: int = -1,
    ):
        # -1 is the "no exit code" sentinel; None is treated the same way so
        # a missing value never renders as "[Exit code None]".
        has_exit_code = return_code is not None and return_code != -1
        hint = _get_exit_code_hint(return_code) if has_exit_code else ""
        if hint:
            # Kept on one line: a line break inside an f-string replacement
            # field is a SyntaxError on Python versions before 3.12.
            full_message = f"{message}\n[Exit code {return_code}] {hint}"
        else:
            full_message = message
        super().__init__(full_message)
        self.script_name = script_name
        self.repo_alias = repo_alias
        self.module_path = module_path
        self.run_args = run_args or {}
        self.version_info_file = version_info_file
        self.return_code = return_code