diff --git a/action.yml b/action.yml index dcbbfc4..ef4212a 100644 --- a/action.yml +++ b/action.yml @@ -4,7 +4,9 @@ author: "Socket" runs: using: "docker" - image: "docker://ghcr.io/socketdev/socket-basics:2.0.2" + # TODO: Revert to the prebuilt GHCR image before merge. + # image: "docker://ghcr.io/socketdev/socket-basics:2.0.2" + image: "Dockerfile" env: # Core GitHub variables (these are automatically available, but we explicitly pass GITHUB_TOKEN) GITHUB_TOKEN: ${{ inputs.github_token }} diff --git a/docs/github-action.md b/docs/github-action.md index 3297c12..1c8ea54 100644 --- a/docs/github-action.md +++ b/docs/github-action.md @@ -603,8 +603,12 @@ jobs: ### Custom Rule Configuration +Use custom rules from your repository by setting `use_custom_sast_rules` and +`custom_sast_rule_path`. This path is resolved relative to `GITHUB_WORKSPACE` +in GitHub Actions. + ```yaml -name: Security Scan with Custom Rules +name: Security Scan with Custom SAST Rules on: pull_request: types: [opened, synchronize, reopened] @@ -625,21 +629,25 @@ jobs: GITHUB_PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }} with: github_token: ${{ secrets.GITHUB_TOKEN }} - - # Enable Python SAST + + # Enable SAST languages you expect to run. python_sast_enabled: 'true' - - # Enable specific Python rules - python_enabled_rules: 'sql-injection,xss,hardcoded-credentials' - - # Disable noisy rules - python_disabled_rules: 'unused-import,line-too-long' - - # JavaScript with custom rules javascript_sast_enabled: 'true' - javascript_enabled_rules: 'eval-usage,prototype-pollution' + + # Enable custom rules from repository path. + use_custom_sast_rules: 'true' + custom_sast_rule_path: '.socket/rules' + + # Optional: to avoid allowlist exclusions, run all rules for enabled languages. + all_rules_enabled: 'true' ``` +Important behavior: +- `socket_security_api_key` + `socket_org` enables dashboard config loading. +- Dashboard/API settings override overlapping `with:` values. 
+- Per-language `*_enabled_rules` inputs (e.g. `python_enabled_rules`) are allowlists: when at least one listed ID matches a custom rule, unlisted custom rule IDs are excluded; when none match, all custom rules still run. +- `all_rules_enabled: 'true'` disables allowlist filtering for enabled languages. + ## Configuration Reference ### All Available Inputs @@ -667,6 +675,8 @@ See [`action.yml`](../action.yml) for the complete list of inputs. **Rule Configuration (per language):** - `_enabled_rules` — Comma-separated rules to enable - `_disabled_rules` — Comma-separated rules to disable +- `use_custom_sast_rules` — Enable custom SAST rule discovery from repo files (global; applies to every enabled language) +- `custom_sast_rule_path` — Path to the custom SAST rule directory, relative to `GITHUB_WORKSPACE` (global) **Security Scanning:** - `secret_scanning_enabled` — Enable secret scanning diff --git a/docs/parameters.md b/docs/parameters.md index c30e785..89a5dfa 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -194,6 +194,10 @@ Use custom SAST rules instead of bundled rules (falls back to bundled rules for socket-basics --python --use-custom-sast-rules ``` +When this is enabled, custom rules are loaded from YAML files under +`--custom-sast-rule-path`. Each rule must include a `languages` list so Socket +Basics can map it to the correct OpenGrep language rule file. + ### `--custom-sast-rule-path CUSTOM_SAST_RULE_PATH` Relative path to custom SAST rules directory (relative to workspace if set, otherwise cwd). @@ -206,6 +210,11 @@ Relative path to custom SAST rules directory (relative to workspace if set, othe socket-basics --python --use-custom-sast-rules --custom-sast-rule-path "my_custom_rules" ``` +Custom rule file notes: +- `.yml` and `.yaml` files are discovered recursively. +- Files ending in `.test.yml` or `.test.yaml` are ignored. +- Rules without `languages` are skipped.
+ ### Language-Specific Rule Configuration For each language, you can enable or disable specific rules: @@ -519,7 +528,9 @@ All notification integrations support environment variables as alternatives to C | Variable | Description | |----------|-------------| -| `INPUT_OPENGREP_RULES_DIR` | Custom directory containing SAST rules | +| `INPUT_OPENGREP_RULES_DIR` | Override directory for bundled OpenGrep rule files (`*.yml`) | +| `INPUT_USE_CUSTOM_SAST_RULES` | Enable repository custom SAST rules | +| `INPUT_CUSTOM_SAST_RULE_PATH` | Relative directory path for repository custom SAST rules | ## Configuration File @@ -536,6 +547,8 @@ You can provide configuration via a JSON file using `--config`: "python_sast_enabled": true, "javascript_sast_enabled": true, + "use_custom_sast_rules": true, + "custom_sast_rule_path": ".socket/rules", "go_sast_enabled": true, "secrets_enabled": true, @@ -559,17 +572,18 @@ You can provide configuration via a JSON file using `--config`: Configuration is merged in the following order (later sources override earlier ones): 1. Default values -2. JSON configuration file (via `--config`) -3. Environment variables -4. Command-line arguments +2. Environment variables +3. Socket Basics API configuration (when available and no `--config` file is used) +4. JSON configuration file (via `--config`) +5. 
Command-line arguments **Example:** ```bash -# JSON file sets python_sast_enabled: true -# Environment has PYTHON_SAST_ENABLED=false +# Environment sets python_sast_enabled=true +# Dashboard/API sets python_sast_enabled=false # CLI has --javascript -# Result: JavaScript enabled, Python disabled (env override), other settings from JSON -socket-basics --config config.json --javascript +# Result: JavaScript enabled (CLI), Python disabled (dashboard/API overrides env), other settings from env/API +socket-basics --javascript ``` ## Common Usage Patterns diff --git a/socket_basics/core/config.py b/socket_basics/core/config.py index 55512cf..b463c33 100644 --- a/socket_basics/core/config.py +++ b/socket_basics/core/config.py @@ -897,6 +897,10 @@ def normalize_api_config(api_config: Dict[str, Any]) -> Dict[str, Any]: # OpenGrep/SAST Configuration 'openGrepNotificationMethod': 'opengrep_notification_method', + 'useCustomSastRules': 'use_custom_sast_rules', + 'customSastRulePath': 'custom_sast_rule_path', + # Accept common pluralized variant for robustness.
+ 'customSastRulesPath': 'custom_sast_rule_path', # Socket Tier 1 'socketTier1Enabled': 'socket_tier_1_enabled', @@ -1004,13 +1008,15 @@ def merge_json_and_env_config(json_config: Dict[str, Any] | None = None) -> Dict Returns: Merged configuration dictionary """ + logger = logging.getLogger(__name__) + # Start with environment defaults (lowest priority) config = load_config_from_env() + logger.info("Configuration sources: environment defaults loaded") # Override with Socket Basics API config if no explicit JSON config provided # API config takes precedence over environment defaults if not json_config: - logger = logging.getLogger(__name__) logger.debug(" No JSON config provided, attempting to load Socket Basics API config") socket_basics_config = load_socket_basics_config() logger.debug(f" Socket Basics API config result: {socket_basics_config is not None}") @@ -1027,7 +1033,10 @@ def merge_json_and_env_config(json_config: Dict[str, Any] | None = None) -> Dict continue filtered_config[k] = v config.update(filtered_config) - logging.getLogger(__name__).info("Loaded Socket Basics API configuration (overrides environment defaults)") + if bool(filtered_config.get('socket_has_enterprise', False)): + logging.getLogger(__name__).info("Loaded Socket Basics API configuration (overrides environment defaults)") + else: + logging.getLogger(__name__).info("Loaded Socket plan metadata (free/non-enterprise mode; no dashboard overrides)") else: logger.debug(" No Socket Basics API config loaded") @@ -1049,6 +1058,13 @@ def merge_json_and_env_config(json_config: Dict[str, Any] | None = None) -> Dict # Note: CLI arguments are handled separately and take highest priority # They override the config object after this merge completes + logger.info( + "Effective custom SAST config: use_custom_sast_rules=%s custom_sast_rule_path=%s all_languages_enabled=%s all_rules_enabled=%s", + bool(config.get('use_custom_sast_rules', False)), + config.get('custom_sast_rule_path', ''), + 
bool(config.get('all_languages_enabled', False)), + bool(config.get('all_rules_enabled', False)), + ) return config @@ -1087,9 +1103,9 @@ def add_dynamic_cli_args(parser: argparse.ArgumentParser): if param_type == 'bool': parser.add_argument(option, action='store_true', help=description) elif param_type == 'str': - parser.add_argument(option, type=str, default=default, help=description) + parser.add_argument(option, type=str, default=None, help=description) elif param_type == 'int': - parser.add_argument(option, type=int, default=default, help=description) + parser.add_argument(option, type=int, default=None, help=description) except Exception as e: logging.getLogger(__name__).warning("Warning: Could not load dynamic CLI args: %s", e) @@ -1116,9 +1132,9 @@ def add_dynamic_cli_args(parser: argparse.ArgumentParser): if p_type == 'bool': parser.add_argument(option, action='store_true', help=desc) elif p_type == 'int': - parser.add_argument(option, type=int, default=default, help=desc) + parser.add_argument(option, type=int, default=None, help=desc) else: - parser.add_argument(option, type=str, default=default, help=desc) + parser.add_argument(option, type=str, default=None, help=desc) except Exception: pass @@ -1127,7 +1143,7 @@ def parse_cli_args(): """Parse command line arguments and return argument parser""" parser = argparse.ArgumentParser(description='Socket Security Basics - Dynamic security scanning') parser.add_argument('--config', type=str, - help='Path to JSON configuration file. JSON config is merged with environment variables (environment takes precedence)') + help='Path to JSON configuration file. 
JSON config is merged with environment variables (JSON takes precedence)') parser.add_argument('--output', type=str, default='.socket.facts.json', help='Output file name (default: .socket.facts.json)') parser.add_argument('--workspace', type=str, help='Workspace directory to scan') diff --git a/socket_basics/core/connector/opengrep/__init__.py b/socket_basics/core/connector/opengrep/__init__.py index 0cf4135..1aed8f3 100644 --- a/socket_basics/core/connector/opengrep/__init__.py +++ b/socket_basics/core/connector/opengrep/__init__.py @@ -40,6 +40,12 @@ def scan(self) -> Dict[str, Any]: rule_files = self.config.build_opengrep_rules() or [] except Exception: rule_files = [] + logger.info( + "OpenGrep config summary: all_languages_enabled=%s all_rules_enabled=%s requested_rule_files=%s", + bool(self.config.get('all_languages_enabled', False)), + bool(self.config.get('all_rules_enabled', False)), + rule_files, + ) # If no languages selected and not explicitly allowing all, skip if not rule_files and not self.config.get('all_languages_enabled', False): @@ -51,6 +57,12 @@ def scan(self) -> Dict[str, Any]: # Check if custom rules mode is enabled custom_rules_path = self.config.get_custom_rules_path() custom_rule_files: Dict[str, Path] = {} + logger.info( + "Custom SAST requested=%s custom_path=%s resolved_path=%s", + bool(self.config.get('use_custom_sast_rules', False)), + self.config.get('custom_sast_rule_path', ''), + str(custom_rules_path) if custom_rules_path else '(none)', + ) if custom_rules_path: logger.info(f"Custom SAST rules enabled, loading from: {custom_rules_path}") @@ -74,6 +86,11 @@ def scan(self) -> Dict[str, Any]: filtered = self.config.build_filtered_opengrep_rules() or {} except Exception: filtered = {} + if filtered: + filtered_counts = {k: len(v or []) for k, v in filtered.items()} + logger.info("Per-language enabled-rule filters detected: %s", filtered_counts) + else: + logger.info("Per-language enabled-rule filters disabled for this run") # 
Debugging: log computed rule files and filtered rules for diagnosis try: @@ -91,25 +108,42 @@ def scan(self) -> Dict[str, Any]: # Process all enabled languages - use filtered rules if specified, otherwise use all rules for rf in rule_files: # Check if we have a custom rule file for this language + using_custom_rules = bool(custom_rule_files and rf in custom_rule_files) if custom_rule_files and rf in custom_rule_files: p = custom_rule_files[rf] - logger.info(f"Using custom rules for {rf}") + logger.info("Using custom rules for %s from %s", rf, p) else: # Fall back to bundled rules p = Path(rules_dir) / rf if not p.exists(): logger.debug('Rule file missing: %s', p) continue + logger.info("Using bundled rules for %s from %s", rf, p) # Check if this language has specific rules enabled (filtered mode) if filtered and rf in filtered: enabled_ids = filtered[rf] - logger.debug(f"Using filtered rules for {rf}: {len(enabled_ids)} rules enabled") + logger.info("Filtering rules for %s: %d enabled IDs configured", rf, len(enabled_ids)) try: with open(p, 'r') as fh: data = yaml.safe_load(fh) or {} all_ids = [r.get('id') for r in (data.get('rules') or []) if r.get('id')] - to_exclude = [rid for rid in all_ids if rid not in (enabled_ids or [])] + # Custom-rule mode can coexist with legacy bundled allowlists. + # If none of the configured enabled IDs match custom IDs, keep all + # custom IDs active to avoid silently disabling user-authored rules. 
+ if using_custom_rules: + matched_enabled_ids = [rid for rid in all_ids if rid in (enabled_ids or [])] + if enabled_ids and not matched_enabled_ids: + logger.warning( + "No configured enabled-rule IDs matched custom rules for %s; using all custom rules from %s", + rf, + p, + ) + config_args.extend(['--config', str(p)]) + continue + to_exclude = [rid for rid in all_ids if rid not in matched_enabled_ids] + else: + to_exclude = [rid for rid in all_ids if rid not in (enabled_ids or [])] config_args.extend(['--config', str(p)]) for ex in to_exclude: config_args.extend(['--exclude-rule', ex]) diff --git a/tests/test_config_custom_sast.py b/tests/test_config_custom_sast.py new file mode 100644 index 0000000..868c0c2 --- /dev/null +++ b/tests/test_config_custom_sast.py @@ -0,0 +1,69 @@ +from socket_basics.core import config as config_module +from socket_basics.core.config import ( + create_config_from_args, + merge_json_and_env_config, + normalize_api_config, + parse_cli_args, +) + + +def test_normalize_api_config_maps_custom_sast_keys(): + normalized = normalize_api_config( + { + "useCustomSastRules": True, + "customSastRulePath": ".socket/rules", + } + ) + + assert normalized["use_custom_sast_rules"] is True + assert normalized["custom_sast_rule_path"] == ".socket/rules" + + +def test_normalize_api_config_maps_custom_sast_plural_path_alias(): + normalized = normalize_api_config({"customSastRulesPath": "custom_rules"}) + assert normalized["custom_sast_rule_path"] == "custom_rules" + + +def test_merge_json_and_env_config_api_overrides_env_custom_sast(monkeypatch): + monkeypatch.setenv("INPUT_USE_CUSTOM_SAST_RULES", "true") + monkeypatch.setenv("INPUT_CUSTOM_SAST_RULE_PATH", ".socket/rules") + + monkeypatch.setattr( + config_module, + "load_socket_basics_config", + lambda: {"useCustomSastRules": False, "customSastRulePath": "dashboard/rules"}, + ) + + merged = merge_json_and_env_config() + assert merged["use_custom_sast_rules"] is False + assert 
merged["custom_sast_rule_path"] == "dashboard/rules" + + +def test_merge_json_and_env_config_json_overrides_env_custom_sast(monkeypatch): + monkeypatch.setenv("INPUT_USE_CUSTOM_SAST_RULES", "false") + monkeypatch.setenv("INPUT_CUSTOM_SAST_RULE_PATH", "custom_rules") + + merged = merge_json_and_env_config( + {"useCustomSastRules": True, "customSastRulePath": ".socket/rules"} + ) + assert merged["use_custom_sast_rules"] is True + assert merged["custom_sast_rule_path"] == ".socket/rules" + + +def test_create_config_from_args_does_not_override_env_custom_path(monkeypatch): + monkeypatch.setenv("INPUT_USE_CUSTOM_SAST_RULES", "true") + monkeypatch.setenv("INPUT_CUSTOM_SAST_RULE_PATH", ".socket/rules") + + monkeypatch.setattr(config_module, "_discover_repository", lambda *args, **kwargs: "repo") + monkeypatch.setattr(config_module, "_discover_branch", lambda *args, **kwargs: "branch") + monkeypatch.setattr(config_module, "_discover_commit_hash", lambda *args, **kwargs: "commit") + monkeypatch.setattr(config_module, "_discover_is_default_branch", lambda *args, **kwargs: False) + monkeypatch.setattr(config_module, "_discover_pull_request", lambda *args, **kwargs: 0) + monkeypatch.setattr(config_module, "_discover_committers", lambda *args, **kwargs: []) + + parser = parse_cli_args() + args = parser.parse_args([]) + config = create_config_from_args(args) + + assert config.get("use_custom_sast_rules") is True + assert config.get("custom_sast_rule_path") == ".socket/rules" diff --git a/tests/test_opengrep_custom_rules.py b/tests/test_opengrep_custom_rules.py new file mode 100644 index 0000000..1fa1187 --- /dev/null +++ b/tests/test_opengrep_custom_rules.py @@ -0,0 +1,183 @@ +import json +from pathlib import Path +from types import SimpleNamespace + +from socket_basics.core.config import Config +from socket_basics.core.connector.opengrep import OpenGrepScanner + + +def _write_rule_file(path: Path, rule_ids: list[str]) -> None: + rules = [{"id": rid, "languages": ["javascript"], 
"pattern": "eval(...)"} for rid in rule_ids] + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps({"rules": rules}), encoding="utf-8") + + +def _write_custom_rules_file(path: Path, rule_ids: list[str]) -> None: + lines = ["rules:"] + for rid in rule_ids: + lines.extend( + [ + f" - id: {rid}", + " pattern: eval(...)", + " languages: [javascript, typescript]", + f' message: Rule {rid}', + " severity: ERROR", + ] + ) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("\n".join(lines), encoding="utf-8") + + +def _mock_subprocess_run(monkeypatch, captured_cmd: list[str]): + def _runner(cmd, capture_output, text): + captured_cmd.extend(cmd) + out_file = cmd[cmd.index("--output") + 1] + Path(out_file).write_text(json.dumps({"results": []}), encoding="utf-8") + return SimpleNamespace(stdout="", stderr="", returncode=0) + + monkeypatch.setattr("socket_basics.core.connector.opengrep.subprocess.run", _runner) + + +def test_scan_uses_custom_rule_file_when_available(tmp_path, monkeypatch): + workspace = tmp_path / "workspace" + workspace.mkdir(parents=True, exist_ok=True) + + custom_rules_dir = workspace / ".socket" / "rules" + # Custom file can be any yaml name; builder groups by languages. 
+ custom_rules_file = custom_rules_dir / "org-rules.yml" + _write_custom_rules_file(custom_rules_file, ["org.no-eval"]) + + bundled_rules_dir = tmp_path / "bundled-rules" + _write_rule_file(bundled_rules_dir / "javascript_typescript.yml", ["js-default-rule"]) + + config = Config( + { + "workspace": str(workspace), + "output_dir": str(workspace), + "javascript_sast_enabled": True, + "use_custom_sast_rules": True, + "custom_sast_rule_path": ".socket/rules", + "opengrep_rules_dir": str(bundled_rules_dir), + "all_languages_enabled": False, + "all_rules_enabled": False, + "verbose": False, + } + ) + scanner = OpenGrepScanner(config) + scanner._convert_to_socket_facts = lambda _: {"components": []} + scanner.generate_notifications = lambda _: {} + + captured_cmd: list[str] = [] + _mock_subprocess_run(monkeypatch, captured_cmd) + scanner.scan() + + cmd_str = " ".join(captured_cmd) + assert "socket_custom_rules_" in cmd_str + assert str(bundled_rules_dir / "javascript_typescript.yml") not in cmd_str + + +def test_scan_falls_back_to_bundled_file_when_custom_missing(tmp_path, monkeypatch): + workspace = tmp_path / "workspace" + workspace.mkdir(parents=True, exist_ok=True) + + bundled_rules_dir = tmp_path / "bundled-rules" + bundled_file = bundled_rules_dir / "javascript_typescript.yml" + _write_rule_file(bundled_file, ["js-default-rule"]) + + config = Config( + { + "workspace": str(workspace), + "output_dir": str(workspace), + "javascript_sast_enabled": True, + "use_custom_sast_rules": True, + "custom_sast_rule_path": ".socket/missing-rules", + "opengrep_rules_dir": str(bundled_rules_dir), + "all_languages_enabled": False, + "all_rules_enabled": False, + "verbose": False, + } + ) + scanner = OpenGrepScanner(config) + scanner._convert_to_socket_facts = lambda _: {"components": []} + scanner.generate_notifications = lambda _: {} + + captured_cmd: list[str] = [] + _mock_subprocess_run(monkeypatch, captured_cmd) + scanner.scan() + + assert str(bundled_file) in " 
".join(captured_cmd) + + +def test_custom_rules_ignore_nonmatching_bundled_allowlist_ids(tmp_path, monkeypatch): + workspace = tmp_path / "workspace" + workspace.mkdir(parents=True, exist_ok=True) + + custom_rules_file = workspace / ".socket" / "rules" / "org-rules.yml" + _write_custom_rules_file(custom_rules_file, ["org.no-eval", "org.no-innerhtml"]) + + bundled_rules_dir = tmp_path / "bundled-rules" + _write_rule_file(bundled_rules_dir / "javascript_typescript.yml", ["js-default-rule"]) + + config = Config( + { + "workspace": str(workspace), + "output_dir": str(workspace), + "javascript_sast_enabled": True, + "javascript_enabled_rules": "js-default-rule", + "use_custom_sast_rules": True, + "custom_sast_rule_path": ".socket/rules", + "opengrep_rules_dir": str(bundled_rules_dir), + "all_languages_enabled": False, + "all_rules_enabled": False, + "verbose": False, + } + ) + scanner = OpenGrepScanner(config) + scanner._convert_to_socket_facts = lambda _: {"components": []} + scanner.generate_notifications = lambda _: {} + + captured_cmd: list[str] = [] + _mock_subprocess_run(monkeypatch, captured_cmd) + scanner.scan() + + cmd_str = " ".join(captured_cmd) + assert "socket_custom_rules_" in cmd_str + assert "--exclude-rule org.no-eval" not in cmd_str + assert "--exclude-rule org.no-innerhtml" not in cmd_str + + +def test_custom_rules_apply_allowlist_when_custom_ids_match(tmp_path, monkeypatch): + workspace = tmp_path / "workspace" + workspace.mkdir(parents=True, exist_ok=True) + + custom_rules_file = workspace / ".socket" / "rules" / "org-rules.yml" + _write_custom_rules_file(custom_rules_file, ["org.no-eval", "org.no-innerhtml"]) + + bundled_rules_dir = tmp_path / "bundled-rules" + _write_rule_file(bundled_rules_dir / "javascript_typescript.yml", ["js-default-rule"]) + + config = Config( + { + "workspace": str(workspace), + "output_dir": str(workspace), + "javascript_sast_enabled": True, + "javascript_enabled_rules": "org.no-eval", + "use_custom_sast_rules": True, + 
"custom_sast_rule_path": ".socket/rules", + "opengrep_rules_dir": str(bundled_rules_dir), + "all_languages_enabled": False, + "all_rules_enabled": False, + "verbose": False, + } + ) + scanner = OpenGrepScanner(config) + scanner._convert_to_socket_facts = lambda _: {"components": []} + scanner.generate_notifications = lambda _: {} + + captured_cmd: list[str] = [] + _mock_subprocess_run(monkeypatch, captured_cmd) + scanner.scan() + + cmd_str = " ".join(captured_cmd) + assert "--exclude-rule org.no-innerhtml" in cmd_str + assert "--exclude-rule org.no-eval" not in cmd_str