diff --git a/cli/internal/discovery/language.go b/cli/internal/discovery/language.go index ac6e91f..951e4be 100644 --- a/cli/internal/discovery/language.go +++ b/cli/internal/discovery/language.go @@ -27,6 +27,7 @@ var ExtensionMap = map[string]string{ ".swift": "swift", ".kt": "kotlin", ".scala": "scala", + ".sol": "solidity", ".r": "r", ".lua": "lua", ".sh": "bash", diff --git a/doc/LANGUAGES.md b/doc/LANGUAGES.md index 70ef5c4..69164af 100644 --- a/doc/LANGUAGES.md +++ b/doc/LANGUAGES.md @@ -2,7 +2,7 @@ cix uses tree-sitter (via `github.com/odvcencio/gotreesitter`) to extract semantic chunks (functions, classes, methods, types) from source code. Files in unsupported languages still get indexed via a sliding-window fallback — they're searchable, just without per-symbol granularity. -## Default language set (30) +## Default language set (31) | ID | gotreesitter factory | Function | Class | Method | Type | |---|---|:-:|:-:|:-:|:-:| @@ -36,6 +36,7 @@ cix uses tree-sitter (via `github.com/odvcencio/gotreesitter`) to extract semant | `fortran` | `FortranLanguage` | ✓ | ✓ | | | | `haskell` | `HaskellLanguage` | ✓ | | | ✓ | | `ocaml` | `OcamlLanguage` | ✓ | ✓ | | ✓ | +| `solidity` | `SolidityLanguage` | ✓ | ✓ | | ✓ | The exact AST node types per language live in `server/internal/chunker/chunker.go` (`defaultRegistry`). File-extension mapping lives in `server/internal/langdetect/langdetect.go`. 
diff --git a/server/internal/chunker/chunker.go b/server/internal/chunker/chunker.go index b5e34a2..f8768b0 100644 --- a/server/internal/chunker/chunker.go +++ b/server/internal/chunker/chunker.go @@ -432,6 +432,15 @@ func defaultRegistry() map[string]languageEntry { }, identifiers: idID("type_identifier"), }, + "solidity": { + factory: grammars.SolidityLanguage, + nodes: map[string][]string{ + "function": {"function_definition", "modifier_definition", "constructor_definition", "fallback_receive_definition"}, + "class": {"contract_declaration", "library_declaration"}, + "type": {"interface_declaration", "struct_declaration", "enum_declaration", "event_definition"}, + }, + identifiers: idID(), + }, } } diff --git a/server/internal/chunker/chunker_test.go b/server/internal/chunker/chunker_test.go index 15310c5..78463f8 100644 --- a/server/internal/chunker/chunker_test.go +++ b/server/internal/chunker/chunker_test.go @@ -475,6 +475,124 @@ end } } +func TestChunkFile_Solidity(t *testing.T) { + src := `// SPDX-License-Identifier: MIT +pragma solidity ^0.8.0; + +interface IERC20 { + function transfer(address to, uint256 amount) external returns (bool); +} + +contract Token is IERC20 { + enum State { Active, Paused } + + function transfer(address to, uint256 amount) external returns (bool) { + return true; + } +} +` + chunks, _, err := ChunkFile("Token.sol", src, "solidity", 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(chunks) == 0 { + t.Fatal("expected chunks from Solidity source") + } + counts := chunkTypeCounts(chunks) + if counts["class"] == 0 { + t.Errorf("expected a class chunk for the contract, got: %v", counts) + } + + var names []string + for _, c := range chunks { + if c.SymbolName != nil { + names = append(names, *c.SymbolName) + } + } + if len(names) == 0 { + t.Fatalf("expected named symbols from Solidity source, got none (chunk types: %v)", counts) + } + want := map[string]bool{"Token": false, "transfer": false} + for _, n := 
range names { + if _, ok := want[n]; ok { + want[n] = true + } + } + for sym, found := range want { + if !found { + t.Errorf("expected symbol %q among extracted symbols %v", sym, names) + } + } +} + +// TestRegistry_SolidityAllNodeKindsPresent is a stricter sibling to +// TestRegistry_NodeNamesMatchAST: the generic test passes if *any* one of a +// language's configured node types appears in the AST, which would let a +// grammar rename of e.g. `modifier_definition` slip through unnoticed. +// +// Solidity's registry entry advertises 10 node kinds. This test parses a +// contract that exercises every one of them and fails loudly if any single +// kind is missing from the AST — protecting downstream `cix def` / `cix refs` +// behavior for modifiers, events, constructors, and receive/fallback +// functions, which are easy to overlook in the broader smoke test. +func TestRegistry_SolidityAllNodeKindsPresent(t *testing.T) { + defer Configure(nil) + Configure(nil) + + src := `// SPDX-License-Identifier: MIT +pragma solidity ^0.8.0; + +interface I { function f() external; } +library L { function g() internal pure returns (uint) { return 1; } } +contract C { + struct S { uint x; } + enum E { A, B } + event Ev(uint x); + modifier m() { _; } + constructor() {} + receive() external payable {} + function f() public {} +} +` + + registryMu.RLock() + fn, ok := languageRegistry["solidity"] + nodes := languageNodes["solidity"] + registryMu.RUnlock() + if !ok { + t.Fatal("solidity not in registry") + } + if nodes == nil { + t.Fatal("solidity has no node map") + } + + grammar := fn() + if grammar == nil { + t.Fatal("nil solidity grammar") + } + + parser := sitter.NewParser(grammar) + tree, err := parser.Parse([]byte(src)) + if err != nil { + t.Fatalf("parse error: %v", err) + } + root := tree.RootNode() + if root == nil { + t.Fatal("nil root") + } + + seen := map[string]struct{}{} + collectNodeTypes(root, grammar, seen) + + for _, types := range nodes { + for _, ty := range types { 
+ if _, found := seen[ty]; !found { + t.Errorf("configured Solidity node type %q not present in AST — grammar rename?", ty) + } + } + } +} + // --- Configure() filtering --- func TestConfigure_FilterToSubset(t *testing.T) { @@ -605,6 +723,22 @@ func TestRegistry_NodeNamesMatchAST(t *testing.T) { "fortran": "subroutine s\nend subroutine\n", "haskell": "module M where\n\nf :: Int -> Int\nf x = x\n", "ocaml": "let f x = x\n", + // Exercises every configured kind: contract_declaration, library_declaration, + // interface_declaration, struct_declaration, enum_declaration, event_definition, + // function_definition, modifier_definition, constructor_definition, + // fallback_receive_definition. The per-kind rename check is TestRegistry_SolidityAllNodeKindsPresent. + "solidity": "" + + "interface I { function f() external; }\n" + + "library L { function g() internal pure returns (uint) { return 1; } }\n" + + "contract C {\n" + + " struct S { uint x; }\n" + + " enum E { A, B }\n" + + " event Ev(uint x);\n" + + " modifier m() { _; }\n" + + " constructor() {}\n" + + " receive() external payable {}\n" + + " function f() public {}\n" + + "}\n", } for lang, src := range fixtures { diff --git a/server/internal/langdetect/langdetect.go b/server/internal/langdetect/langdetect.go index 4568fdf..eef4755 100644 --- a/server/internal/langdetect/langdetect.go +++ b/server/internal/langdetect/langdetect.go @@ -12,29 +12,30 @@ import ( // Ported 1:1 from EXTENSION_MAP in api/app/core/language.py.
var extensionMap = map[string]string{ // Systems / compiled - ".py": "python", - ".go": "go", - ".rs": "rust", - ".java": "java", - ".c": "c", - ".h": "c", - ".cpp": "cpp", - ".cc": "cpp", - ".cxx": "cpp", - ".hpp": "cpp", - ".cs": "c_sharp", + ".py": "python", + ".go": "go", + ".rs": "rust", + ".java": "java", + ".c": "c", + ".h": "c", + ".cpp": "cpp", + ".cc": "cpp", + ".cxx": "cpp", + ".hpp": "cpp", + ".cs": "c_sharp", ".swift": "swift", - ".kt": "kotlin", - ".kts": "kotlin", + ".kt": "kotlin", + ".kts": "kotlin", ".scala": "scala", - ".zig": "zig", - ".jl": "julia", - ".f90": "fortran", - ".f95": "fortran", - ".f03": "fortran", - ".f": "fortran", - ".m": "objc", - ".mm": "objc", + ".zig": "zig", + ".jl": "julia", + ".f90": "fortran", + ".f95": "fortran", + ".f03": "fortran", + ".f": "fortran", + ".m": "objc", + ".mm": "objc", + ".sol": "solidity", // Web / scripting ".ts": "typescript", ".tsx": "tsx", diff --git a/server/internal/langdetect/langdetect_test.go b/server/internal/langdetect/langdetect_test.go index 27e94f7..defe8fb 100644 --- a/server/internal/langdetect/langdetect_test.go +++ b/server/internal/langdetect/langdetect_test.go @@ -33,8 +33,9 @@ func TestDetect(t *testing.T) { {"README.md", "markdown"}, {"unknown.xyz", ""}, {"/some/path/to/main.go", "go"}, - {"script.R", "r"}, // uppercase .R + {"script.R", "r"}, // uppercase .R {"script.sh", "bash"}, + {"Token.sol", "solidity"}, {"build.gradle.kts", "kotlin"}, {"app.kts", "kotlin"}, }