Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cli/internal/discovery/language.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ var ExtensionMap = map[string]string{
".swift": "swift",
".kt": "kotlin",
".scala": "scala",
".sol": "solidity",
".r": "r",
".lua": "lua",
".sh": "bash",
Expand Down
3 changes: 2 additions & 1 deletion doc/LANGUAGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

cix uses tree-sitter (via `github.com/odvcencio/gotreesitter`) to extract semantic chunks (functions, classes, methods, types) from source code. Files in unsupported languages still get indexed via a sliding-window fallback — they're searchable, just without per-symbol granularity.

## Default language set (30)
## Default language set (31)

| ID | gotreesitter factory | Function | Class | Method | Type |
|---|---|:-:|:-:|:-:|:-:|
Expand Down Expand Up @@ -36,6 +36,7 @@ cix uses tree-sitter (via `github.com/odvcencio/gotreesitter`) to extract semant
| `fortran` | `FortranLanguage` | ✓ | ✓ | | |
| `haskell` | `HaskellLanguage` | ✓ | | | ✓ |
| `ocaml` | `OcamlLanguage` | ✓ | ✓ | | ✓ |
| `solidity` | `SolidityLanguage` | ✓ | ✓ | | ✓ |

The exact AST node types per language live in `server/internal/chunker/chunker.go` (`defaultRegistry`). File-extension mapping lives in `server/internal/langdetect/langdetect.go`.

Expand Down
9 changes: 9 additions & 0 deletions server/internal/chunker/chunker.go
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,15 @@ func defaultRegistry() map[string]languageEntry {
},
identifiers: idID("type_identifier"),
},
"solidity": {
factory: grammars.SolidityLanguage,
nodes: map[string][]string{
"function": {"function_definition", "modifier_definition", "constructor_definition", "fallback_receive_definition"},
"class": {"contract_declaration", "library_declaration"},
"type": {"interface_declaration", "struct_declaration", "enum_declaration", "event_definition"},
},
identifiers: idID(),
},
}
}

Expand Down
134 changes: 134 additions & 0 deletions server/internal/chunker/chunker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,124 @@ end
}
}

// TestChunkFile_Solidity chunks a small Solidity file containing an interface,
// a contract, an enum and a function, then checks that at least one "class"
// chunk is produced and that the symbols "Token" and "transfer" are extracted.
func TestChunkFile_Solidity(t *testing.T) {
	source := `// SPDX-License-Identifier: MIT
pragma solidity ^0.8.0;

interface IERC20 {
function transfer(address to, uint256 amount) external returns (bool);
}

contract Token is IERC20 {
enum State { Active, Paused }

function transfer(address to, uint256 amount) external returns (bool) {
return true;
}
}
`
	chunks, _, err := ChunkFile("Token.sol", source, "solidity", 0)
	switch {
	case err != nil:
		t.Fatalf("unexpected error: %v", err)
	case len(chunks) == 0:
		t.Fatal("expected chunks from Solidity source")
	}

	typeCounts := chunkTypeCounts(chunks)
	if typeCounts["class"] == 0 {
		t.Errorf("expected a class chunk for the contract, got: %v", typeCounts)
	}

	// Gather every symbol name the chunker attached; unnamed chunks are skipped.
	var symbols []string
	for _, chunk := range chunks {
		if chunk.SymbolName == nil {
			continue
		}
		symbols = append(symbols, *chunk.SymbolName)
	}
	if len(symbols) == 0 {
		t.Fatalf("expected named symbols from Solidity source, got none (chunk types: %v)", typeCounts)
	}

	// Each required symbol starts unseen and is flipped once it is encountered.
	required := map[string]bool{"Token": false, "transfer": false}
	for _, name := range symbols {
		if _, tracked := required[name]; tracked {
			required[name] = true
		}
	}
	for name, seen := range required {
		if seen {
			continue
		}
		t.Errorf("expected symbol %q among extracted symbols %v", name, symbols)
	}
}

// TestRegistry_SolidityAllNodeKindsPresent checks that every AST node type
// configured for Solidity actually occurs when a representative contract is
// parsed.
//
// It complements TestRegistry_NodeNamesMatchAST, which is satisfied as soon as
// any single configured node type of a language appears in the AST; a grammar
// rename of, say, `modifier_definition` would therefore go unnoticed there.
//
// The Solidity registry entry lists 10 node kinds. The fixture below uses all
// of them, and the test reports each kind that is absent from the parsed
// tree. This protects downstream `cix def` / `cix refs` behavior for
// modifiers, events, constructors, and receive/fallback functions, which the
// broader smoke test can easily miss.
func TestRegistry_SolidityAllNodeKindsPresent(t *testing.T) {
	// Reset to the default configuration now and again when the test exits.
	defer Configure(nil)
	Configure(nil)

	src := `// SPDX-License-Identifier: MIT
pragma solidity ^0.8.0;

interface I { function f() external; }
library L { function g() internal pure returns (uint) { return 1; } }
contract C {
struct S { uint x; }
enum E { A, B }
event Ev(uint x);
modifier m() { _; }
constructor() {}
receive() external payable {}
function f() public {}
}
`

	// Read both registry maps under the read lock, then release it before
	// doing any parsing work.
	registryMu.RLock()
	factory, registered := languageRegistry["solidity"]
	nodeKinds := languageNodes["solidity"]
	registryMu.RUnlock()

	switch {
	case !registered:
		t.Fatal("solidity not in registry")
	case nodeKinds == nil:
		t.Fatal("solidity has no node map")
	}

	grammar := factory()
	if grammar == nil {
		t.Fatal("nil solidity grammar")
	}

	tree, err := sitter.NewParser(grammar).Parse([]byte(src))
	if err != nil {
		t.Fatalf("parse error: %v", err)
	}
	root := tree.RootNode()
	if root == nil {
		t.Fatal("nil root")
	}

	// Record every node type that appears anywhere in the parsed tree.
	present := make(map[string]struct{})
	collectNodeTypes(root, grammar, present)

	for _, kinds := range nodeKinds {
		for _, kind := range kinds {
			if _, ok := present[kind]; ok {
				continue
			}
			t.Errorf("configured Solidity node type %q not present in AST — grammar rename?", kind)
		}
	}
}

// --- Configure() filtering ---

func TestConfigure_FilterToSubset(t *testing.T) {
Expand Down Expand Up @@ -605,6 +723,22 @@ func TestRegistry_NodeNamesMatchAST(t *testing.T) {
"fortran": "subroutine s\nend subroutine\n",
"haskell": "module M where\n\nf :: Int -> Int\nf x = x\n",
"ocaml": "let f x = x\n",
// Covers every configured kind: contract_declaration, library_declaration,
// interface_declaration, struct_declaration, enum_declaration, event_definition,
// function_definition, modifier_definition, constructor_definition,
// fallback_receive_definition. Guards against grammar renames for any of them.
"solidity": "" +
"interface I { function f() external; }\n" +
"library L { function g() internal pure returns (uint) { return 1; } }\n" +
"contract C {\n" +
" struct S { uint x; }\n" +
" enum E { A, B }\n" +
" event Ev(uint x);\n" +
" modifier m() { _; }\n" +
" constructor() {}\n" +
" receive() external payable {}\n" +
" function f() public {}\n" +
"}\n",
}

for lang, src := range fixtures {
Expand Down
43 changes: 22 additions & 21 deletions server/internal/langdetect/langdetect.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,30 @@ import (
// Ported 1:1 from EXTENSION_MAP in api/app/core/language.py.
var extensionMap = map[string]string{
// Systems / compiled
".py": "python",
".go": "go",
".rs": "rust",
".java": "java",
".c": "c",
".h": "c",
".cpp": "cpp",
".cc": "cpp",
".cxx": "cpp",
".hpp": "cpp",
".cs": "c_sharp",
".py": "python",
".go": "go",
".rs": "rust",
".java": "java",
".c": "c",
".h": "c",
".cpp": "cpp",
".cc": "cpp",
".cxx": "cpp",
".hpp": "cpp",
".cs": "c_sharp",
".swift": "swift",
".kt": "kotlin",
".kts": "kotlin",
".kt": "kotlin",
".kts": "kotlin",
".scala": "scala",
".zig": "zig",
".jl": "julia",
".f90": "fortran",
".f95": "fortran",
".f03": "fortran",
".f": "fortran",
".m": "objc",
".mm": "objc",
".zig": "zig",
".jl": "julia",
".f90": "fortran",
".f95": "fortran",
".f03": "fortran",
".f": "fortran",
".m": "objc",
".mm": "objc",
".sol": "solidity",
// Web / scripting
".ts": "typescript",
".tsx": "tsx",
Expand Down
3 changes: 2 additions & 1 deletion server/internal/langdetect/langdetect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ func TestDetect(t *testing.T) {
{"README.md", "markdown"},
{"unknown.xyz", ""},
{"/some/path/to/main.go", "go"},
{"script.R", "r"}, // uppercase .R
{"script.R", "r"}, // uppercase .R
{"script.sh", "bash"},
{"Token.sol", "solidity"},
{"build.gradle.kts", "kotlin"},
{"app.kts", "kotlin"},
}
Expand Down
Loading