Skip to content

Commit 4f1c5a8

Browse files
shivasuryaclaude
andauthored
refactor: Extract builder logic to builder/ package (#7) (#378)
## Summary This PR extracts ~1050 LOC of call graph builder logic from `builder.go` into a dedicated `builder/` package for better modularity and organization. ## Changes ### New Files Created 1. **builder/cache.go** (100 LOC) - ImportMapCache for thread-safe import map caching - Comprehensive tests with concurrent access validation 2. **builder/builder.go** (800 LOC) - BuildCallGraph - main orchestration function - indexFunctions, getFunctionsInFile, findContainingFunction - resolveCallTarget - core resolution logic with type inference - validateStdlibFQN, validateFQN - validation functions - detectPythonVersion - Python version detection - All functions have public wrappers for external use 3. **builder/helpers.go** (50 LOC) - ReadFileBytes - file reading utility - FindFunctionAtLine - AST traversal for function lookup 4. **builder/taint.go** (80 LOC) - GenerateTaintSummaries - taint analysis (Pass 5) 5. **builder/integration.go** (50 LOC) - BuildCallGraphFromPath - convenience function for 3-pass build 6. **builder/doc.go** (60 LOC) - Package documentation with usage examples ### Files Modified 1. **graph/callgraph/builder.go** - Backward compatibility layer - Type aliases for ImportMapCache - Wrapper functions delegating to builder package - All wrappers marked as Deprecated with migration path 2. **graph/callgraph/integration.go** - Simplified - Now uses builder.BuildCallGraphFromPath - Maintains same public API 3. **graph/callgraph/python_version_detector.go** - Simplified - Delegates to builder.DetectPythonVersion ### Files Removed 1. **cache_test.go** - Moved to builder/cache_test.go 2. **python_version_detector_test.go** - Tests moved to builder package ## Testing ✅ All tests pass (18 packages) ✅ Build succeeds (gradle buildGo) ✅ Lint passes (0 issues) ✅ Zero breaking changes - backward compatibility maintained ## Architecture The builder package now contains all call graph construction logic: - Pass 1: Module registry (registry package) - Pass 2: Import extraction (resolution package) - Pass 3: Call graph building (builder package) ← This PR - Pass 4: Type inference integration - Pass 5: Taint analysis ## Dependencies - Parent PR: #6 (Patterns Package) - Stack: refactor/01-foundation-types → refactor/02-infrastructure-core → refactor/03-stdlib-taint → refactor/04-ast-extraction → refactor/05-advanced-resolution → refactor/06-patterns → **refactor/07-builder** (this PR) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]>
1 parent d0c9e09 commit 4f1c5a8

File tree

16 files changed

+2182
-1719
lines changed

16 files changed

+2182
-1719
lines changed

sourcecode-parser/graph/callgraph/builder.go

Lines changed: 43 additions & 1056 deletions
Large diffs are not rendered by default.

sourcecode-parser/graph/callgraph/builder/builder.go

Lines changed: 1011 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
package builder
2+
3+
import (
4+
"os"
5+
"path/filepath"
6+
"testing"
7+
8+
"github.com/shivasurya/code-pathfinder/sourcecode-parser/graph"
9+
"github.com/shivasurya/code-pathfinder/sourcecode-parser/graph/callgraph/core"
10+
"github.com/shivasurya/code-pathfinder/sourcecode-parser/graph/callgraph/registry"
11+
"github.com/stretchr/testify/assert"
12+
"github.com/stretchr/testify/require"
13+
)
14+
15+
func TestBuildCallGraph(t *testing.T) {
16+
// Create a temporary project
17+
tmpDir := t.TempDir()
18+
19+
mainPy := filepath.Join(tmpDir, "main.py")
20+
err := os.WriteFile(mainPy, []byte(`
21+
def greet(name):
22+
return f"Hello, {name}"
23+
24+
def main():
25+
message = greet("World")
26+
print(message)
27+
`), 0644)
28+
require.NoError(t, err)
29+
30+
// Parse project
31+
codeGraph := graph.Initialize(tmpDir)
32+
33+
// Build module registry
34+
moduleRegistry, err := registry.BuildModuleRegistry(tmpDir)
35+
require.NoError(t, err)
36+
37+
// Build call graph
38+
callGraph, err := BuildCallGraph(codeGraph, moduleRegistry, tmpDir)
39+
require.NoError(t, err)
40+
assert.NotNil(t, callGraph)
41+
42+
// Verify functions were indexed
43+
assert.NotEmpty(t, callGraph.Functions)
44+
45+
// Verify edges exist
46+
assert.NotNil(t, callGraph.Edges)
47+
48+
// Verify reverse edges exist
49+
assert.NotNil(t, callGraph.ReverseEdges)
50+
}
51+
52+
func TestIndexFunctions(t *testing.T) {
53+
// Create a temporary project
54+
tmpDir := t.TempDir()
55+
56+
mainPy := filepath.Join(tmpDir, "test.py")
57+
err := os.WriteFile(mainPy, []byte(`
58+
def func1():
59+
pass
60+
61+
def func2():
62+
pass
63+
64+
class MyClass:
65+
def method1(self):
66+
pass
67+
`), 0644)
68+
require.NoError(t, err)
69+
70+
// Parse project
71+
codeGraph := graph.Initialize(tmpDir)
72+
73+
// Build module registry
74+
moduleRegistry, err := registry.BuildModuleRegistry(tmpDir)
75+
require.NoError(t, err)
76+
77+
// Create call graph and index functions
78+
callGraph := core.NewCallGraph()
79+
IndexFunctions(codeGraph, callGraph, moduleRegistry)
80+
81+
// Verify functions were indexed
82+
assert.NotEmpty(t, callGraph.Functions)
83+
84+
// Count functions/methods
85+
functionCount := 0
86+
for _, node := range callGraph.Functions {
87+
if node.Type == "function_definition" || node.Type == "method_declaration" {
88+
functionCount++
89+
}
90+
}
91+
assert.GreaterOrEqual(t, functionCount, 3, "Should have at least 3 functions/methods")
92+
}
93+
94+
func TestGetFunctionsInFile(t *testing.T) {
95+
// Create a temporary file
96+
tmpDir := t.TempDir()
97+
testFile := filepath.Join(tmpDir, "test.py")
98+
99+
err := os.WriteFile(testFile, []byte(`
100+
def func1():
101+
pass
102+
103+
def func2():
104+
pass
105+
`), 0644)
106+
require.NoError(t, err)
107+
108+
// Parse file
109+
codeGraph := graph.Initialize(tmpDir)
110+
111+
// Get functions in file
112+
functions := GetFunctionsInFile(codeGraph, testFile)
113+
114+
// Verify functions were found
115+
assert.NotEmpty(t, functions)
116+
assert.GreaterOrEqual(t, len(functions), 2, "Should find at least 2 functions")
117+
}
118+
119+
func TestFindContainingFunction(t *testing.T) {
120+
// Create a temporary project
121+
tmpDir := t.TempDir()
122+
123+
testFile := filepath.Join(tmpDir, "test.py")
124+
err := os.WriteFile(testFile, []byte(`
125+
def outer_function():
126+
x = 1
127+
y = 2
128+
return x + y
129+
`), 0644)
130+
require.NoError(t, err)
131+
132+
// Parse file
133+
codeGraph := graph.Initialize(tmpDir)
134+
135+
// Get functions
136+
functions := GetFunctionsInFile(codeGraph, testFile)
137+
require.NotEmpty(t, functions)
138+
139+
// Test finding containing function for a location inside the function
140+
location := core.Location{
141+
File: testFile,
142+
Line: 3,
143+
Column: 5, // Inside function body
144+
}
145+
146+
modulePath := "test"
147+
containingFQN := FindContainingFunction(location, functions, modulePath)
148+
149+
// Should find the outer_function
150+
assert.NotEmpty(t, containingFQN)
151+
assert.Contains(t, containingFQN, "outer_function")
152+
}
153+
154+
func TestFindContainingFunction_ModuleLevel(t *testing.T) {
155+
// Create a temporary project
156+
tmpDir := t.TempDir()
157+
158+
testFile := filepath.Join(tmpDir, "test.py")
159+
err := os.WriteFile(testFile, []byte(`
160+
MODULE_VAR = 42
161+
162+
def my_function():
163+
pass
164+
`), 0644)
165+
require.NoError(t, err)
166+
167+
// Parse file
168+
codeGraph := graph.Initialize(tmpDir)
169+
170+
functions := GetFunctionsInFile(codeGraph, testFile)
171+
172+
// Test module-level code (column == 1)
173+
location := core.Location{
174+
File: testFile,
175+
Line: 2,
176+
Column: 1, // Module level
177+
}
178+
179+
modulePath := "test"
180+
containingFQN := FindContainingFunction(location, functions, modulePath)
181+
182+
// Should return empty for module-level code
183+
assert.Empty(t, containingFQN)
184+
}
185+
186+
func TestValidateFQN(t *testing.T) {
187+
moduleRegistry := core.NewModuleRegistry()
188+
189+
// Add a test module
190+
moduleRegistry.Modules["mymodule"] = "/path/to/mymodule.py"
191+
moduleRegistry.FileToModule["/path/to/mymodule.py"] = "mymodule"
192+
193+
tests := []struct {
194+
name string
195+
fqn string
196+
expected bool
197+
}{
198+
{"Valid module FQN", "mymodule.func", true},
199+
{"Invalid module FQN", "unknownmodule.func", false},
200+
{"Empty FQN", "", false},
201+
{"Valid module name without dot", "mymodule", true},
202+
}
203+
204+
for _, tt := range tests {
205+
t.Run(tt.name, func(t *testing.T) {
206+
result := ValidateFQN(tt.fqn, moduleRegistry)
207+
assert.Equal(t, tt.expected, result)
208+
})
209+
}
210+
}
211+
212+
func TestDetectPythonVersion(t *testing.T) {
213+
// Create a temporary project
214+
tmpDir := t.TempDir()
215+
216+
// Test with .python-version file
217+
pythonVersionFile := filepath.Join(tmpDir, ".python-version")
218+
err := os.WriteFile(pythonVersionFile, []byte("3.11.0\n"), 0644)
219+
require.NoError(t, err)
220+
221+
version := DetectPythonVersion(tmpDir)
222+
assert.NotEmpty(t, version)
223+
assert.Contains(t, version, "3.11")
224+
}
225+
226+
func TestDetectPythonVersion_NoPythonVersionFile(t *testing.T) {
227+
// Create an empty temporary directory
228+
tmpDir := t.TempDir()
229+
230+
// Should fall back to checking pyproject.toml or default
231+
version := DetectPythonVersion(tmpDir)
232+
// Should return a default version or detect from system
233+
assert.NotEmpty(t, version)
234+
}
235+
236+
func TestBuildCallGraph_WithEdges(t *testing.T) {
237+
// Create a project with function calls
238+
tmpDir := t.TempDir()
239+
240+
mainPy := filepath.Join(tmpDir, "main.py")
241+
err := os.WriteFile(mainPy, []byte(`
242+
def helper():
243+
return 42
244+
245+
def caller():
246+
result = helper()
247+
return result
248+
`), 0644)
249+
require.NoError(t, err)
250+
251+
// Parse and build call graph
252+
codeGraph := graph.Initialize(tmpDir)
253+
254+
moduleRegistry, err := registry.BuildModuleRegistry(tmpDir)
255+
require.NoError(t, err)
256+
257+
callGraph, err := BuildCallGraph(codeGraph, moduleRegistry, tmpDir)
258+
require.NoError(t, err)
259+
260+
// Verify edges were created
261+
assert.NotEmpty(t, callGraph.Edges)
262+
263+
// Check that caller has edges to helper
264+
foundEdge := false
265+
for callerFQN, callees := range callGraph.Edges {
266+
if len(callees) > 0 {
267+
foundEdge = true
268+
t.Logf("Function %s calls: %v", callerFQN, callees)
269+
}
270+
}
271+
272+
assert.True(t, foundEdge, "Expected at least one call edge")
273+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package builder
2+
3+
import (
4+
"sync"
5+
6+
"github.com/shivasurya/code-pathfinder/sourcecode-parser/graph/callgraph/core"
7+
"github.com/shivasurya/code-pathfinder/sourcecode-parser/graph/callgraph/resolution"
8+
)
9+
10+
// ImportMapCache provides thread-safe caching of ImportMap instances.
11+
// It prevents redundant import extraction by caching results keyed by file path.
12+
//
13+
// Thread-safety:
14+
// - All methods are safe for concurrent use
15+
// - Uses RWMutex for optimized read-heavy workloads
16+
// - GetOrExtract handles double-checked locking pattern
17+
type ImportMapCache struct {
18+
cache map[string]*core.ImportMap // Maps file path to ImportMap
19+
mu sync.RWMutex // Protects cache map
20+
}
21+
22+
// NewImportMapCache creates a new empty import map cache.
23+
func NewImportMapCache() *ImportMapCache {
24+
return &ImportMapCache{
25+
cache: make(map[string]*core.ImportMap),
26+
}
27+
}
28+
29+
// Get retrieves an ImportMap from the cache if it exists.
30+
//
31+
// Parameters:
32+
// - filePath: absolute path to the Python file
33+
//
34+
// Returns:
35+
// - ImportMap and true if found in cache, nil and false otherwise
36+
func (c *ImportMapCache) Get(filePath string) (*core.ImportMap, bool) {
37+
c.mu.RLock()
38+
defer c.mu.RUnlock()
39+
40+
importMap, ok := c.cache[filePath]
41+
return importMap, ok
42+
}
43+
44+
// Put stores an ImportMap in the cache.
45+
//
46+
// Parameters:
47+
// - filePath: absolute path to the Python file
48+
// - importMap: the extracted ImportMap to cache
49+
func (c *ImportMapCache) Put(filePath string, importMap *core.ImportMap) {
50+
c.mu.Lock()
51+
defer c.mu.Unlock()
52+
53+
c.cache[filePath] = importMap
54+
}
55+
56+
// GetOrExtract retrieves an ImportMap from cache or extracts it if not cached.
57+
// This is the main entry point for using the cache.
58+
//
59+
// Parameters:
60+
// - filePath: absolute path to the Python file
61+
// - sourceCode: file contents (only used if extraction needed)
62+
// - registry: module registry for resolving imports
63+
//
64+
// Returns:
65+
// - ImportMap from cache or newly extracted
66+
// - error if extraction fails (cache misses only)
67+
//
68+
// Thread-safety:
69+
// - Multiple goroutines can safely call GetOrExtract concurrently
70+
// - First caller for a file will extract and cache
71+
// - Subsequent callers will get cached result
72+
func (c *ImportMapCache) GetOrExtract(filePath string, sourceCode []byte, registry *core.ModuleRegistry) (*core.ImportMap, error) {
73+
// Try to get from cache (fast path with read lock)
74+
if importMap, ok := c.Get(filePath); ok {
75+
return importMap, nil
76+
}
77+
78+
// Cache miss - extract imports (expensive operation)
79+
importMap, err := resolution.ExtractImports(filePath, sourceCode, registry)
80+
if err != nil {
81+
return nil, err
82+
}
83+
84+
// Store in cache for future use
85+
c.Put(filePath, importMap)
86+
87+
return importMap, nil
88+
}

0 commit comments

Comments
 (0)