mirror of
https://github.com/sbrow/envr.git
synced 2026-06-27 18:48:33 -04:00
Compare commits
2 Commits
ba647f51c1
...
598c622287
| Author | SHA1 | Date | |
|---|---|---|---|
| 598c622287 | |||
| 1fc5f8280e |
@@ -78,10 +78,10 @@ Key behaviors:
|
|||||||
|
|
||||||
- **Stat avoidance via `dirent.type`** — Uses `core:sys/linux` getdents directly, bypassing `core:os` which calls `openat` + `fstat` per entry. File type comes free from the directory entry.
|
- **Stat avoidance via `dirent.type`** — Uses `core:sys/linux` getdents directly, bypassing `core:os` which calls `openat` + `fstat` per entry. File type comes free from the directory entry.
|
||||||
- **Prune ignored directories** — When a directory matches a gitignore pattern, it is not descended into. Skips potentially thousands of readdir calls.
|
- **Prune ignored directories** — When a directory matches a gitignore pattern, it is not descended into. Skips potentially thousands of readdir calls.
|
||||||
|
- **Parallel traversal** — 8-worker thread pool with a shared LIFO queue and futex-based semaphore signaling. 5.4x speedup over serial traversal on the home directory.
|
||||||
|
|
||||||
### Future (if needed)
|
### Future (if needed)
|
||||||
|
|
||||||
- Work-stealing parallel traversal (per-thread LIFO deques with batch stealing, like fd)
|
|
||||||
- BufWriter on stdout for large result sets
|
- BufWriter on stdout for large result sets
|
||||||
- Arena allocators for path strings
|
- Arena allocators for path strings
|
||||||
|
|
||||||
@@ -141,7 +141,7 @@ Key behaviors:
|
|||||||
**Goal:** Working tool that finds gitignored files in git repos.
|
**Goal:** Working tool that finds gitignored files in git repos.
|
||||||
|
|
||||||
**Built:**
|
**Built:**
|
||||||
- `walker.odin` — Single-threaded DFS using `core:sys/linux` getdents. Finds repos, reads `.gitignore`, emits gitignored files, recurses into subdirs for nested repos.
|
- `walker.odin` — Parallel DFS using `core:sys/linux` getdents with 8-worker thread pool. Finds repos, reads `.gitignore`, emits gitignored files, recurses into subdirs for nested repos.
|
||||||
- `findr.odin` — Minimal CLI: `findr [dirs...]`, no flags.
|
- `findr.odin` — Minimal CLI: `findr [dirs...]`, no flags.
|
||||||
- `test_env.odin` — Test harness with temp dirs and mock filesystems.
|
- `test_env.odin` — Test harness with temp dirs and mock filesystems.
|
||||||
- `findr_test.odin` — 10 integration tests.
|
- `findr_test.odin` — 10 integration tests.
|
||||||
@@ -150,16 +150,20 @@ Key behaviors:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Phase 3: Parallel Traversal (future)
|
### Phase 3: Parallel Traversal ✅
|
||||||
|
|
||||||
**Goal:** Parallelize directory descent for large trees.
|
**Goal:** Parallelize directory descent for large trees.
|
||||||
|
|
||||||
|
**Result:** Worker pool with shared LIFO queue, 8 threads, futex-based semaphore signaling. 852ms vs 4.57s serial (5.4x speedup) on `~`. Serial code has been removed — parallel is the only implementation.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Phase 4: Benchmark (future)
|
### Phase 4: Benchmark ✅
|
||||||
|
|
||||||
**Goal:** Quantify performance vs fd on large directory trees.
|
**Goal:** Quantify performance vs fd on large directory trees.
|
||||||
|
|
||||||
|
**Result:** findr found 227 gitignored files on `~` in 852ms. The fd-based comparison needs two runs (all files vs. unignored files) and walked ~1.1M entries. findr's pruning of ignored directories (node_modules, dist, etc.) gives it a massive advantage.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Phase 5: Integrate into envr (future)
|
### Phase 5: Integrate into envr (future)
|
||||||
@@ -170,7 +174,7 @@ Key behaviors:
|
|||||||
|
|
||||||
| Risk | Mitigation |
|
| Risk | Mitigation |
|
||||||
|---|---|
|
|---|---|
|
||||||
| Single-threaded may be slow on huge trees | Add threading in Phase 3 after correctness |
|
| Single-threaded may be slow on huge trees | Resolved — parallel traversal implemented (Phase 3) |
|
||||||
| Gitignore edge cases (`**/foo`, `foo/**/bar`) | Comprehensive gitignore_test.odin with spec examples |
|
| Gitignore edge cases (`**/foo`, `foo/**/bar`) | Comprehensive gitignore_test.odin with spec examples |
|
||||||
| dirent.type may be UNKNOWN on some filesystems | Fall back to stat only when type is UNKNOWN |
|
| dirent.type may be UNKNOWN on some filesystems | Fall back to stat only when type is UNKNOWN |
|
||||||
| Missing nested `.env` files in monorepos | Accepted limitation — flat gitignore model |
|
| Missing nested `.env` files in monorepos | Accepted limitation — flat gitignore model |
|
||||||
|
|||||||
@@ -3,54 +3,97 @@ package findr
|
|||||||
import "core:fmt"
|
import "core:fmt"
|
||||||
import "core:os"
|
import "core:os"
|
||||||
import "core:strings"
|
import "core:strings"
|
||||||
|
import "core:sync"
|
||||||
import "core:sys/linux"
|
import "core:sys/linux"
|
||||||
|
import "core:thread"
|
||||||
|
|
||||||
|
THREAD_COUNT :: 8
|
||||||
|
|
||||||
RawEntry :: struct {
|
RawEntry :: struct {
|
||||||
name: string,
|
name: string,
|
||||||
type: linux.Dirent_Type,
|
type: linux.Dirent_Type,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WalkerPool is the shared state for one parallel `walk` call.
//
// Fields, as used by the procedures in this file:
//   queue         - pending directory paths; workers pop from the end (LIFO).
//   queue_mutex   - guards `queue`.
//   queue_sema    - posted once per enqueued path, and THREAD_COUNT more times
//                   by `walk` after `done` fires so every worker wakes up.
//   results       - caller-owned output array that receives matched file paths.
//   results_mutex - guards appends to `results`.
//   active        - atomic count of directories queued or being processed;
//                   starts at 1 for the root, incremented in `push_work`,
//                   decremented by a worker after it finishes a directory.
//   done          - signaled by a worker when `active` drops to zero.
//   threads       - worker thread handles, destroyed by `walk` on shutdown.
WalkerPool :: struct {
|
||||||
|
queue: [dynamic]string,
|
||||||
|
queue_mutex: sync.Mutex,
|
||||||
|
queue_sema: sync.Atomic_Sema,
|
||||||
|
results: ^[dynamic]string,
|
||||||
|
results_mutex: sync.Mutex,
|
||||||
|
active: i64,
|
||||||
|
done: sync.One_Shot_Event,
|
||||||
|
threads: [dynamic]^thread.Thread,
|
||||||
|
}
|
||||||
|
|
||||||
walk :: proc(root: string, results: ^[dynamic]string) {
|
walk :: proc(root: string, results: ^[dynamic]string) {
|
||||||
walk_dir(root, results)
|
pool := new(WalkerPool)
|
||||||
|
pool.queue = make([dynamic]string)
|
||||||
|
pool.results = results
|
||||||
|
pool.active = 1
|
||||||
|
pool.threads = make([dynamic]^thread.Thread)
|
||||||
|
|
||||||
|
root_clone, _ := strings.clone(root)
|
||||||
|
append(&pool.queue, root_clone)
|
||||||
|
sync.atomic_sema_post(&pool.queue_sema)
|
||||||
|
|
||||||
|
for i in 0 ..< THREAD_COUNT {
|
||||||
|
t := thread.create(walk_worker)
|
||||||
|
t.data = rawptr(pool)
|
||||||
|
t.init_context = context
|
||||||
|
thread.start(t)
|
||||||
|
append(&pool.threads, t)
|
||||||
}
|
}
|
||||||
|
|
||||||
walk_dir :: proc(dir_path: string, results: ^[dynamic]string) {
|
sync.one_shot_event_wait(&pool.done)
|
||||||
cpath := strings.clone_to_cstring(dir_path)
|
|
||||||
if cpath == nil do return
|
|
||||||
defer delete(cpath)
|
|
||||||
|
|
||||||
fd, err := linux.open(cpath, {.DIRECTORY, .CLOEXEC})
|
for _ in 0 ..< THREAD_COUNT {
|
||||||
if err != .NONE do return
|
sync.atomic_sema_post(&pool.queue_sema)
|
||||||
defer linux.close(fd)
|
|
||||||
|
|
||||||
buf: [8192]u8
|
|
||||||
has_git := false
|
|
||||||
|
|
||||||
entries := make([dynamic]RawEntry)
|
|
||||||
defer {
|
|
||||||
for &entry in entries {
|
|
||||||
delete(entry.name)
|
|
||||||
}
|
}
|
||||||
delete(entries)
|
|
||||||
|
for t in pool.threads {
|
||||||
|
thread.destroy(t)
|
||||||
}
|
}
|
||||||
|
delete(pool.threads)
|
||||||
|
for path in pool.queue {
|
||||||
|
delete(path)
|
||||||
|
}
|
||||||
|
delete(pool.queue)
|
||||||
|
free(pool)
|
||||||
|
}
|
||||||
|
|
||||||
|
walk_worker :: proc(t: ^thread.Thread) {
|
||||||
|
pool := cast(^WalkerPool)t.data
|
||||||
|
|
||||||
for {
|
for {
|
||||||
n, errno := linux.getdents(fd, buf[:])
|
sync.atomic_sema_wait(&pool.queue_sema)
|
||||||
if n <= 0 || errno != .NONE do break
|
|
||||||
|
|
||||||
offs := 0
|
sync.mutex_lock(&pool.queue_mutex)
|
||||||
for d in linux.dirent_iterate_buf(buf[:n], &offs) {
|
if len(pool.queue) == 0 {
|
||||||
name := linux.dirent_name(d)
|
sync.mutex_unlock(&pool.queue_mutex)
|
||||||
if name == "." || name == ".." do continue
|
if sync.atomic_load_explicit(&pool.active, .Acquire) == 0 {
|
||||||
|
sync.one_shot_event_signal(&pool.done)
|
||||||
if name == ".git" && d.type == .DIR {
|
|
||||||
has_git = true
|
|
||||||
}
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
last := len(pool.queue) - 1
|
||||||
|
dir_path := pool.queue[last]
|
||||||
|
ordered_remove(&pool.queue, last)
|
||||||
|
sync.mutex_unlock(&pool.queue_mutex)
|
||||||
|
|
||||||
cloned := strings.clone(name)
|
process_dir(pool, dir_path)
|
||||||
append(&entries, RawEntry{name = cloned, type = d.type})
|
delete(dir_path)
|
||||||
|
|
||||||
|
old := sync.atomic_sub_explicit(&pool.active, 1, .Release)
|
||||||
|
if old == 1 {
|
||||||
|
sync.one_shot_event_signal(&pool.done)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
process_dir :: proc(pool: ^WalkerPool, dir_path: string) {
|
||||||
|
has_git := false
|
||||||
|
entries := read_dir_entries(dir_path, &has_git)
|
||||||
|
defer free_entries(&entries)
|
||||||
|
|
||||||
if has_git {
|
if has_git {
|
||||||
gi := load_gitignore(dir_path)
|
gi := load_gitignore(dir_path)
|
||||||
@@ -65,33 +108,83 @@ walk_dir :: proc(dir_path: string, results: ^[dynamic]string) {
|
|||||||
if gi != nil && is_ignored(gi, entry.name, is_dir) {
|
if gi != nil && is_ignored(gi, entry.name, is_dir) {
|
||||||
if !is_dir {
|
if !is_dir {
|
||||||
full_path := join_path(dir_path, entry.name)
|
full_path := join_path(dir_path, entry.name)
|
||||||
append(results, full_path)
|
sync.mutex_lock(&pool.results_mutex)
|
||||||
|
append(pool.results, full_path)
|
||||||
|
sync.mutex_unlock(&pool.results_mutex)
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if is_dir {
|
if is_dir {
|
||||||
child_path := join_path(dir_path, entry.name)
|
child_path := join_path(dir_path, entry.name)
|
||||||
walk_dir(child_path, results)
|
push_work(pool, child_path)
|
||||||
delete(child_path)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for entry in entries {
|
for entry in entries {
|
||||||
if entry.type == .DIR {
|
if entry.type == .DIR {
|
||||||
child_path := join_path(dir_path, entry.name)
|
child_path := join_path(dir_path, entry.name)
|
||||||
walk_dir(child_path, results)
|
push_work(pool, child_path)
|
||||||
delete(child_path)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// push_work enqueues `path` as a directory for the worker pool to process.
//
// The string is stored as-is (not cloned), so the pool takes over the
// allocation: `walk_worker` deletes each path after processing it, and `walk`
// deletes anything still queued at shutdown.
//
// `pool.active` is incremented before the path is appended to the queue —
// presumably so the counter cannot be observed at zero while this item is
// still pending (NOTE(review): confirm the .Relaxed ordering is sufficient).
// The semaphore is posted last, once per pushed path, to wake one worker.
push_work :: proc(pool: ^WalkerPool, path: string) {
|
||||||
|
sync.atomic_add_explicit(&pool.active, 1, .Relaxed)
|
||||||
|
sync.mutex_lock(&pool.queue_mutex)
|
||||||
|
append(&pool.queue, path)
|
||||||
|
sync.mutex_unlock(&pool.queue_mutex)
|
||||||
|
sync.atomic_sema_post(&pool.queue_sema)
|
||||||
|
}
|
||||||
|
|
||||||
|
// read_dir_entries lists `dir_path` with raw `getdents` calls and returns one
// RawEntry (cloned name + dirent type) per entry, skipping "." and "..".
//
// Parameters:
//   dir_path - directory to read.
//   has_git  - out-parameter; set to true when an entry named ".git" with type
//              .DIR is seen. It is reset to false only after the directory has
//              been opened successfully, so on the early-return paths below the
//              caller's value is left untouched.
//
// Returns: a dynamic array the caller must release with `free_entries`. The
// array is empty if the path could not be converted to a cstring or the
// directory could not be opened. A getdents error ends the read loop, and the
// entries collected so far are returned.
read_dir_entries :: proc(dir_path: string, has_git: ^bool) -> [dynamic]RawEntry {
|
||||||
|
entries := make([dynamic]RawEntry)
|
||||||
|
|
||||||
|
cpath := strings.clone_to_cstring(dir_path)
|
||||||
|
if cpath == nil do return entries
|
||||||
|
|
||||||
|
fd, err := linux.open(cpath, {.DIRECTORY, .CLOEXEC})
|
||||||
|
delete(cpath)
|
||||||
|
if err != .NONE do return entries
|
||||||
|
|
||||||
|
buf: [8192]u8
|
||||||
|
has_git^ = false
|
||||||
|
|
||||||
|
for {
|
||||||
|
n, errno := linux.getdents(fd, buf[:])
|
||||||
|
if n <= 0 || errno != .NONE do break
|
||||||
|
|
||||||
|
offs := 0
|
||||||
|
for d in linux.dirent_iterate_buf(buf[:n], &offs) {
|
||||||
|
name := linux.dirent_name(d)
|
||||||
|
if name == "." || name == ".." do continue
|
||||||
|
|
||||||
|
if name == ".git" && d.type == .DIR {
|
||||||
|
has_git^ = true
|
||||||
|
}
|
||||||
|
|
||||||
|
cloned := strings.clone(name)
|
||||||
|
append(&entries, RawEntry{name = cloned, type = d.type})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
linux.close(fd)
|
||||||
|
return entries
|
||||||
|
}
|
||||||
|
|
||||||
|
// free_entries releases an entry list produced by `read_dir_entries`: it
// deletes every entry's cloned name string, then the dynamic array itself.
free_entries :: proc(entries: ^[dynamic]RawEntry) {
|
||||||
|
for &entry in entries {
|
||||||
|
delete(entry.name)
|
||||||
|
}
|
||||||
|
delete(entries^)
|
||||||
|
}
|
||||||
|
|
||||||
load_gitignore :: proc(dir_path: string) -> ^Gitignore {
|
load_gitignore :: proc(dir_path: string) -> ^Gitignore {
|
||||||
gi_path := join_path(dir_path, ".gitignore")
|
gi_path := join_path(dir_path, ".gitignore")
|
||||||
defer delete(gi_path)
|
defer delete(gi_path)
|
||||||
|
|
||||||
data, err := os.read_entire_file_from_path(gi_path, context.allocator)
|
data, err := os.read_entire_file_from_path(gi_path, context.allocator)
|
||||||
if err != nil do return nil
|
if err != .NONE do return nil
|
||||||
|
|
||||||
gi := new(Gitignore)
|
gi := new(Gitignore)
|
||||||
gi^ = parse(string(data))
|
gi^ = parse(string(data))
|
||||||
@@ -114,3 +207,4 @@ join_path :: proc(parent, child: string) -> string {
|
|||||||
result, _ := strings.clone(s)
|
result, _ := strings.clone(s)
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user