From 1fc5f8280e4a67b67bc14c08eb50ec4334e09cda Mon Sep 17 00:00:00 2001 From: Spencer Brower Date: Tue, 16 Jun 2026 20:57:38 -0400 Subject: [PATCH] feat: Replaced `fd` with custom internals. --- .gitignore | 1 + TODOS.md | 2 - findr/PLAN.md | 177 ++++++++++++++++++++++++++ findr/findr.odin | 33 +++++ findr/findr_test.odin | 160 +++++++++++++++++++++++ findr/gitignore.odin | 182 ++++++++++++++++++++++++++ findr/gitignore_test.odin | 178 ++++++++++++++++++++++++++ findr/test_env.odin | 147 +++++++++++++++++++++ findr/walker.odin | 260 ++++++++++++++++++++++++++++++++++++++ flake.nix | 3 + 10 files changed, 1141 insertions(+), 2 deletions(-) create mode 100644 findr/PLAN.md create mode 100644 findr/findr.odin create mode 100644 findr/findr_test.odin create mode 100644 findr/gitignore.odin create mode 100644 findr/gitignore_test.odin create mode 100644 findr/test_env.odin create mode 100644 findr/walker.odin diff --git a/.gitignore b/.gitignore index f49b1b1..7c02395 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,6 @@ man builds envr envr-go +findr/findr result version.odin diff --git a/TODOS.md b/TODOS.md index eb3b7a5..59e58d8 100644 --- a/TODOS.md +++ b/TODOS.md @@ -32,8 +32,6 @@ 18. 2 scan tests silently skip when fd isn't installed, tests pass without actually testing anything. These should use #assert to be sure that fd is in path. -19. Try to do all encryption / decryption in memory - only read / write encrypted data to disk. - 20. add --format -f flag to commands that draw tables. 21. Replace `testing.expect` calls with `testing.expect_value` calls where appropriate. diff --git a/findr/PLAN.md b/findr/PLAN.md new file mode 100644 index 0000000..b33963f --- /dev/null +++ b/findr/PLAN.md @@ -0,0 +1,177 @@ +# findr — Gitignored File Finder + +## Overview + +findr is a native Odin tool that finds **gitignored files** within git repositories. It replaces envr's current approach of running `fd` twice (all files vs. unignored files) and diffing the results. 
+ +**Simplified scope:** findr does one thing — walks directories, finds git repos, reads each repo's `.gitignore`, and prints every gitignored file. No flags, no filtering, no pattern matching. envr handles result filtering itself. + +## Current fd Usage in envr (being replaced) + +1. **`scan.odin:13-43`** (`scan_path`) — runs `fd` twice per search path: + - Run 1: `fd -a [-E EXCLUDE]... -HI REGEX PATH` → all files including gitignored + - Run 2: `fd -a [-E EXCLUDE]... -H REGEX PATH` → hidden but NOT gitignored + - Diff = gitignored files only +2. Both go through `run_fd` (`scan.odin:68-118`), which spawns a subprocess and captures output via temp files. + +After findr integration, `scan_path` calls `findr.walk(path)` directly — no subprocess, no double-run, no diff. + +## Directory Structure + +``` +findr/ + findr.odin # main + CLI (positional dir args only) + walker.odin # recursive directory walker using core:sys/linux getdents + gitignore.odin # .gitignore parsing + glob→regex transpilation + matching + test_env.odin # test harness: temp dir, mock filesystem, assert helpers + findr_test.odin # integration tests (10 tests) + gitignore_test.odin # transpilation + matching unit tests (22 tests) +``` + +## Decisions + +- **Scope**: findr prints ALL gitignored files. No regex filtering, no exclude patterns, no type filters. envr post-processes the output. +- **Gitignore matching**: Transpile gitignore glob patterns to regex, then use `core:text/regex`. No dedicated glob matcher. +- **Stat avoidance**: Use `core:sys/linux` getdents directly — read `dirent.type` from the kernel, never call stat. +- **Architecture**: Separate directory with its own `main`. Core logic (`walk` proc + `gitignore` package) designed to be importable into envr later. + +## CLI Interface + +``` +findr [dir1] [dir2] ... +``` + +No flags. Defaults to `.` if no dirs given. Prints absolute or relative paths (as given) to stdout, one per line. 
+ +## Build + +```bash +odin build findr -o:speed -out:findr/findr +``` + +## How It Works + +``` +walk(dir): + entries = getdents(dir) # via core:sys/linux, zero stat calls + if entries contains ".git/": + gi = parse(.gitignore) # if present + for entry in entries: + if entry is gitignored file: + emit entry path + if entry is dir (not ignored): + walk(entry) # recurse to find nested repos + else: + for entry in entries: + if entry is dir: + walk(entry) # descend looking for repos +``` + +Key behaviors: +- **Nested repos**: When a repo is found, subdirectories are still traversed to find nested repos. Gitignored directories are pruned (not descended into). +- **Flat gitignore**: Only the root `.gitignore` is read. `.gitignore` files in subdirectories of a repo are ignored. Rules are matched against the names of entries directly inside the repo root only; files inside non-ignored subdirectories are not tested against the rules unless that subdirectory is itself a repo. +- **Non-repo dirs**: Traversed recursively to find repos. No gitignore rules apply. + +## Performance Architecture + +### Implemented + +- **Stat avoidance via `dirent.type`** — Uses `core:sys/linux` getdents directly, bypassing `core:os` which calls `openat` + `fstat` per entry. File type comes free from the directory entry. +- **Prune ignored directories** — When a directory matches a gitignore pattern, it is not descended into. Skips potentially thousands of readdir calls. + +### Future (if needed) + +- Work-stealing parallel traversal (per-thread LIFO deques with batch stealing, like fd) +- BufWriter on stdout for large result sets +- Arena allocators for path strings + +## Testing Strategy + +- **In-process integration tests** — Tests call `walk()` directly (not via subprocess), build mock filesystems in temp dirs, and compare sorted output. +- **Unit tests** — Pure-function tests for glob→regex transpilation and gitignore matching. +- **Output sorting for determinism** — Always sort output lines before comparison. +- **Memory tracking** — Odin's test runner reports leaks automatically. All 32 tests pass with zero leaks. 
+ +### Test Coverage (findr_test.odin) + +| Test | What it covers | +|---|---| +| `test_basic_gitignored` | Repo with `.gitignore`, gitignored files emitted, normal files skipped | +| `test_non_repo_not_scanned` | Dirs without `.git/` produce no output | +| `test_negation_pattern` | `!prod.env` un-ignores a file | +| `test_dir_only_pattern` | `node_modules/` pattern doesn't emit file results | +| `test_multiple_repos` | Multiple repos in one tree, each with its own `.gitignore` | +| `test_nested_repos` | Repo inside a repo, both scanned independently | +| `test_gitignore_in_subdir_ignored` | Subdirectory `.gitignore` files are not read | +| `test_no_gitignore_file` | Repo with `.git/` but no `.gitignore` produces nothing | +| `test_empty_gitignore` | Comments and blank lines only → no results | +| `test_multiple_search_dirs` | Multiple top-level search dirs in one call | + +### Gitignore Unit Tests (gitignore_test.odin) + +22 tests covering: simple/anchored patterns, `*`, `?`, `[abc]`, `[!abc]`, dot escaping, globstar variants, backslash escapes, empty patterns, basic matching, negation, dir-only, comments, blank lines, last-match-wins, env patterns. + +## Glob→Regex Transpilation Rules + +| Gitignore pattern | Regex | Notes | +|---|---|---| +| `foo` | `(^\|/)foo(/.*)?$` | matches at any depth | +| `/foo` | `^foo(/.*)?$` | anchored to gitignore dir | +| `foo/` | `(^\|/)foo(/.*)?$` | directory only: the trailing `/` is stripped and the rule is flagged dir-only, so it is skipped for non-directories | +| `*.log` | `(^\|/)[^/]*\.log(/.*)?$` | `*` = any chars except `/` | +| `**/foo` | `(^\|/)(.*/)?foo(/.*)?$` | `**` = any chars including `/` | +| `foo/**/bar` | `(^\|/)foo/(.*/)?bar(/.*)?$` | `**` between segments | +| `!pattern` | (handled by `parse`) | sets the rule's `negated` flag, not part of the regex | +| `#comment` | (skipped) | | +| `[abc]` | `[abc]` | same regex syntax | +| `?` | `[^/]` | single char, no `/` | + +## Implementation Phases + +### Phase 1: Gitignore Transpiler + Tests ✅ + +**Goal:** Isolated, fully-tested glob→regex transpiler. + +**Result:** 22 tests, all passing, zero leaks. 
+ +--- + +### Phase 2: findr Walker + Tests ✅ + +**Goal:** Working tool that finds gitignored files in git repos. + +**Built:** +- `walker.odin` — Single-threaded DFS using `core:sys/linux` getdents. Finds repos, reads `.gitignore`, emits gitignored files, recurses into subdirs for nested repos. +- `findr.odin` — Minimal CLI: `findr [dirs...]`, no flags. +- `test_env.odin` — Test harness with temp dirs and mock filesystems. +- `findr_test.odin` — 10 integration tests. + +**Result:** All 32 tests pass (22 gitignore + 10 walker), zero leaks. + +--- + +### Phase 3: Parallel Traversal (future) + +**Goal:** Parallelize directory descent for large trees. + +--- + +### Phase 4: Benchmark (future) + +**Goal:** Quantify performance vs fd on large directory trees. + +--- + +### Phase 5: Integrate into envr (future) + +**Goal:** Replace `run_fd` in `scan.odin`. `scan_path` calls `findr.walk()` directly instead of two subprocess runs + diff. + +## Risks + +| Risk | Mitigation | +|---|---| +| Single-threaded may be slow on huge trees | Add threading in Phase 3 after correctness | +| Gitignore edge cases (`**/foo`, `foo/**/bar`) | Comprehensive gitignore_test.odin with spec examples | +| dirent.type may be UNKNOWN on some filesystems | Fall back to stat only when type is UNKNOWN | +| Missing nested `.env` files in monorepos | Accepted limitation — flat gitignore model | +| Memory allocation churn from path strings | Use thread-local arena allocators in Phase 3 | diff --git a/findr/findr.odin b/findr/findr.odin new file mode 100644 index 0000000..3221745 --- /dev/null +++ b/findr/findr.odin @@ -0,0 +1,33 @@ +package findr + +import "core:fmt" +import "core:os" + +main :: proc() { + args := os.args + + search_dirs := make([dynamic]string) + defer delete(search_dirs) + + for i in 1.. 
bool { + switch c { + case '.', '+', '(', ')', '{', '}', '^', '$', '|': + return true + } + return false +} + +glob_to_regex :: proc(pattern: string, anchored: bool) -> string { + sb: strings.Builder + strings.builder_init(&sb) + defer strings.builder_destroy(&sb) + + if anchored { + fmt.sbprintf(&sb, "^") + } else { + fmt.sbprintf(&sb, "(^|/)") + } + + i := 0 + for i < len(pattern) { + c := pattern[i] + + if c == '*' { + if i + 1 < len(pattern) && pattern[i + 1] == '*' { + prev_slash := i == 0 || pattern[i - 1] == '/' + at_end := i + 2 >= len(pattern) + next_slash := !at_end && pattern[i + 2] == '/' + + if prev_slash && (next_slash || at_end) { + if next_slash { + i += 3 + fmt.sbprintf(&sb, "(.*/)?") + } else { + i += 2 + fmt.sbprintf(&sb, ".*") + } + } else { + fmt.sbprintf(&sb, "[^/]*") + i += 2 + } + } else { + fmt.sbprintf(&sb, "[^/]*") + i += 1 + } + } else if c == '?' { + fmt.sbprintf(&sb, "[^/]") + i += 1 + } else if c == '[' { + append(&sb.buf, '[') + i += 1 + if i < len(pattern) && pattern[i] == '!' 
{ + append(&sb.buf, '^') + i += 1 + } + if i < len(pattern) && pattern[i] == ']' { + append(&sb.buf, ']') + i += 1 + } + for i < len(pattern) && pattern[i] != ']' { + append(&sb.buf, pattern[i]) + i += 1 + } + if i < len(pattern) { + append(&sb.buf, ']') + i += 1 + } + } else if c == '\\' { + i += 1 + if i < len(pattern) { + if is_regex_meta(pattern[i]) { + append(&sb.buf, '\\') + } + append(&sb.buf, pattern[i]) + i += 1 + } + } else if is_regex_meta(c) { + append(&sb.buf, '\\') + append(&sb.buf, c) + i += 1 + } else { + append(&sb.buf, c) + i += 1 + } + } + + fmt.sbprintf(&sb, "(/.*)?$") + + s := strings.to_string(sb) + result, _ := strings.clone(s) + return result +} + +Rule :: struct { + regex: regex.Regular_Expression, + negated: bool, + dir_only: bool, +} + +Gitignore :: struct { + rules: [dynamic]Rule, +} + +parse :: proc(content: string) -> Gitignore { + gi: Gitignore + gi.rules = make([dynamic]Rule) + + remaining := content + for { + line, ok := strings.split_lines_iterator(&remaining) + if !ok do break + + s := strings.trim_space(line) + if len(s) == 0 do continue + if s[0] == '#' do continue + + negated := false + if s[0] == '!' 
{ + negated = true + s = s[1:] + } + + if len(s) > 0 && s[0] == '\\' { + if len(s) > 1 && (s[1] == '#' || s[1] == '!') { + s = s[1:] + } + } + + dir_only := false + if len(s) > 0 && s[len(s) - 1] == '/' { + dir_only = true + s = s[:len(s) - 1] + } + + anchored := false + if len(s) > 0 && s[0] == '/' { + anchored = true + s = s[1:] + } + + if len(s) == 0 do continue + + regex_str := glob_to_regex(s, anchored) + re, err := regex.create(regex_str, {regex.Flag.No_Capture}) + delete(regex_str) + if err != nil do continue + + append(&gi.rules, Rule{ + regex = re, + negated = negated, + dir_only = dir_only, + }) + } + + return gi +} + +is_ignored :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> bool { + matched := false + for rule in gi.rules { + if rule.dir_only && !is_dir do continue + cap, ok := regex.match(rule.regex, path) + regex.destroy(cap) + if ok { + matched = !rule.negated + } + } + return matched +} + +destroy :: proc(gi: ^Gitignore) { + for rule in gi.rules { + regex.destroy(rule.regex) + } + delete(gi.rules) +} diff --git a/findr/gitignore_test.odin b/findr/gitignore_test.odin new file mode 100644 index 0000000..db36aa4 --- /dev/null +++ b/findr/gitignore_test.odin @@ -0,0 +1,178 @@ +package findr + +import "core:testing" + +@(test) +test_glob_simple :: proc(t: ^testing.T) { + result := glob_to_regex("foo", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)foo(/.*)?$") +} + +@(test) +test_glob_anchored :: proc(t: ^testing.T) { + result := glob_to_regex("foo", true) + defer delete(result) + testing.expect_value(t, result, "^foo(/.*)?$") +} + +@(test) +test_glob_star :: proc(t: ^testing.T) { + result := glob_to_regex("*.log", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)[^/]*\\.log(/.*)?$") +} + +@(test) +test_glob_question :: proc(t: ^testing.T) { + result := glob_to_regex("?.log", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)[^/]\\.log(/.*)?$") +} + +@(test) +test_glob_char_class :: 
proc(t: ^testing.T) { + result := glob_to_regex("[abc].log", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)[abc]\\.log(/.*)?$") +} + +@(test) +test_glob_negated_class :: proc(t: ^testing.T) { + result := glob_to_regex("[!abc].log", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)[^abc]\\.log(/.*)?$") +} + +@(test) +test_glob_dot_escaped :: proc(t: ^testing.T) { + result := glob_to_regex(".env", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)\\.env(/.*)?$") +} + +@(test) +test_glob_globstar_prefix :: proc(t: ^testing.T) { + result := glob_to_regex("**/foo", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)(.*/)?foo(/.*)?$") +} + +@(test) +test_glob_globstar_suffix :: proc(t: ^testing.T) { + result := glob_to_regex("abc/**", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)abc/.*(/.*)?$") +} + +@(test) +test_glob_globstar_middle :: proc(t: ^testing.T) { + result := glob_to_regex("foo/**/bar", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)foo/(.*/)?bar(/.*)?$") +} + +@(test) +test_glob_backslash_escape :: proc(t: ^testing.T) { + result := glob_to_regex("\\!foo", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)!foo(/.*)?$") +} + +@(test) +test_glob_empty :: proc(t: ^testing.T) { + result := glob_to_regex("", false) + defer delete(result) + testing.expect_value(t, result, "(^|/)(/.*)?$") +} + +@(test) +test_is_ignored_basic :: proc(t: ^testing.T) { + gi := parse("*.env\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, ".env", false), true) + testing.expect_value(t, is_ignored(&gi, "foo.env", false), true) + testing.expect_value(t, is_ignored(&gi, ".env.local", false), false) + testing.expect_value(t, is_ignored(&gi, "config.yaml", false), false) +} + +@(test) +test_is_ignored_negation :: proc(t: ^testing.T) { + gi := parse("*.env\n!.env.production\n") + defer destroy(&gi) + + 
testing.expect_value(t, is_ignored(&gi, ".env", false), true) + testing.expect_value(t, is_ignored(&gi, ".env.production", false), false) +} + +@(test) +test_is_ignored_dir_only :: proc(t: ^testing.T) { + gi := parse("node_modules/\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, "node_modules", true), true) + testing.expect_value(t, is_ignored(&gi, "node_modules", false), false) +} + +@(test) +test_is_ignored_anchored :: proc(t: ^testing.T) { + gi := parse("/secret.key\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, "secret.key", false), true) +} + +@(test) +test_is_ignored_comments_skipped :: proc(t: ^testing.T) { + gi := parse("# this is a comment\n#another\n*.tmp\n") + defer destroy(&gi) + + testing.expect_value(t, len(gi.rules), 1) + testing.expect_value(t, is_ignored(&gi, "file.tmp", false), true) +} + +@(test) +test_is_ignored_blank_lines_skipped :: proc(t: ^testing.T) { + gi := parse("\n\n \n*.log\n\n") + defer destroy(&gi) + + testing.expect_value(t, len(gi.rules), 1) +} + +@(test) +test_is_ignored_last_match_wins :: proc(t: ^testing.T) { + gi := parse("*.env\n!*.env\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, ".env", false), false) +} + +@(test) +test_is_ignored_no_rules :: proc(t: ^testing.T) { + gi := parse("") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, "anything", false), false) +} + +@(test) +test_is_ignored_env_pattern :: proc(t: ^testing.T) { + gi := parse(".env*\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, ".env", false), true) + testing.expect_value(t, is_ignored(&gi, ".env.local", false), true) + testing.expect_value(t, is_ignored(&gi, ".envrc", false), true) +} + +@(test) +test_is_ignored_globstar :: proc(t: ^testing.T) { + gi := parse("**/cache\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, "cache", false), true) + testing.expect_value(t, is_ignored(&gi, "foo/cache", false), true) + testing.expect_value(t, 
is_ignored(&gi, "foo/bar/cache", false), true) +} + diff --git a/findr/test_env.odin b/findr/test_env.odin new file mode 100644 index 0000000..feeef18 --- /dev/null +++ b/findr/test_env.odin @@ -0,0 +1,147 @@ +package findr + +import "core:fmt" +import "core:log" +import "core:os" +import "core:sort" +import "core:strings" +import "core:testing" + +TestEnv :: struct { + temp_dir: string, +} + +create_test_env :: proc() -> (env: TestEnv) { + tmp, err := os.mkdir_temp("", "findr-test-*", context.allocator) + if err != nil { + log.error("Failed to create temp dir:", err) + panic("Failed to create temp dir") + } + + env.temp_dir = tmp + return +} + +destroy_test_env :: proc(env: ^TestEnv) { + os.remove_all(env.temp_dir) + delete(env.temp_dir) +} + +create_dir :: proc(env: TestEnv, path: string) { + full := join_path(env.temp_dir, path) + defer delete(full) + os.mkdir_all(full, os.Permissions_Default_Directory) +} + +create_file :: proc(env: TestEnv, path: string, content: string = "") { + full := join_path(env.temp_dir, path) + defer delete(full) + + dir_end := strings.last_index(full, "/") + if dir_end >= 0 { + dir_path := full[:dir_end] + os.mkdir_all(dir_path, os.Permissions_Default_Directory) + } + + f, err := os.create(full) + if err != nil { + log.error("Failed to create file:", full, err) + return + } + if len(content) > 0 { + os.write_string(f, content) + } + os.close(f) +} + +create_git_repo :: proc(env: TestEnv, path: string) { + sub := join_path(path, ".git") + defer delete(sub) + create_dir(env, sub) +} + +assert_output :: proc( + t: ^testing.T, + env: TestEnv, + args: []string, + expected: []string, +) { + results := collect_results(env, args) + defer { + for r in results { delete(r) } + delete(results) + } + + sorted_expected := make([dynamic]string, 0, len(expected)) + for e in expected { append(&sorted_expected, e) } + defer delete(sorted_expected) + + sorted_actual := make([dynamic]string, 0, len(results)) + for a in results { append(&sorted_actual, a) 
} + defer delete(sorted_actual) + + sort.quick_sort(sorted_expected[:]) + sort.quick_sort(sorted_actual[:]) + + if len(sorted_expected) != len(sorted_actual) { + testing.fail(t) + log.error( + fmt.tprintf("Expected %d results, got %d", len(sorted_expected), len(sorted_actual)), + ) + log.error("Expected:", sorted_expected[:]) + log.error("Actual: ", sorted_actual[:]) + return + } + + for i in 0.. 0 { + testing.fail(t) + log.error(fmt.tprintf("Expected no results, got %d:", len(results))) + for r in results { + log.error(" ", r) + } + } +} + +collect_results :: proc(env: TestEnv, args: []string) -> [dynamic]string { + results := make([dynamic]string) + + full_args := make([dynamic]string, 0, len(args) + 1, context.temp_allocator) + append(&full_args, env.temp_dir) + for a in args { append(&full_args, a) } + + for dir in full_args { + walk(dir, &results) + } + + for i in 0.. 0 && stripped[0] == '/' { + stripped = stripped[1:] + } + new_r, _ := strings.clone(stripped) + delete(r) + results[i] = new_r + } + } + + return results +} diff --git a/findr/walker.odin b/findr/walker.odin new file mode 100644 index 0000000..96c387d --- /dev/null +++ b/findr/walker.odin @@ -0,0 +1,260 @@ +package findr + +import "core:fmt" +import "core:os" +import "core:strings" +import "core:sync" +import "core:sys/linux" +import "core:thread" + +FINDR_PARALLEL :: #config(FINDR_PARALLEL, false) +FINDR_THREADS :: #config(FINDR_THREADS, 8) + +RawEntry :: struct { + name: string, + type: linux.Dirent_Type, +} + +walk :: proc(root: string, results: ^[dynamic]string) { + when FINDR_PARALLEL { + walk_parallel(root, results) + } else { + walk_dir_serial(root, results) + } +} + +read_dir_entries :: proc(dir_path: string, has_git: ^bool) -> [dynamic]RawEntry { + entries := make([dynamic]RawEntry) + + cpath := strings.clone_to_cstring(dir_path) + if cpath == nil do return entries + + fd, err := linux.open(cpath, {.DIRECTORY, .CLOEXEC}) + delete(cpath) + if err != .NONE do return entries + + buf: 
[8192]u8 + has_git^ = false + + for { + n, errno := linux.getdents(fd, buf[:]) + if n <= 0 || errno != .NONE do break + + offs := 0 + for d in linux.dirent_iterate_buf(buf[:n], &offs) { + name := linux.dirent_name(d) + if name == "." || name == ".." do continue + + if name == ".git" && d.type == .DIR { + has_git^ = true + } + + cloned := strings.clone(name) + append(&entries, RawEntry{name = cloned, type = d.type}) + } + } + + linux.close(fd) + return entries +} + +free_entries :: proc(entries: ^[dynamic]RawEntry) { + for &entry in entries { + delete(entry.name) + } + delete(entries^) +} + +walk_dir_serial :: proc(dir_path: string, results: ^[dynamic]string) { + has_git := false + entries := read_dir_entries(dir_path, &has_git) + defer free_entries(&entries) + + if has_git { + gi := load_gitignore(dir_path) + defer if gi != nil { + destroy(gi) + free(gi) + } + + for entry in entries { + if entry.name == ".git" do continue + is_dir := entry.type == .DIR + if gi != nil && is_ignored(gi, entry.name, is_dir) { + if !is_dir { + full_path := join_path(dir_path, entry.name) + append(results, full_path) + } + continue + } + if is_dir { + child_path := join_path(dir_path, entry.name) + walk_dir_serial(child_path, results) + delete(child_path) + } + } + } else { + for entry in entries { + if entry.type == .DIR { + child_path := join_path(dir_path, entry.name) + walk_dir_serial(child_path, results) + delete(child_path) + } + } + } +} + +load_gitignore :: proc(dir_path: string) -> ^Gitignore { + gi_path := join_path(dir_path, ".gitignore") + defer delete(gi_path) + + data, err := os.read_entire_file_from_path(gi_path, context.allocator) + if err != nil do return nil + + gi := new(Gitignore) + gi^ = parse(string(data)) + delete(data) + return gi +} + +join_path :: proc(parent, child: string) -> string { + b: strings.Builder + strings.builder_init(&b) + defer strings.builder_destroy(&b) + + fmt.sbprintf(&b, "%s", parent) + if len(parent) == 0 || parent[len(parent) - 1] != '/' { 
+ fmt.sbprintf(&b, "/") + } + fmt.sbprintf(&b, "%s", child) + + s := strings.to_string(b) + result, _ := strings.clone(s) + return result +} + +when FINDR_PARALLEL { + WalkerPool :: struct { + queue: [dynamic]string, + queue_mutex: sync.Mutex, + queue_sema: sync.Atomic_Sema, + results: ^[dynamic]string, + results_mutex: sync.Mutex, + active: i64, + done: sync.One_Shot_Event, + threads: [dynamic]^thread.Thread, + } + + walk_parallel :: proc(root: string, results: ^[dynamic]string) { + pool := new(WalkerPool) + pool.queue = make([dynamic]string) + pool.results = results + pool.active = 1 + pool.threads = make([dynamic]^thread.Thread) + + root_clone, _ := strings.clone(root) + append(&pool.queue, root_clone) + sync.atomic_sema_post(&pool.queue_sema) + + num_threads := FINDR_THREADS + for i in 0..