perf: Replaced fd with custom internals.

This commit is contained in:
2026-06-16 20:57:38 -04:00
commit 55ed98659b
8 changed files with 1208 additions and 0 deletions

270
PLAN.md Normal file
View File

@@ -0,0 +1,270 @@
# findr — Gitignored File Finder
## Overview
findr is a native Odin tool that finds **gitignored files** within git repositories. It replaces envr's current approach of running `fd` twice (all files vs. unignored files) and diffing the results.
**Simplified scope:** findr does one thing — walks directories, finds git repos, reads each repo's `.gitignore`, and prints every gitignored file. No flags, no filtering, no pattern matching. envr handles result filtering itself.
## Current fd Usage in envr (being replaced)
1. **`scan.odin:13-43`** (`scan_path`) — runs `fd` twice per search path:
- Run 1: `fd -a <matcher> [-E <exclude>]... -HI <path>` → all files including gitignored
- Run 2: `fd -a <matcher> [-E <exclude>]... -H <path>` → hidden but NOT gitignored
- Diff = gitignored files only
2. Both go through `run_fd` (`scan.odin:68-118`), which spawns a subprocess and captures output via temp files.
After findr integration, `scan_path` calls `findr.walk(path)` directly — no subprocess, no double-run, no diff.
## Directory Structure
```
findr/
findr.odin # main + CLI (positional dir args only)
walker.odin # recursive directory walker using core:sys/linux getdents
gitignore.odin # .gitignore parsing + glob→regex transpilation + matching
test_env.odin # test harness: temp dir, mock filesystem, assert helpers
findr_test.odin # integration tests (10 tests)
gitignore_test.odin # transpilation + matching unit tests (22 tests)
```
## Decisions
- **Scope**: findr prints ALL gitignored files. No regex filtering, no exclude patterns, no type filters. envr post-processes the output.
- **Gitignore matching**: Transpile gitignore glob patterns to regex, then use `core:text/regex`. No dedicated glob matcher.
- **Stat avoidance**: Use `core:sys/linux` getdents directly — read `dirent.type` from the kernel, never call stat.
- **Architecture**: Separate directory with its own `main`. Core logic (`walk` proc + `gitignore` package) designed to be importable into envr later.
## CLI Interface
```
findr [dir1] [dir2] ...
```
No flags. Defaults to `.` if no dirs given. Prints absolute or relative paths (as given) to stdout, one per line.
## Build
```bash
odin build findr -o:speed -out:findr/findr
```
## How It Works
```
walk(dir):
entries = getdents(dir) # via core:sys/linux, zero stat calls
if entries contains ".git/":
gi = parse(.gitignore) # if present
for entry in entries:
if entry is gitignored file:
emit entry path
if entry is dir (not ignored):
walk(entry) # recurse to find nested repos
else:
for entry in entries:
if entry is dir:
walk(entry) # descend looking for repos
```
Key behaviors:
- **Nested repos**: When a repo is found, subdirectories are still traversed to find nested repos. Gitignored directories are pruned (not descended into).
- **Flat gitignore**: Only the root `.gitignore` is read. `.gitignore` files in subdirectories of a repo are ignored.
- **Non-repo dirs**: Traversed recursively to find repos. No gitignore rules apply.
## Performance Architecture
### Implemented
- **Stat avoidance via `dirent.type`** — Uses `core:sys/linux` getdents directly, bypassing `core:os` which calls `openat` + `fstat` per entry. File type comes free from the directory entry.
- **Prune ignored directories** — When a directory matches a gitignore pattern, it is not descended into. Skips potentially thousands of readdir calls.
- **Parallel traversal** — 8-worker thread pool with shared LIFO queue and futex-based semaphore signaling. 5.4x speedup over serial on home directory.
### Future (if needed)
- BufWriter on stdout for large result sets
- Arena allocators for path strings
## Testing Strategy
- **In-process integration tests** — Tests call `walk()` directly (not via subprocess), build mock filesystems in temp dirs, and compare sorted output.
- **Unit tests** — Pure-function tests for glob→regex transpilation and gitignore matching.
- **Output sorting for determinism** — Always sort output lines before comparison.
- **Memory tracking** — Odin's test runner reports leaks automatically. All 32 tests pass with zero leaks.
### Test Coverage (findr_test.odin)
| Test | What it covers |
|---|---|
| `test_basic_gitignored` | Repo with `.gitignore`, gitignored files emitted, normal files skipped |
| `test_non_repo_not_scanned` | Dirs without `.git/` produce no output |
| `test_negation_pattern` | `!prod.env` un-ignores a file |
| `test_dir_only_pattern` | `node_modules/` pattern doesn't emit file results |
| `test_multiple_repos` | Multiple repos in one tree, each with its own `.gitignore` |
| `test_nested_repos` | Repo inside a repo, both scanned independently |
| `test_gitignore_in_subdir_ignored` | Subdirectory `.gitignore` files are not read |
| `test_no_gitignore_file` | Repo with `.git/` but no `.gitignore` produces nothing |
| `test_empty_gitignore` | Comments and blank lines only → no results |
| `test_multiple_search_dirs` | Multiple top-level search dirs in one call |
### Gitignore Unit Tests (gitignore_test.odin)
22 tests covering: simple/anchored patterns, `*`, `?`, `[abc]`, `[!abc]`, dot escaping, globstar variants, backslash escapes, empty patterns, basic matching, negation, dir-only, comments, blank lines, last-match-wins, env patterns.
## Glob→Regex Transpilation Rules
| Gitignore pattern | Regex | Notes |
|---|---|---|
| `foo` | `(^|/)foo(/.*)?$` | matches at any depth |
| `/foo` | `^foo(/.*)?$` | anchored to gitignore dir |
| `foo/` | `(^|/)foo/.*$` | directory only |
| `*.log` | `(^|/)[^/]*\.log$` | `*` = any chars except `/` |
| `**/foo` | `(^|/)(.*/)?foo(/.*)?$` | `**` = any chars including `/` |
| `foo/**/bar` | `(^|/)foo/(.*/)?bar(/.*)?$` | `**` between segments |
| `!pattern` | (handled by layer) | negation flag, not regex |
| `#comment` | (skipped) | |
| `[abc]` | `[abc]` | same regex syntax |
| `?` | `[^/]` | single char, no `/` |
## Implementation Phases
### Phase 1: Gitignore Transpiler + Tests ✅
**Goal:** Isolated, fully-tested glob→regex transpiler.
**Result:** 22 tests, all passing, zero leaks.
---
### Phase 2: findr Walker + Tests ✅
**Goal:** Working tool that finds gitignored files in git repos.
**Built:**
- `walker.odin` — Parallel DFS using `core:sys/linux` getdents with 8-worker thread pool. Finds repos, reads `.gitignore`, emits gitignored files, recurses into subdirs for nested repos.
- `findr.odin` — Minimal CLI: `findr [dirs...]`, no flags.
- `test_env.odin` — Test harness with temp dirs and mock filesystems.
- `findr_test.odin` — 10 integration tests.
**Result:** All 32 tests pass (22 gitignore + 10 walker), zero leaks.
---
### Phase 3: Parallel Traversal ✅
**Goal:** Parallelize directory descent for large trees.
**Result:** Worker pool with shared LIFO queue, 8 threads, futex-based semaphore signaling. 852ms vs 4.57s serial (5.4x speedup) on `~`. Serial code has been removed — parallel is the only implementation.
---
### Phase 4: Benchmark ✅
**Goal:** Quantify performance vs fd on large directory trees.
**Result:** findr found 227 gitignored files on `~` in 852ms. fd's double-run (all vs unignored) walked ~1.1M entries. findr's pruning of ignored directories (node_modules, dist, etc.) gives a massive advantage.
---
### Phase 5: Integrate into envr (future)
**Goal:** Replace ALL `fd` subprocess usage in envr with in-process findr calls. Remove `Feature.Fd` entirely.
#### Part A: Extend findr API (`findr/walker.odin`)
1. **Add `WalkMode` enum** and `mode` field to `WalkerPool`:
```odin
WalkMode :: enum { GitignoredFiles, GitRepos }
```
2. **Extract `run_pool`** helper — shared pool setup/teardown (create threads, wait for done, cleanup). Both `walk` and `find_repos` call it.
3. **New `walk` signature with filtering:**
```odin
walk :: proc(root: string, results: ^[dynamic]string, matcher: string = "", exclude: []string = nil)
```
- Compiles `matcher` into a regex (stored as `pool.matcher_re`); tested against each file's basename via `regex.find`. Empty = emit all.
- Parses `exclude` patterns into a `^Gitignore` via existing `parse()` (stored as `pool.exclude_gi`). Entries matching any exclude pattern are skipped entirely (not emitted, not descended into).
- Sets `pool.mode = .GitignoredFiles`
4. **`process_dir` filtering logic** (in the `has_git` branch):
- Exclude check first: `is_ignored(exclude_gi, entry.name, is_dir)` → skip entirely (prune dirs, skip files)
- Gitignore check: if ignored, emit file only if `matcher_re` is nil or matches basename
- Not excluded/ignored: descend if dir
- Non-repo branch also prunes dirs matching exclude patterns
5. **New `find_repos` function:**
```odin
find_repos :: proc(root: string) -> [dynamic]string
```
- Creates pool with `mode = .GitRepos`, calls `run_pool`, returns collected repo roots
- Parallel (reuses worker pool architecture)
6. **New `process_dir_repos`** — simpler than `process_dir`:
- If `has_git`: record `dir_path` as repo root
- Always descend into subdirs (except `.git` itself) to find nested repos
- No gitignore/exclude/matcher processing
7. **`walk_worker` switch** — centralized control flow per AGENTS.md convention:
```odin
switch pool.mode {
case .GitignoredFiles: process_dir(pool, dir_path)
case .GitRepos: process_dir_repos(pool, dir_path)
}
```
8. **Cleanup in `walk`:** destroy `matcher_re` and `exclude_gi` after `run_pool` completes.
9. **Add `import "core:text/regex"`** to walker.odin.
**No changes to:** `findr.odin`, `test_env.odin`, `gitignore.odin` (default params preserve existing behavior).
#### Part B: Rewrite `scan_path` (`scan.odin`)
- Add `import "findr"`
- `scan_path` becomes ~3 lines: call `findr.walk(search_path, &paths, cfg.ScanConfig.Matcher, cfg.ScanConfig.Exclude[:])`
- **Delete:** `build_fd_args`, `run_fd`, `next_fd_tmp_path`, `fd_counter`, `fd_seq`, `cant_scan`
- Remove unused imports (`core:sync`, `core:terminal`)
#### Part C: Rewrite `find_git_roots` (`config.odin`)
- Add `import "findr"`
- Replace `run_fd` call with `findr.find_repos(sp)` — no more `filepath.dir` post-processing needed (find_repos returns repo roots directly)
#### Part D: Remove `Feature.Fd` everywhere
| File | Change |
|---|---|
| `features.odin` | Remove `Fd` from enum, remove fd binary check |
| `cmd_scan.odin` | Remove feats/cant_scan guard + "install fd" error |
| `cmd_check.odin` | Same removal |
| `cmd_deps.odin` | Remove fd table row |
| `db.odin` | Change check to `.Git not_in feats` only; update error message |
| `scan_test.odin` | Remove `test_scan_meets_expectations` (cant_scan test); remove `cant_scan` assertions from other tests |
#### Part E: Verification
```bash
odin build findr -o:speed -out:findr/findr
odin test findr
odin build . -o:speed -out:envr
odin test .
```
#### Execution order
1. **findr API changes** → build + test findr (32 tests should pass with default params)
2. **Rewrite scan_path** + delete dead code
3. **Rewrite find_git_roots**
4. **Remove Feature.Fd** across all files
5. **Update tests** → build + test everything
## Risks
| Risk | Mitigation |
|---|---|
| Single-threaded may be slow on huge trees | Resolved — parallel traversal implemented (Phase 3) |
| Gitignore edge cases (`**/foo`, `foo/**/bar`) | Comprehensive gitignore_test.odin with spec examples |
| dirent.type may be UNKNOWN on some filesystems | Fall back to stat only when type is UNKNOWN |
| Missing nested `.env` files in monorepos | Accepted limitation — flat gitignore model |
| Memory allocation churn from path strings | Use thread-local arena allocators in Phase 3 |

27
f.nu Executable file
View File

@@ -0,0 +1,27 @@
#!/usr/bin/env nu
def main [] {
let all = (fd -HI -a .env . ~/ | lines | sort)
let unignored = (fd -H -a .env ~/ | lines | sort)
$all | filter { |it| not ($it in $unignored) } | str join "\n"
# sorted_list_intersect $all $unignored | str join "\n"
}
def sorted_list_intersect [xs1: list, xs2: list] {
let len1 = ($xs1 | length)
let len2 = ($xs2 | length)
mut i = 0
mut j = 0
while ($i < $len1 and $j < $len2) {
if ($xs1 | get $i) < ($xs2 | get $j) {
$i = $i + 1
} else if ($xs2 | get $j) < ($xs1 | get $i) {
$j = $j + 1
} else {
echo ($xs2 | get $j)
$i = $i + 1
$j = $j + 1
}
}
}

35
findr.odin Normal file
View File

@@ -0,0 +1,35 @@
package findr
import "core:fmt"
import "core:os"
main :: proc() {
args := os.args
search_dirs := make([dynamic]string)
defer delete(search_dirs)
for i in 1 ..< len(args) {
append(&search_dirs, args[i])
}
if len(search_dirs) == 0 {
append(&search_dirs, ".")
}
results := make([dynamic]string)
defer {
for r in results {delete(r)}
delete(results)
}
thread_count := os.get_processor_core_count()
for dir in search_dirs {
walk(dir, &results, thread_count)
}
for r in results {
fmt.println(r)
}
}

164
findr_test.odin Normal file
View File

@@ -0,0 +1,164 @@
package findr
import "core:os"
import "core:testing"
@(test)
test_basic_gitignored :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "*.env\n")
create_file(env, "repo/.env")
create_file(env, "repo/secrets.env")
create_file(env, "repo/normal.txt")
assert_output(t, env, nil, {"repo/.env", "repo/secrets.env"})
}
@(test)
test_non_repo_not_scanned :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_dir(env, "norepo")
create_file(env, "norepo/.gitignore", "*.env\n")
create_file(env, "norepo/.env")
assert_output_empty(t, env, nil)
}
@(test)
test_negation_pattern :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "*.env\n!prod.env\n")
create_file(env, "repo/.env")
create_file(env, "repo/secrets.env")
create_file(env, "repo/prod.env")
assert_output(t, env, nil, {"repo/.env", "repo/secrets.env"})
}
@(test)
test_dir_only_pattern :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "node_modules/\n")
create_file(env, "repo/node_modules", "should not match (it's a file)")
create_dir(env, "repo/ignored_dir")
create_file(env, "repo/.gitignore", "ignored_dir/\n")
// dir-only patterns don't produce file results
assert_output(t, env, nil, {})
}
@(test)
test_multiple_repos :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo1")
create_file(env, "repo1/.gitignore", "*.env\n")
create_file(env, "repo1/a.env")
create_git_repo(env, "repo2")
create_file(env, "repo2/.gitignore", "*.key\n")
create_file(env, "repo2/secret.key")
assert_output(t, env, nil, {"repo1/a.env", "repo2/secret.key"})
}
@(test)
test_nested_repos :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "parent")
create_file(env, "parent/.gitignore", "*.env\n")
create_file(env, "parent/top.env")
create_git_repo(env, "parent/child")
create_file(env, "parent/child/.gitignore", "*.key\n")
create_file(env, "parent/child/api.key")
assert_output(t, env, nil, {"parent/top.env", "parent/child/api.key"})
}
@(test)
test_gitignore_in_subdir_ignored :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "*.env\n")
create_dir(env, "repo/sub")
create_file(env, "repo/sub/.gitignore", "*.txt\n")
create_file(env, "repo/sub/secret.txt")
create_file(env, "repo/sub/.env")
// .gitignore in subdir is not read (flat model).
// secret.txt should NOT appear (subdir .gitignore ignored).
// .env should NOT appear (it's nested, not top-level of repo).
assert_output(t, env, nil, {})
}
@(test)
test_no_gitignore_file :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.env")
assert_output_empty(t, env, nil)
}
@(test)
test_empty_gitignore :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "\n\n# comment\n\n")
create_file(env, "repo/.env")
assert_output_empty(t, env, nil)
}
@(test)
test_multiple_search_dirs :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "dir1/repo")
create_file(env, "dir1/repo/.gitignore", "*.env\n")
create_file(env, "dir1/repo/a.env")
create_git_repo(env, "dir2/repo")
create_file(env, "dir2/repo/.gitignore", "*.env\n")
create_file(env, "dir2/repo/b.env")
dir1 := join_path(env.temp_dir, "dir1")
defer delete(dir1)
dir2 := join_path(env.temp_dir, "dir2")
defer delete(dir2)
results := make([dynamic]string)
defer {
for r in results {delete(r)}
delete(results)
}
thread_count := os.get_processor_core_count()
walk(dir1, &results, thread_count)
walk(dir2, &results, thread_count)
testing.expect_value(t, len(results), 2)
}

182
gitignore.odin Normal file
View File

@@ -0,0 +1,182 @@
package findr
import "core:fmt"
import "core:strings"
import "core:text/regex"
is_regex_meta :: proc(c: u8) -> bool {
switch c {
case '.', '+', '(', ')', '{', '}', '^', '$', '|':
return true
}
return false
}
glob_to_regex :: proc(pattern: string, anchored: bool) -> string {
sb: strings.Builder
strings.builder_init(&sb)
defer strings.builder_destroy(&sb)
if anchored {
fmt.sbprintf(&sb, "^")
} else {
fmt.sbprintf(&sb, "(^|/)")
}
i := 0
for i < len(pattern) {
c := pattern[i]
if c == '*' {
if i + 1 < len(pattern) && pattern[i + 1] == '*' {
prev_slash := i == 0 || pattern[i - 1] == '/'
at_end := i + 2 >= len(pattern)
next_slash := !at_end && pattern[i + 2] == '/'
if prev_slash && (next_slash || at_end) {
if next_slash {
i += 3
fmt.sbprintf(&sb, "(.*/)?")
} else {
i += 2
fmt.sbprintf(&sb, ".*")
}
} else {
fmt.sbprintf(&sb, "[^/]*")
i += 2
}
} else {
fmt.sbprintf(&sb, "[^/]*")
i += 1
}
} else if c == '?' {
fmt.sbprintf(&sb, "[^/]")
i += 1
} else if c == '[' {
append(&sb.buf, '[')
i += 1
if i < len(pattern) && pattern[i] == '!' {
append(&sb.buf, '^')
i += 1
}
if i < len(pattern) && pattern[i] == ']' {
append(&sb.buf, ']')
i += 1
}
for i < len(pattern) && pattern[i] != ']' {
append(&sb.buf, pattern[i])
i += 1
}
if i < len(pattern) {
append(&sb.buf, ']')
i += 1
}
} else if c == '\\' {
i += 1
if i < len(pattern) {
if is_regex_meta(pattern[i]) {
append(&sb.buf, '\\')
}
append(&sb.buf, pattern[i])
i += 1
}
} else if is_regex_meta(c) {
append(&sb.buf, '\\')
append(&sb.buf, c)
i += 1
} else {
append(&sb.buf, c)
i += 1
}
}
fmt.sbprintf(&sb, "(/.*)?$")
s := strings.to_string(sb)
result, _ := strings.clone(s)
return result
}
Rule :: struct {
regex: regex.Regular_Expression,
negated: bool,
dir_only: bool,
}
Gitignore :: struct {
rules: [dynamic]Rule,
}
parse :: proc(content: string) -> Gitignore {
gi: Gitignore
gi.rules = make([dynamic]Rule)
remaining := content
for {
line, ok := strings.split_lines_iterator(&remaining)
if !ok do break
s := strings.trim_space(line)
if len(s) == 0 do continue
if s[0] == '#' do continue
negated := false
if s[0] == '!' {
negated = true
s = s[1:]
}
if len(s) > 0 && s[0] == '\\' {
if len(s) > 1 && (s[1] == '#' || s[1] == '!') {
s = s[1:]
}
}
dir_only := false
if len(s) > 0 && s[len(s) - 1] == '/' {
dir_only = true
s = s[:len(s) - 1]
}
anchored := false
if len(s) > 0 && s[0] == '/' {
anchored = true
s = s[1:]
}
if len(s) == 0 do continue
regex_str := glob_to_regex(s, anchored)
re, err := regex.create(regex_str, {regex.Flag.No_Capture})
delete(regex_str)
if err != nil do continue
append(&gi.rules, Rule{
regex = re,
negated = negated,
dir_only = dir_only,
})
}
return gi
}
is_ignored :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> bool {
matched := false
for rule in gi.rules {
if rule.dir_only && !is_dir do continue
cap, ok := regex.match(rule.regex, path)
regex.destroy(cap)
if ok {
matched = !rule.negated
}
}
return matched
}
destroy :: proc(gi: ^Gitignore) {
for rule in gi.rules {
regex.destroy(rule.regex)
}
delete(gi.rules)
}

178
gitignore_test.odin Normal file
View File

@@ -0,0 +1,178 @@
package findr
import "core:testing"
@(test)
test_glob_simple :: proc(t: ^testing.T) {
result := glob_to_regex("foo", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)foo(/.*)?$")
}
@(test)
test_glob_anchored :: proc(t: ^testing.T) {
result := glob_to_regex("foo", true)
defer delete(result)
testing.expect_value(t, result, "^foo(/.*)?$")
}
@(test)
test_glob_star :: proc(t: ^testing.T) {
result := glob_to_regex("*.log", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)[^/]*\\.log(/.*)?$")
}
@(test)
test_glob_question :: proc(t: ^testing.T) {
result := glob_to_regex("?.log", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)[^/]\\.log(/.*)?$")
}
@(test)
test_glob_char_class :: proc(t: ^testing.T) {
result := glob_to_regex("[abc].log", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)[abc]\\.log(/.*)?$")
}
@(test)
test_glob_negated_class :: proc(t: ^testing.T) {
result := glob_to_regex("[!abc].log", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)[^abc]\\.log(/.*)?$")
}
@(test)
test_glob_dot_escaped :: proc(t: ^testing.T) {
result := glob_to_regex(".env", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)\\.env(/.*)?$")
}
@(test)
test_glob_globstar_prefix :: proc(t: ^testing.T) {
result := glob_to_regex("**/foo", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)(.*/)?foo(/.*)?$")
}
@(test)
test_glob_globstar_suffix :: proc(t: ^testing.T) {
result := glob_to_regex("abc/**", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)abc/.*(/.*)?$")
}
@(test)
test_glob_globstar_middle :: proc(t: ^testing.T) {
result := glob_to_regex("foo/**/bar", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)foo/(.*/)?bar(/.*)?$")
}
@(test)
test_glob_backslash_escape :: proc(t: ^testing.T) {
result := glob_to_regex("\\!foo", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)!foo(/.*)?$")
}
@(test)
test_glob_empty :: proc(t: ^testing.T) {
result := glob_to_regex("", false)
defer delete(result)
testing.expect_value(t, result, "(^|/)(/.*)?$")
}
@(test)
test_is_ignored_basic :: proc(t: ^testing.T) {
gi := parse("*.env\n")
defer destroy(&gi)
testing.expect_value(t, is_ignored(&gi, ".env", false), true)
testing.expect_value(t, is_ignored(&gi, "foo.env", false), true)
testing.expect_value(t, is_ignored(&gi, ".env.local", false), false)
testing.expect_value(t, is_ignored(&gi, "config.yaml", false), false)
}
@(test)
test_is_ignored_negation :: proc(t: ^testing.T) {
gi := parse("*.env\n!.env.production\n")
defer destroy(&gi)
testing.expect_value(t, is_ignored(&gi, ".env", false), true)
testing.expect_value(t, is_ignored(&gi, ".env.production", false), false)
}
@(test)
test_is_ignored_dir_only :: proc(t: ^testing.T) {
gi := parse("node_modules/\n")
defer destroy(&gi)
testing.expect_value(t, is_ignored(&gi, "node_modules", true), true)
testing.expect_value(t, is_ignored(&gi, "node_modules", false), false)
}
@(test)
test_is_ignored_anchored :: proc(t: ^testing.T) {
gi := parse("/secret.key\n")
defer destroy(&gi)
testing.expect_value(t, is_ignored(&gi, "secret.key", false), true)
}
@(test)
test_is_ignored_comments_skipped :: proc(t: ^testing.T) {
gi := parse("# this is a comment\n#another\n*.tmp\n")
defer destroy(&gi)
testing.expect_value(t, len(gi.rules), 1)
testing.expect_value(t, is_ignored(&gi, "file.tmp", false), true)
}
@(test)
test_is_ignored_blank_lines_skipped :: proc(t: ^testing.T) {
gi := parse("\n\n \n*.log\n\n")
defer destroy(&gi)
testing.expect_value(t, len(gi.rules), 1)
}
@(test)
test_is_ignored_last_match_wins :: proc(t: ^testing.T) {
gi := parse("*.env\n!*.env\n")
defer destroy(&gi)
testing.expect_value(t, is_ignored(&gi, ".env", false), false)
}
@(test)
test_is_ignored_no_rules :: proc(t: ^testing.T) {
gi := parse("")
defer destroy(&gi)
testing.expect_value(t, is_ignored(&gi, "anything", false), false)
}
@(test)
test_is_ignored_env_pattern :: proc(t: ^testing.T) {
gi := parse(".env*\n")
defer destroy(&gi)
testing.expect_value(t, is_ignored(&gi, ".env", false), true)
testing.expect_value(t, is_ignored(&gi, ".env.local", false), true)
testing.expect_value(t, is_ignored(&gi, ".envrc", false), true)
}
@(test)
test_is_ignored_globstar :: proc(t: ^testing.T) {
gi := parse("**/cache\n")
defer destroy(&gi)
testing.expect_value(t, is_ignored(&gi, "cache", false), true)
testing.expect_value(t, is_ignored(&gi, "foo/cache", false), true)
testing.expect_value(t, is_ignored(&gi, "foo/bar/cache", false), true)
}

144
test_env.odin Normal file
View File

@@ -0,0 +1,144 @@
package findr
import "core:fmt"
import "core:log"
import "core:os"
import "core:sort"
import "core:strings"
import "core:testing"
TestEnv :: struct {
temp_dir: string,
}
create_test_env :: proc() -> (env: TestEnv) {
tmp, err := os.mkdir_temp("", "findr-test-*", context.allocator)
if err != nil {
log.error("Failed to create temp dir:", err)
panic("Failed to create temp dir")
}
env.temp_dir = tmp
return
}
destroy_test_env :: proc(env: ^TestEnv) {
os.remove_all(env.temp_dir)
delete(env.temp_dir)
}
create_dir :: proc(env: TestEnv, path: string) {
full := join_path(env.temp_dir, path)
defer delete(full)
os.mkdir_all(full, os.Permissions_Default_Directory)
}
create_file :: proc(env: TestEnv, path: string, content: string = "") {
full := join_path(env.temp_dir, path)
defer delete(full)
dir_end := strings.last_index(full, "/")
if dir_end >= 0 {
dir_path := full[:dir_end]
os.mkdir_all(dir_path, os.Permissions_Default_Directory)
}
f, err := os.create(full)
if err != nil {
log.error("Failed to create file:", full, err)
return
}
if len(content) > 0 {
os.write_string(f, content)
}
os.close(f)
}
create_git_repo :: proc(env: TestEnv, path: string) {
sub := join_path(path, ".git")
defer delete(sub)
create_dir(env, sub)
}
assert_output :: proc(t: ^testing.T, env: TestEnv, args: []string, expected: []string) {
results := collect_results(env, args)
defer {
for r in results {delete(r)}
delete(results)
}
sorted_expected := make([dynamic]string, 0, len(expected))
for e in expected {append(&sorted_expected, e)}
defer delete(sorted_expected)
sorted_actual := make([dynamic]string, 0, len(results))
for a in results {append(&sorted_actual, a)}
defer delete(sorted_actual)
sort.quick_sort(sorted_expected[:])
sort.quick_sort(sorted_actual[:])
if len(sorted_expected) != len(sorted_actual) {
testing.fail(t)
log.error(
fmt.tprintf("Expected %d results, got %d", len(sorted_expected), len(sorted_actual)),
)
log.error("Expected:", sorted_expected[:])
log.error("Actual: ", sorted_actual[:])
return
}
for i in 0 ..< len(sorted_expected) {
if sorted_expected[i] != sorted_actual[i] {
testing.fail(t)
log.error(fmt.tprintf("Mismatch at index %d", i))
log.error("Expected:", sorted_expected[:])
log.error("Actual: ", sorted_actual[:])
return
}
}
}
assert_output_empty :: proc(t: ^testing.T, env: TestEnv, args: []string) {
results := collect_results(env, args)
defer {
for r in results {delete(r)}
delete(results)
}
if len(results) > 0 {
testing.fail(t)
log.error(fmt.tprintf("Expected no results, got %d:", len(results)))
for r in results {
log.error(" ", r)
}
}
}
collect_results :: proc(env: TestEnv, args: []string) -> [dynamic]string {
results := make([dynamic]string)
full_args := make([dynamic]string, 0, len(args) + 1, context.temp_allocator)
append(&full_args, env.temp_dir)
for a in args {append(&full_args, a)}
thread_count := os.get_processor_core_count()
for dir in full_args {
walk(dir, &results, thread_count)
}
for i in 0 ..< len(results) {
r := results[i]
if strings.has_prefix(r, env.temp_dir) {
stripped := r[len(env.temp_dir):]
if len(stripped) > 0 && stripped[0] == '/' {
stripped = stripped[1:]
}
new_r, _ := strings.clone(stripped)
delete(r)
results[i] = new_r
}
}
return results
}

208
walker.odin Normal file
View File

@@ -0,0 +1,208 @@
package findr
import "core:fmt"
import "core:os"
import "core:strings"
import "core:sync"
import "core:sys/linux"
import "core:thread"
RawEntry :: struct {
name: string,
type: linux.Dirent_Type,
}
WalkerPool :: struct {
queue: [dynamic]string,
queue_mutex: sync.Mutex,
queue_sema: sync.Atomic_Sema,
results: ^[dynamic]string,
results_mutex: sync.Mutex,
active: i64,
done: sync.One_Shot_Event,
threads: [dynamic]^thread.Thread,
}
walk :: proc(root: string, results: ^[dynamic]string, thread_count: int) {
pool := new(WalkerPool)
pool.queue = make([dynamic]string)
pool.results = results
pool.active = 1
pool.threads = make([dynamic]^thread.Thread)
root_clone, _ := strings.clone(root)
append(&pool.queue, root_clone)
sync.atomic_sema_post(&pool.queue_sema)
for i in 0 ..< thread_count {
t := thread.create(walk_worker)
t.data = rawptr(pool)
t.init_context = context
thread.start(t)
append(&pool.threads, t)
}
sync.one_shot_event_wait(&pool.done)
for _ in 0 ..< thread_count {
sync.atomic_sema_post(&pool.queue_sema)
}
for t in pool.threads {
thread.destroy(t)
}
delete(pool.threads)
for path in pool.queue {
delete(path)
}
delete(pool.queue)
free(pool)
}
walk_worker :: proc(t: ^thread.Thread) {
pool := cast(^WalkerPool)t.data
for {
sync.atomic_sema_wait(&pool.queue_sema)
sync.mutex_lock(&pool.queue_mutex)
if len(pool.queue) == 0 {
sync.mutex_unlock(&pool.queue_mutex)
if sync.atomic_load_explicit(&pool.active, .Acquire) == 0 {
sync.one_shot_event_signal(&pool.done)
}
break
}
last := len(pool.queue) - 1
dir_path := pool.queue[last]
ordered_remove(&pool.queue, last)
sync.mutex_unlock(&pool.queue_mutex)
process_dir(pool, dir_path)
delete(dir_path)
old := sync.atomic_sub_explicit(&pool.active, 1, .Release)
if old == 1 {
sync.one_shot_event_signal(&pool.done)
}
}
}
process_dir :: proc(pool: ^WalkerPool, dir_path: string) {
has_git := false
entries := read_dir_entries(dir_path, &has_git)
defer free_entries(&entries)
if has_git {
gi := load_gitignore(dir_path)
defer if gi != nil {
destroy(gi)
free(gi)
}
for entry in entries {
if entry.name == ".git" do continue
is_dir := entry.type == .DIR
if gi != nil && is_ignored(gi, entry.name, is_dir) {
if !is_dir {
full_path := join_path(dir_path, entry.name)
sync.mutex_lock(&pool.results_mutex)
append(pool.results, full_path)
sync.mutex_unlock(&pool.results_mutex)
}
continue
}
if is_dir {
child_path := join_path(dir_path, entry.name)
push_work(pool, child_path)
}
}
} else {
for entry in entries {
if entry.type == .DIR {
child_path := join_path(dir_path, entry.name)
push_work(pool, child_path)
}
}
}
}
push_work :: proc(pool: ^WalkerPool, path: string) {
sync.atomic_add_explicit(&pool.active, 1, .Relaxed)
sync.mutex_lock(&pool.queue_mutex)
append(&pool.queue, path)
sync.mutex_unlock(&pool.queue_mutex)
sync.atomic_sema_post(&pool.queue_sema)
}
read_dir_entries :: proc(dir_path: string, has_git: ^bool) -> [dynamic]RawEntry {
entries := make([dynamic]RawEntry)
cpath := strings.clone_to_cstring(dir_path)
if cpath == nil do return entries
fd, err := linux.open(cpath, {.DIRECTORY, .CLOEXEC})
delete(cpath)
if err != .NONE do return entries
buf: [8192]u8
has_git^ = false
for {
n, errno := linux.getdents(fd, buf[:])
if n <= 0 || errno != .NONE do break
offs := 0
for d in linux.dirent_iterate_buf(buf[:n], &offs) {
name := linux.dirent_name(d)
if name == "." || name == ".." do continue
if name == ".git" && d.type == .DIR {
has_git^ = true
}
cloned := strings.clone(name)
append(&entries, RawEntry{name = cloned, type = d.type})
}
}
linux.close(fd)
return entries
}
free_entries :: proc(entries: ^[dynamic]RawEntry) {
for &entry in entries {
delete(entry.name)
}
delete(entries^)
}
load_gitignore :: proc(dir_path: string) -> ^Gitignore {
gi_path := join_path(dir_path, ".gitignore")
defer delete(gi_path)
data, err := os.read_entire_file_from_path(gi_path, context.allocator)
if err != .NONE do return nil
gi := new(Gitignore)
gi^ = parse(string(data))
delete(data)
return gi
}
join_path :: proc(parent, child: string) -> string {
b: strings.Builder
strings.builder_init(&b)
defer strings.builder_destroy(&b)
fmt.sbprintf(&b, "%s", parent)
if len(parent) == 0 || parent[len(parent) - 1] != '/' {
fmt.sbprintf(&b, "/")
}
fmt.sbprintf(&b, "%s", child)
s := strings.to_string(b)
result, _ := strings.clone(s)
return result
}