12 Commits

Author SHA1 Message Date
19d03ff71a perf(findr): Replaced regex engine with glob. 2026-06-17 14:44:22 -04:00
ce57009b92 perf(findr): Added spall support. 2026-06-17 13:56:33 -04:00
f51c0d6755 perf(findr): Improved performance of join_path* procedures. 2026-06-17 13:25:45 -04:00
cbab562d62 perf(findr): Each thread gets its own buffer. 2026-06-17 13:08:11 -04:00
116ed6de4c perf(findr): Use buffered writer. 2026-06-17 13:04:33 -04:00
3e5889d5c0 wip: "full" finder 2026-06-17 12:42:58 -04:00
6fe61251ee fix(find): Ignored volatile dirs in benchmarks. 2026-06-17 12:13:42 -04:00
ba56748cc0 wip: "full" finder 2026-06-17 11:45:33 -04:00
0b380c3674 wip: "full" finder 2026-06-17 10:43:00 -04:00
9fcf09601e wip: "full" findr
Creating direct equivalent of fd for performance testing, before reducing
scope to needed features.
2026-06-17 10:32:24 -04:00
c1e93b66e0 wip: findr. 2026-06-17 10:04:04 -04:00
440f944b33 perf: Replaced fd with custom internals. 2026-06-17 10:03:58 -04:00
32 changed files with 1480 additions and 535 deletions

View File

@@ -33,6 +33,7 @@ repositories.
- An SSH key pair (for encryption/decryption) - An SSH key pair (for encryption/decryption)
- The following binaries: - The following binaries:
- [fd](https://github.com/sharkdp/fd) - [fd](https://github.com/sharkdp/fd)
- [git](https://git-scm.com)
## Installation ## Installation

View File

@@ -35,7 +35,13 @@ Stdout will be captured by redirecting `os.stdout` to a pipe.
## Hard to test (interactive / external deps) ## Hard to test (interactive / external deps)
### `cmd_deps` (cmd_deps.odin)
- Needs `git` and/or `fd` in PATH
- Test TTY and non-TTY paths
- Skip if dependencies not available (with `#assert` like TODO 28 suggests)
### `cmd_scan` (cmd_scan.odin) ### `cmd_scan` (cmd_scan.odin)
- Needs `fd` installed
- Test with fixture git repo containing `.env` files - Test with fixture git repo containing `.env` files
- Test `find_unbacked` integration (already partially tested in `cmd_check_test.odin`) - Test `find_unbacked` integration (already partially tested in `cmd_check_test.odin`)
- Non-TTY JSON output path - Non-TTY JSON output path
@@ -60,4 +66,5 @@ Stdout will be captured by redirecting `os.stdout` to a pipe.
- DB integration tests should use in-memory SQLite (`:memory:`) where possible. - DB integration tests should use in-memory SQLite (`:memory:`) where possible.
- Temp dir fixtures should follow the pattern in `scan_test.odin`. - Temp dir fixtures should follow the pattern in `scan_test.odin`.
- External dependency tests (`fd`, `git`) should use `#assert` to ensure the dependency is present rather than silently skipping (TODO 28).
- Tests that manipulate the `HOME` env var must use a mutex to prevent races with parallel test execution. - Tests that manipulate the `HOME` env var must use a mutex to prevent races with parallel test execution.

View File

@@ -26,6 +26,8 @@
15. Add a text filter to the multi_select. 15. Add a text filter to the multi_select.
16. Create backup / fallback fd.
17. Add tests for untested commands. 17. Add tests for untested commands.
18. 2 scan tests silently skip when fd isn't installed, tests pass without actually testing anything. These should use #assert to be sure that fd is in path. 18. 2 scan tests silently skip when fd isn't installed, tests pass without actually testing anything. These should use #assert to be sure that fd is in path.
@@ -38,8 +40,6 @@
23. procedures should be ordered by use, main at the top, then in the order they are called from main. 23. procedures should be ordered by use, main at the top, then in the order they are called from main.
24. [x] Remove git dependency.
## Double-check AI output ## Double-check AI output
- [ ] cli.odin - [ ] cli.odin
@@ -78,11 +78,3 @@
- [ ] ssh_test.odin - [ ] ssh_test.odin
- [ ] table.odin - [ ] table.odin
- [ ] table_test.odin - [ ] table_test.odin
- [ ] findr/findr_test.odin
- [ ] findr/gitignore.odin
- [ ] findr/gitignore_test.odin
- [ ] findr/glob.odin
- [ ] findr/glob_test.odin
- [ ] findr/repos.odin
- [ ] findr/test_env.odin
- [ ] findr/walker.odin

View File

@@ -30,6 +30,10 @@ The application relies on external tools that need to be installed separately on
- Install via: `winget install sharkdp.fd` or `choco install fd` - Install via: `winget install sharkdp.fd` or `choco install fd`
- Alternative: `scoop install fd` - Alternative: `scoop install fd`
2. **git** - Version control system
- Install via: `winget install Git.Git` or download from git-scm.com
- Usually already available on most development machines
## Minor Compatibility Notes ## Minor Compatibility Notes
### File Permissions ### File Permissions
@@ -61,6 +65,7 @@ if editor == "" {
1. Install required dependencies: 1. Install required dependencies:
```powershell ```powershell
winget install sharkdp.fd winget install sharkdp.fd
winget install Git.Git
``` ```
2. Fix the path handling bug in `app/env_file.go:209` 2. Fix the path handling bug in `app/env_file.go:209`

View File

@@ -43,6 +43,13 @@ key somewhere, otherwise your data could be lost forever.`,
{"list", "envr list", "View your tracked files", "", {}}, {"list", "envr list", "View your tracked files", "", {}},
{"remove", "envr remove <path>", "Remove a .env file from your database", "", {}}, {"remove", "envr remove <path>", "Remove a .env file from your database", "", {}},
{"check", "envr check [path]", "Check if files are backed up", "", {}}, {"check", "envr check [path]", "Check if files are backed up", "", {}},
{
"deps",
"envr deps",
"Check for missing binaries",
"envr relies on external binaries for certain functionality.\n\nThe check command reports on which binaries are available and which are not.",
{},
},
{"version", "envr version", "Show envr's version", "", {}}, {"version", "envr version", "Show envr's version", "", {}},
{"edit-config", "envr edit-config", "Edit your config with your default editor", "", {}}, {"edit-config", "envr edit-config", "Edit your config with your default editor", "", {}},
{ {

View File

@@ -5,6 +5,8 @@ import "core:os"
import "core:path/filepath" import "core:path/filepath"
cmd_check :: proc(cmd: ^Command) { cmd_check :: proc(cmd: ^Command) {
feats := check_features()
check_path: string check_path: string
if len(cmd.args) > 0 { if len(cmd.args) > 0 {
check_path = cmd.args[0] check_path = cmd.args[0]
@@ -40,6 +42,15 @@ cmd_check :: proc(cmd: ^Command) {
files_in_path: [dynamic]string files_in_path: [dynamic]string
if is_dir { if is_dir {
if cant_scan(feats) {
fmt.wprintln(
cmd.err,
"Error: please install fd to use the check command (https://github.com/sharkdp/fd)",
flush = false,
)
return
}
scanned, scan_ok := scan_path(abs_path, db.cfg) scanned, scan_ok := scan_path(abs_path, db.cfg)
if !scan_ok { if !scan_ok {
fmt.wprintln(cmd.err, "Error scanning directory for .env files", flush = false) fmt.wprintln(cmd.err, "Error scanning directory for .env files", flush = false)

33
cmd_deps.odin Normal file
View File

@@ -0,0 +1,33 @@
package main
import "core:fmt"
import "core:os"
import "core:terminal"
// TODO: Improve table rendering
// cmd_deps reports which external binaries envr can find on PATH.
//
// One row is produced per feature returned by check_features. The rows are
// rendered as a table when stdout is a terminal, and as JSON rows followed by
// a newline otherwise. All output goes to cmd.out.
cmd_deps :: proc(cmd: ^Command) {
	feats := check_features()

	headers := []string{"Feature", "Status"}

	rows: [dynamic][]string
	// The dynamic array is heap-backed; release it once rendering is done so
	// the command (and any test driving it) does not leak.
	defer delete(rows)

	git_status := "\u2713 Available" if .Git in feats else "\u2717 Missing"
	append(&rows, []string{"Git", git_status})

	fd_status := "\u2713 Available" if .Fd in feats else "\u2717 Missing"
	append(&rows, []string{"fd", fd_status})

	if terminal.is_terminal(os.stdout) {
		render_table(cmd.out, headers, rows[:])
	} else {
		render_json_rows(cmd.out, headers, rows[:])
		fmt.wprint(cmd.out, "\n", flush = false)
	}
}

View File

@@ -6,6 +6,16 @@ import "core:os"
import "core:terminal" import "core:terminal"
cmd_scan :: proc(cmd: ^Command) { cmd_scan :: proc(cmd: ^Command) {
feats := check_features()
if cant_scan(feats) {
fmt.wprintln(
cmd.err,
"Error: please install fd to use the scan command (https://github.com/sharkdp/fd)",
flush = false,
)
return
}
db, db_ok := db_open(cmd.config_path) db, db_ok := db_open(cmd.config_path)
if !db_ok { if !db_ok {
return return

View File

@@ -6,8 +6,6 @@ import "core:os"
import "core:path/filepath" import "core:path/filepath"
import "core:strings" import "core:strings"
import "findr"
SshKeyPair :: struct { SshKeyPair :: struct {
Private: string `json:"private"`, Private: string `json:"private"`,
Public: string `json:"public"`, Public: string `json:"public"`,
@@ -220,7 +218,22 @@ search_paths :: proc(cfg: Config) -> (paths: [dynamic]string) {
find_git_roots :: proc(cfg: Config) -> (roots: [dynamic]string, ok: bool) { find_git_roots :: proc(cfg: Config) -> (roots: [dynamic]string, ok: bool) {
paths := search_paths(cfg) paths := search_paths(cfg)
findr.find_repos(paths[:], &roots, os.get_processor_core_count())
for sp in paths {
args := []string{"fd", "-H", "-t", "d", "^\\.git$", sp}
lines, fd_ok := run_fd(args)
if !fd_ok {
return
}
for line in lines {
cleaned, _ := filepath.clean(line)
parent := filepath.dir(cleaned)
cloned, _ := strings.clone(parent)
append(&roots, cloned)
}
}
ok = true ok = true
return return
} }

71
db.odin
View File

@@ -2,12 +2,12 @@ package main
import "core:crypto/hash" import "core:crypto/hash"
import "core:encoding/hex" import "core:encoding/hex"
import "core:encoding/ini"
import "core:encoding/json" import "core:encoding/json"
import "core:fmt" import "core:fmt"
import "core:os" import "core:os"
import "core:path/filepath" import "core:path/filepath"
import "core:strings" import "core:strings"
import "core:time"
import "sqlite" import "sqlite"
@@ -51,6 +51,14 @@ delete_envfile :: proc(f: ^EnvFile) {
delete(f.contents) delete(f.contents)
} }
// make_temp_path builds a path of the form "/tmp/envr-<pid>-<unix seconds>.db".
//
// The result is a fresh string allocated with `allocator` and owned by the
// caller, who should `delete` it when done (or pass `context.temp_allocator`).
// The previous implementation returned a view into a strings.Builder whose
// buffer was destroyed by a `defer` on return, leaving the caller with a
// dangling string.
//
// The timestamp has one-second resolution, so two calls made by the same
// process within the same second return the same path.
make_temp_path :: proc(allocator := context.allocator) -> string {
	ts := time.time_to_unix(time.now())
	return fmt.aprintf("/tmp/envr-%d-%d.db", os.get_pid(), ts, allocator = allocator)
}
db_open :: proc(cfg_path: string) -> (Db, bool) { db_open :: proc(cfg_path: string) -> (Db, bool) {
cfg, ok := load_config(cfg_path) cfg, ok := load_config(cfg_path)
@@ -228,24 +236,59 @@ db_restore_from_encrypted :: proc(db: ^rawptr, cfg: Config) -> bool {
get_git_remotes :: proc(dir: string) -> [dynamic]string { get_git_remotes :: proc(dir: string) -> [dynamic]string {
remotes: [dynamic]string remotes: [dynamic]string
remote_set: map[string]bool remote_set: map[string]bool
defer delete(remote_set)
config_path, _ := filepath.join({dir, ".git", "config"}, context.temp_allocator) b: strings.Builder
m, _, ok := ini.load_map_from_path(config_path, context.allocator) strings.builder_init(&b)
if !ok { defer strings.builder_destroy(&b)
fmt.sbprintf(&b, "%s-git-remotes", make_temp_path())
tmp_path := strings.to_string(b)
tmp_file, tmp_err := os.open(tmp_path, os.O_CREATE | os.O_WRONLY | os.O_TRUNC)
if tmp_err != nil {
return remotes return remotes
} }
defer ini.delete_map(m)
for section_name, section in m { args := []string{"git", "remote", "-v"}
if strings.has_prefix(section_name, "remote ") { desc := os.Process_Desc {
if url, ok := section["url"]; ok { command = args,
remote_set[url] = true stdout = tmp_file,
stderr = nil,
working_dir = dir,
} }
p, start_err := os.process_start(desc)
os.close(tmp_file)
if start_err != nil {
os.remove(tmp_path)
return remotes
}
state, wait_err := os.process_wait(p)
if wait_err != nil || state.exit_code != 0 {
os.remove(tmp_path)
return remotes
}
data, read_err := os.read_entire_file_from_path(tmp_path, context.allocator)
defer delete(data)
os.remove(tmp_path)
if read_err != nil {
return remotes
}
lines := strings.split(string(data), "\n")
for &line in lines {
line = strings.trim_space(line)
if len(line) == 0 {
continue
}
parts := strings.fields(line)
if len(parts) >= 2 {
remote_set[parts[1]] = true
} }
} }
for remote in remote_set { for remote, _ in remote_set {
cloned, _ := strings.clone(remote) cloned, _ := strings.clone(remote)
append(&remotes, cloned) append(&remotes, cloned)
} }
@@ -473,6 +516,12 @@ update_dir :: proc(f: ^EnvFile, new_dir: string) {
} }
find_moved_dirs :: proc(d: ^Db, f: ^EnvFile) -> ([dynamic]string, bool) { find_moved_dirs :: proc(d: ^Db, f: ^EnvFile) -> ([dynamic]string, bool) {
feats := check_features()
if .Fd not_in feats || .Git not_in feats {
fmt.println("Error: fd and git are required for moved dir detection")
return {}, false
}
roots, roots_ok := find_git_roots(d.cfg) roots, roots_ok := find_git_roots(d.cfg)
if !roots_ok { if !roots_ok {
return {}, false return {}, false

View File

@@ -319,85 +319,11 @@ test_shares_remote_both_empty :: proc(t: ^testing.T) {
testing.expect(t, !shares_remote(&f, remotes), "both empty should not share") testing.expect(t, !shares_remote(&f, remotes), "both empty should not share")
} }
delete_remotes :: proc(remotes: [dynamic]string) {
for &r in remotes {
delete(r)
}
delete(remotes)
}
@(test) @(test)
test_get_git_remotes_single :: proc(t: ^testing.T) { test_make_temp_path_format :: proc(t: ^testing.T) {
base := fmt.tprintf("/tmp/envr-test-remotes-%d", os.get_pid()) p := make_temp_path()
os.mkdir_all(base) testing.expect(t, strings.has_suffix(p, ".db"), "should end with .db")
defer os.remove_all(base) testing.expect(t, strings.contains(p, fmt.tprintf("%d", os.get_pid())), "should contain PID")
git_dir := fmt.tprintf("%s/.git", base)
os.mkdir_all(git_dir)
config_content := "[core]\n\trepositoryformatversion = 0\n[remote \"origin\"]\n\turl = git@github.com:user/repo.git\n\tfetch = +refs/heads/*:refs/remotes/origin/*\n"
config_path := fmt.tprintf("%s/config", git_dir)
err := os.write_entire_file(config_path, transmute([]u8)config_content)
testing.expect(t, err == nil, "should write .git/config")
remotes := get_git_remotes(base)
defer delete_remotes(remotes)
testing.expect(t, len(remotes) == 1, "should find 1 remote")
if len(remotes) != 1 do return
testing.expect_value(t, remotes[0], "git@github.com:user/repo.git")
}
@(test)
test_get_git_remotes_multiple :: proc(t: ^testing.T) {
base := fmt.tprintf("/tmp/envr-test-remotes-multi-%d", os.get_pid())
os.mkdir_all(base)
defer os.remove_all(base)
git_dir := fmt.tprintf("%s/.git", base)
os.mkdir_all(git_dir)
config_content := "[remote \"origin\"]\n\turl = git@github.com:user/repo.git\n[remote \"upstream\"]\n\turl = https://gitlab.com/upstream/repo.git\n"
config_path := fmt.tprintf("%s/config", git_dir)
err := os.write_entire_file(config_path, transmute([]u8)config_content)
testing.expect(t, err == nil, "should write .git/config")
remotes := get_git_remotes(base)
defer delete_remotes(remotes)
testing.expect(t, len(remotes) == 2, "should find 2 remotes")
}
@(test)
test_get_git_remotes_no_config :: proc(t: ^testing.T) {
base := fmt.tprintf("/tmp/envr-test-remotes-none-%d", os.get_pid())
os.mkdir_all(base)
defer os.remove_all(base)
remotes := get_git_remotes(base)
defer delete_remotes(remotes)
testing.expect(t, len(remotes) == 0, "should return empty when no .git/config")
}
@(test)
test_get_git_remotes_no_remotes :: proc(t: ^testing.T) {
base := fmt.tprintf("/tmp/envr-test-remotes-empty-%d", os.get_pid())
os.mkdir_all(base)
defer os.remove_all(base)
git_dir := fmt.tprintf("%s/.git", base)
os.mkdir_all(git_dir)
config_content := "[core]\n\trepositoryformatversion = 0\n\tbare = false\n"
config_path := fmt.tprintf("%s/config", git_dir)
err := os.write_entire_file(config_path, transmute([]u8)config_content)
testing.expect(t, err == nil, "should write .git/config")
remotes := get_git_remotes(base)
defer delete_remotes(remotes)
testing.expect(t, len(remotes) == 0, "should return empty when no remote sections")
} }
@(test) @(test)

View File

@@ -45,6 +45,7 @@ at before, restore your backup with:
* [envr backup](envr_backup.md) - Import a .env file into envr * [envr backup](envr_backup.md) - Import a .env file into envr
* [envr check](envr_check.md) - check if files in the current directory are backed up * [envr check](envr_check.md) - check if files in the current directory are backed up
* [envr deps](envr_deps.md) - Check for missing binaries
* [envr edit-config](envr_edit-config.md) - Edit your config with your default editor * [envr edit-config](envr_edit-config.md) - Edit your config with your default editor
* [envr init](envr_init.md) - Set up envr * [envr init](envr_init.md) - Set up envr
* [envr list](envr_list.md) - View your tracked files * [envr list](envr_list.md) - View your tracked files

24
docs/cli/envr_deps.md Normal file
View File

@@ -0,0 +1,24 @@
## envr deps
Check for missing binaries
### Synopsis
envr relies on external binaries for certain functionality.
The deps command reports on which binaries are available and which are not.
```
envr deps [flags]
```
### Options
```
-h, --help help for deps
```
### SEE ALSO
* [envr](envr.md) - Manage your .env files.

51
features.odin Normal file
View File

@@ -0,0 +1,51 @@
package main
import "base:runtime"
import "core:mem"
import "core:os"
import "core:strings"
// Feature names an external binary whose presence check_features detects.
Feature :: enum {
// Set by check_features when a `git` binary is found in a PATH directory.
Git,
// Set by check_features when an `fd` binary is found in a PATH directory.
Fd,
}
// AvailableFeatures is the set of features returned by check_features.
AvailableFeatures :: bit_set[Feature]
// check_features searches the directories listed in the PATH environment
// variable for the `git` and `fd` binaries and returns the set of features
// whose binary was found.
check_features :: proc() -> AvailableFeatures {
	available: AvailableFeatures

	// Route every temporary allocation made below through a private scratch
	// buffer that is released when this procedure returns.
	scratch: mem.Scratch
	mem.scratch_init(&scratch, 4 * mem.DEFAULT_PAGE_SIZE)
	defer mem.scratch_destroy(&scratch)
	context.temp_allocator = mem.scratch_allocator(&scratch)

	search_dirs := strings.split(
		os.get_env("PATH", context.temp_allocator),
		":",
		context.temp_allocator,
	)

	if find_binary(search_dirs, "git") != "" {
		available |= {.Git}
	}
	if find_binary(search_dirs, "fd") != "" {
		available |= {.Fd}
	}

	return available
}
// find_binary looks for `name` inside each directory of `paths`, in order,
// and returns the first candidate path that exists and is not a directory.
// It returns "" when no directory contains such an entry.
//
// Candidate paths, including the one returned, are allocated with
// `allocator`, which defaults to the temp allocator.
find_binary :: proc(
	paths: []string,
	name: string,
	allocator: runtime.Allocator = context.temp_allocator,
) -> string {
	for p in paths {
		// An empty entry would be joined into "/<name>" and probe the
		// filesystem root, which is not what the entry refers to.
		if len(p) == 0 {
			continue
		}

		candidate := strings.join({strings.trim_right(p, "/"), name}, "/", allocator)
		info, err := os.stat(candidate, allocator)
		if err != nil {
			continue
		}
		// A directory that merely shares the binary's name cannot be run.
		if info.type == .Directory {
			continue
		}
		return candidate
	}
	return ""
}

34
features_test.odin Normal file
View File

@@ -0,0 +1,34 @@
package main
import "core:os"
import "core:strings"
import "core:testing"
// `sh` must be discoverable in at least one directory of the real PATH.
@(test)
test_find_binary_exists :: proc(t: ^testing.T) {
	search_dirs := strings.split(
		os.get_env("PATH", context.temp_allocator),
		":",
		context.temp_allocator,
	)
	found := find_binary(search_dirs, "sh")
	testing.expect(t, found != "", "sh should be found on PATH")
}
// A binary name that does not exist in the searched directory yields "".
//
// find_binary takes its search directories as an argument, so the test hands
// it the bogus directory directly instead of rewriting the process-wide PATH.
// Mutating PATH raced with parallel tests that read it, and the old restore
// logic left PATH clobbered when the original value was empty.
@(test)
test_find_binary_not_exists :: proc(t: ^testing.T) {
	paths := []string{"/tmp/envr-nope"}

	result := find_binary(paths, "no_such_binary_xyz")
	testing.expect(t, result == "", "nonexistent binary should not be found")
}

View File

@@ -0,0 +1,34 @@
# Performance Ideas
Current state after regex→glob migration. findr beats fd in 3/4 cases.
## Benchmark results (2026-06-17)
| Case | fd | findr | Ratio |
|------|------|-------|-------|
| 1 `-E .jj` | 172ms | 135ms | **1.27x faster** |
| 2 `-H` | 1.184s | 1.097s | **1.08x faster** |
| 3 `-HI` | 1.251s | 1.670s | **1.34x slower** |
| 4 `-E .git` | 274ms | 202ms | **1.36x faster** |
Case 3 (`-HI`) skips gitignore entirely, so it's pure I/O + allocation. System time is 2x fd's (12.1s vs 5.5s), pointing to syscall/allocation overhead.
## Completed
1. **Per-thread result buffers** — each thread accumulates locally, merges once at exit. Eliminates per-result mutex contention.
2. **Lean path join** — `join_path`/`join_path_dir` use stack buffer + `copy` + single alloc instead of `strings.Builder` + `fmt.sbprintf` + `clone`.
3. **Regex→glob migration** — replaced regex NFA with backtracking glob matcher. Eliminated 27% of CPU spent on `add_thread`/`is_ignored`. Biggest win.
## Remaining ideas
1. **Larger getdents buffer** (8KB → 64KB+)
Fewer syscalls per directory with many entries. Low effort.
2. **Eliminate entry name cloning**
`strings.clone(name)` in `read_dir_entries` heap-allocates per dirent. Names are valid in the getdents buffer during `process_dir`, so the clone may be unnecessary. Low effort.
3. **Arena allocator per thread**
Bump allocator for all transient strings, free once at exit. Bigger change, helps everywhere.
4. **Batched channel** (fd's approach)
Replace global results array with buffered channel of batches. Enables streaming output and sorting like fd does.

342
findr/PLAN.md Normal file
View File

@@ -0,0 +1,342 @@
# findr — Native Odin File Finder (fd Replacement)
## Overview
findr is a native Odin file finder that replaces `fd` in envr. It supports three ignore modes for A/B benchmarking against specific fd commands, plus a unique "emit ONLY gitignored files" mode that gives envr a single-pass advantage over fd's double-run-and-diff approach.
## Directory Structure
```
findr/
findr.odin # main + CLI (hand-rolled arg parsing)
walker.odin # parallel directory walker (getdents + thread pool)
gitignore.odin # .gitignore parsing + glob→regex transpilation + matching
test_env.odin # test harness: temp dir, mock filesystem, assert helpers
findr_test.odin # integration tests
gitignore_test.odin # transpilation + matching unit tests (22 tests)
```
## CLI Interface
```
findr [-I] [--ignored] [--no-hidden] [-E <glob>]... [pattern] [path]...
```
Defaults: `include_hidden=true, ignore_mode=.Respected` (matches fd's `-H` behavior).
| fd command | findr equivalent |
|---|---|
| `fd -a \.env -E ... -HI ~/` | `findr -I -E ... \.env ~/` |
| `fd -a \.env -E ... -H ~/` | `findr -E ... \.env ~/` |
| `fd . -H ~/` | `findr ~/` |
| `fd . -HI ~/` | `findr -I ~/` |
| `fd . ~/` (no flags) | `findr --no-hidden ~/` |
| *(findr original)* | `findr --ignored ~/` |
## Build
```bash
odin build findr -o:speed -out:findr/findr
odin test findr
```
## Architecture
### Two Orthogonal Axes (matching fd's semantics)
1. **Hidden files** (`.` prefix): `include_hidden=true` includes them, `false` excludes them
2. **Gitignore**: three modes (see `IgnoreMode` below)
### Types
```odin
IgnoreMode :: enum {
Respected, // skip gitignored, prune ignored dirs (fd -H default)
All, // ignore .gitignore entirely, descend everywhere (fd -HI)
Ignored, // emit ONLY gitignored files, prune ignored dirs (findr original)
}
WalkOptions :: struct {
pattern: string, // regex on basename; "" = match all
excludes: []string, // glob patterns to skip entirely (fd -E)
include_hidden: bool, // true = include dotfiles (fd -H)
ignore_mode: IgnoreMode,
}
```
### process_dir Filtering Order Per Entry
Each directory traversal carries a `WorkItem` with the absolute path, a relative path from repo root, and a `^GIContext` linked list of gitignore contexts (one per ancestor directory with a `.gitignore`).
1. Skip `.git` directory
2. **Load nested `.gitignore`**: If this directory has a `.gitignore`, push a new `GIContext` onto the chain (tracked in `pool.all_contexts` for cleanup)
3. **Per entry**:
- Skip non-regular files (symlinks, sockets, etc. — parity with `fd -t f`)
- **Excludes**: if entry matches any exclude glob → skip entirely
- **Hidden**: if `!include_hidden && name[0] == '.'` → skip entirely
- **Gitignore status**: check `GIContext` chain deepest-to-root via `check_chain`, passing the **relative path** (not basename). First match wins (correct gitignore precedence). Nested negation overrides parent rules.
- **Mode-based decision**:
| Mode | gitignored file | gitignored dir | normal file | normal dir |
|---|---|---|---|---|
| `.All` | emit if pattern matches | descend | emit if pattern matches | descend |
| `.Respected` | skip | prune | emit if pattern matches | descend |
| `.Ignored` | emit if pattern matches | prune | skip | descend |
**Nested repos**: When a directory contains `.git/`, the gitignore context chain is reset (new repo root). The relative path resets to `""`. Nested repos are always traversed to find deeper repos.
### Performance Architecture
- **Stat avoidance via `dirent.type`** — Uses `core:sys/linux` getdents directly, bypassing `core:os` which calls `openat` + `fstat` per entry.
- **Prune ignored directories** — When a directory matches a gitignore/exclude pattern, it is not descended into.
- **Parallel traversal** — Worker thread pool with shared LIFO queue and futex-based semaphore signaling. 5.4x speedup over serial on home directory.
## Decisions
- **Gitignore matching**: Transpile gitignore glob patterns to regex, then use `core:text/regex`. No dedicated glob matcher.
- **Pattern matching**: Pattern is a regex (same as fd), matched against basename via `regex.match` (unanchored search).
- **Excludes**: Glob patterns compiled via the same gitignore transpiler (`parse()`). Reuses tested transpilation logic.
- **Nested gitignore**: Every `.gitignore` file within a repo is read, not just the root. Each directory's rules are scoped relative to that directory's path. Negation in a child overrides parent rules (correct gitignore precedence).
- **Stat avoidance**: Use `core:sys/linux` getdents directly — read `dirent.type` from the kernel, never call stat. `DT_UNKNOWN` treated as regular file (correct for ext4/tmpfs; may miss dirs on XFS/BTRFS/FUSE — Phase 7 concern).
## Testing Strategy
- **In-process integration tests** — Tests call `walk()` directly (not via subprocess), build mock filesystems in temp dirs, and compare sorted output.
- **Unit tests** — Pure-function tests for glob→regex transpilation and gitignore matching.
- **Output sorting for determinism** — Always sort output lines before comparison.
- **Memory tracking** — Odin's test runner reports leaks automatically.
### Test Coverage (findr_test.odin)
**`.Ignored` mode (original findr behavior):**
| Test | What it covers |
|---|---|
| `test_basic_gitignored` | Repo with `.gitignore`, gitignored files emitted, normal files skipped |
| `test_non_repo_not_scanned` | Dirs without `.git/` produce no output |
| `test_negation_pattern` | `!prod.env` un-ignores a file |
| `test_dir_only_pattern` | `node_modules/` pattern doesn't emit file results |
| `test_multiple_repos` | Multiple repos in one tree, each with its own `.gitignore` |
| `test_nested_repos` | Repo inside a repo, both scanned independently |
| `test_no_gitignore_file` | Repo with `.git/` but no `.gitignore` produces nothing |
| `test_empty_gitignore` | Comments and blank lines only → no results |
| `test_multiple_search_dirs` | Multiple top-level search dirs in one call |
| `test_nested_gitignore_read` | Nested `.gitignore` rules applied (subdir patterns work) |
| `test_nested_gitignore_negation` | Nested negation overrides parent pattern |
| `test_multisegment_pattern` | `build/output.txt` matches relative path, not just basename |
**`.All` mode (fd -HI parity):**
| Test | What it covers |
|---|---|
| `test_all_mode_emits_all_files` | All files emitted regardless of gitignore |
| `test_all_mode_descends_everywhere` | Gitignored dirs still descended |
**`.Respected` mode (fd -H parity):**
| Test | What it covers |
|---|---|
| `test_respected_mode_skips_gitignored` | Gitignored files skipped |
| `test_respected_mode_prunes_ignored_dirs` | Gitignored dirs pruned |
| `test_nested_gitignore_respected_mode` | Nested negation respected in `.Respected` mode |
**Filters:**
| Test | What it covers |
|---|---|
| `test_excludes_prune_dirs` | Excluded dirs not descended |
| `test_pattern_filters_results` | Only pattern-matching files emitted |
| `test_no_hidden_skips_dotfiles` | Hidden files skipped when include_hidden=false |
## Glob→Regex Transpilation Rules
| Gitignore pattern | Regex | Notes |
|---|---|---|
| `foo` | `(^|/)foo(/.*)?$` | matches at any depth |
| `/foo` | `^foo(/.*)?$` | anchored to gitignore dir |
| `foo/` | `(^|/)foo/.*$` | directory only |
| `*.log` | `(^|/)[^/]*\.log$` | `*` = any chars except `/` |
| `**/foo` | `(^|/)(.*/)?foo(/.*)?$` | `**` = any chars including `/` |
| `foo/**/bar` | `(^|/)foo/(.*/)?bar(/.*)?$` | `**` between segments |
| `!pattern` | (handled by layer) | negation flag, not regex |
| `#comment` | (skipped) | |
| `[abc]` | `[abc]` | same regex syntax |
| `?` | `[^/]` | single char, no `/` |
## Implementation Phases
### Phase 1: Gitignore Transpiler + Tests ✅
22 tests, all passing, zero leaks.
### Phase 2: findr Walker + Tests ✅
Parallel DFS using getdents with worker thread pool. 32 total tests pass, zero leaks.
### Phase 3: Parallel Traversal ✅
8-worker thread pool, shared LIFO queue, futex-based semaphore. 852ms vs 4.57s serial (5.4x speedup). Serial code removed — parallel is the only implementation.
### Phase 4: Benchmark ✅
findr found 227 gitignored files on `~` in 852ms. fd's double-run walked ~1.1M entries.
### Phase 5: fd-Parity API ✅
**Goal:** Make findr replicate specific fd commands for A/B benchmarking, plus keep the unique gitignored-only mode.
**Built:**
- `IgnoreMode` enum (`.Respected`, `.All`, `.Ignored`) and `WalkOptions` struct
- New `walk` signature: `walk(root, results, opts: WalkOptions, thread_count)`
- Rewritten `process_dir` with centralized mode-based filtering
- Pattern matching via `core:text/regex` on basenames
- Exclude patterns compiled via existing `gitignore.parse()`
- CLI arg parsing: `-I`, `--ignored`, `--no-hidden`, `-E <glob>`
- 7 new integration tests (17 total) covering all three modes, excludes, pattern, and hidden filtering
**Result:** All tests pass (22 gitignore + 20 walker = 42), zero leaks.
### Phase 6: Parity (partially done)
**Goal:** Achieve file-count parity with fd. An invalid benchmark (different result sets) is useless.
#### Steps 1-2: Nested gitignore + relative path matching ✅
**What was done:**
1. **`Match` enum + `check_match`** in `gitignore.odin` — Tri-state return (`None`/`Ignored`/`Unignored`) so nested negation overrides work correctly. `is_ignored` wraps it as before.
2. **`GIContext` linked list** in `walker.odin` — Each context holds a `^Gitignore`, `base_rel` (relative path from repo root to this dir), and `parent: ^GIContext`. `process_dir` loads `.gitignore` in every directory within a repo (not just roots). `check_chain` walks deepest-to-root, first match wins (correct gitignore precedence).
3. **`WorkItem` struct** replaced plain `string` in the work queue:
```odin
WorkItem :: struct {
path: string, // absolute directory path
rel: string, // relative path from repo root ("" = root)
gi_ctx: ^GIContext, // gitignore chain (nil = outside any repo)
}
```
4. **Relative path matching** — `check_chain` strips each context's `base_rel` prefix to get the locally-scoped relative path. Multi-segment patterns like `build/output.txt` now match correctly.
5. **Symlink filtering** — Only `DT_REG` and `DT_UNKNOWN` entries are emitted (matching `fd -t f`). Symlinks (`DT_LNK`) are skipped.
6. **`DT_UNKNOWN` handling** — Treated as regular files (no stat fallback). Correct for ext4/tmpfs; may miss directories on XFS/BTRFS/FUSE.
**Memory management:** All `GIContext` objects tracked in `pool.all_contexts` (mutex-protected append). Gitignore objects and context structs freed in bulk when `walk` completes.
**Parity achieved** (`~`, 5M+ files):
| Mode | findr | fd equivalent | diff |
|---|---|---|---|
| `.All` (-I) | 5,426,451 | `fd -HI -t f --exclude .git` | **0 (exact)** |
| `.Respected` | 4,442,505 | `fd -H -t f --exclude .git` | +1,417 (0.03%) |
| `--no-hidden` | 393,605 | `fd -t f --exclude .git` | +17 (0.004%) |
On the envr repo itself, all three modes are **exact match (0 diffs)**. The tiny residual diffs on `~` are likely from global gitignore (`~/.config/git/ignore`) and `.git/info/exclude` which fd reads but findr doesn't.
#### Step 3: DT_UNKNOWN stat fallback (TODO)
On XFS/BTRFS/FUSE filesystems, `dirent.type` returns `DT_UNKNOWN`. Currently findr treats these as regular files, which means directories may be missed (not descended into). Add a stat fallback in `read_dir_entries` when `d.type == .UNKNOWN` to determine the real type before proceeding. This is not needed for ext4/tmpfs (what tests and most Linux systems use).
### Phase 7: Performance Optimization (next)
**Goal:** Make findr competitive with or faster than fd across all modes. Current benchmark (`~`, hyperfine 5 runs):
| Command | Mean | vs fd equivalent |
|---|---|---|
| `findr --ignored` | 984ms | *(no fd equivalent)* |
| `findr --no-hidden` | 542ms | 3.2x slower than `fd -t f` (170ms) |
| `findr` (respected) | 4.134s | 2.4x slower than `fd -H -t f` (1.745s) |
| `findr -I` (all) | 3.821s | 1.9x slower than `fd -HI -t f` (1.972s) |
**Bottleneck analysis:**
1. **Mutex contention on result collection** — Every file append goes through `sync.mutex_lock(&pool.results_mutex)` → `append` → `sync.mutex_unlock`. With 5M+ files across 16 threads, workers serialize on the mutex.
2. **`--ignored` regression** — Was 402ms before nested gitignore support, now 984ms. The overhead comes from loading `.gitignore` in every directory and checking the context chain per entry. Since `--ignored` mode prunes gitignored dirs, many of these `.gitignore` loads are wasted (the dir won't be descended into anyway). Optimization: skip loading `.gitignore` for directories that will be pruned.
3. **Per-string heap allocation** — Every path string is individually `strings.clone`'d and `delete`'d. Millions of alloc/free calls.
**Optimization plan:**
1. **Per-thread result buffers** — Each worker accumulates results in a thread-local `[dynamic]string`. Merge into shared array once at the end (single-threaded concat).
2. **Lazy gitignore loading for `.Ignored` mode** — Only load `.gitignore` when we need to decide whether to emit or descend. In `.Ignored` mode, we can check the parent context first and skip loading if the directory itself is already ignored.
3. **Arena allocator for paths** — Replace per-string `strings.clone` with a bump allocator. Free everything in one `arena_destroy` at the end.
4. **Larger getdents buffer** — Increase from 8KB to 64KB to reduce syscall count.
5. **BufWriter on stdout** — Batch `write` syscalls instead of per-line `fmt.println`.
**Success criteria:**
- `.All` mode faster than `fd -HI -t f --exclude .git`
- `.Respected` mode faster than `fd -H -t f --exclude .git`
- `--ignored` mode faster than `fd -HI -t f --exclude .git` (restore pre-regression advantage)
- Re-benchmark after each step using `findr/bench.sh`
### Phase 8: Integrate into envr
**Goal:** Replace ALL `fd` subprocess usage in envr with in-process findr calls. Remove `Feature.Fd` entirely.
#### Part A: Rewrite `scan_path` (`scan.odin`)
Replace the double-run-and-diff approach with a single `findr.walk` call using `.Ignored` mode:
```odin
// Before: fd -HI + fd -H, then diff
// After:
findr.walk({search_path}, &paths, findr.WalkOptions{
pattern = cfg.ScanConfig.Matcher,
excludes = cfg.ScanConfig.Exclude[:],
include_hidden = true,
ignore_mode = .Ignored,
}, thread_count)
```
**Delete:** `build_fd_args`, `run_fd`, `next_fd_tmp_path`, `fd_counter`, `fd_seq`, `cant_scan`.
#### Part B: Add `find_repos` and rewrite `find_git_roots` (`config.odin`)
Add a `find_repos` proc to findr that walks a tree and collects directories containing `.git/`:
```odin
find_repos :: proc(root: string, results: ^[dynamic]string, thread_count: int)
```
- Reuses worker pool architecture
- `process_dir` emits `dir_path` when `has_git == true`
- Always descends into subdirs (except `.git`) to find nested repos
- No gitignore/exclude/pattern processing
Replace `find_git_roots`'s `run_fd` call with `findr.find_repos`.
#### Part C: Remove `Feature.Fd` everywhere
| File | Change |
|---|---|
| `features.odin` | Remove `Fd` from enum, remove fd binary check |
| `cmd_scan.odin` | Remove feats/cant_scan guard + "install fd" error |
| `cmd_check.odin` | Same removal |
| `cmd_deps.odin` | Remove fd table row |
| `db.odin` | Change check to `.Git not_in feats` only; update error message |
| `scan_test.odin` | Remove `cant_scan` tests and assertions |
#### Part D: Verification
```bash
odin build findr -o:speed -out:findr/findr
odin test findr
odin build . -o:speed -out:envr
odin test .
```
## Risks
| Risk | Mitigation |
|---|---|
| Gitignore edge cases (`**/foo`, `foo/**/bar`) | Comprehensive gitignore_test.odin with spec examples |
| `DT_UNKNOWN` on XFS/BTRFS/FUSE | Phase 6 Step 3: stat fallback for unknown types |
| Global gitignore (`~/.config/git/ignore`) and `.git/info/exclude` not read | Causes ~0.03% delta vs fd. Acceptable for envr's use case (finds `.env` files in repos). |
| Thread safety of `regex.match` on shared `Regular_Expression` | Odin regex is read-only after compilation; `match` returns per-call `Captures` |

71
findr/bench.sh Executable file
View File

@@ -0,0 +1,71 @@
#!/usr/bin/env bash
# Benchmark findr against fd.
#
# Usage: bench.sh [TARGET]
#   TARGET  directory tree to walk (default: $HOME)
#
# Prints file counts for each fd/findr flag pairing, then times the same
# pairings with hyperfine and exports the table to bench-results.md next to
# this script. The findr binary is (re)built first when it is missing or older
# than any .odin source in this directory.
set -euo pipefail

BENCH_DIR="$(cd "$(dirname "$0")" && pwd)"
TARGET="${1:-$HOME}"
RESULTS_FILE="$BENCH_DIR/bench-results.md"
FINDR="$BENCH_DIR/findr"

echo "=== findr benchmark suite ==="
echo "Target: $TARGET"
echo

# --- pre-flight checks ---
for tool in fd hyperfine; do
    if ! command -v "$tool" &>/dev/null; then
        echo "ERROR: $tool is not on PATH" >&2
        exit 1
    fi
done

# --- build findr if missing or stale ---
NEEDS_BUILD=false
if [[ ! -f "$FINDR" ]]; then
    NEEDS_BUILD=true
# Rebuild if any .odin source is newer than the binary. `-print -quit` stops at
# the first hit; piping find into `grep -q` is unreliable under `pipefail`
# because find can die of SIGPIPE and make the whole pipeline look like "no match".
elif [[ -n "$(find "$BENCH_DIR" -name '*.odin' -newer "$FINDR" -print -quit)" ]]; then
    NEEDS_BUILD=true
fi

if [[ "$NEEDS_BUILD" == true ]]; then
    echo "Building findr..."
    odin build "$BENCH_DIR" -o:speed -out:"$FINDR"
fi
echo

# --- file counts ---
echo "=== File counts ==="
printf " fd -a -E .jj . : %8d\n" "$(fd -a -E .jj . "$TARGET" 2>/dev/null | wc -l)"
printf " findr -E .jj : %8d\n" "$("$FINDR" -E .jj "$TARGET" 2>/dev/null | wc -l)"
echo
printf " fd -a -E .git -E .jj -H . : %8d\n" "$(fd -a -E .git -E .jj -H . "$TARGET" 2>/dev/null | wc -l)"
printf " findr -E .git -E .jj -H : %8d\n" "$("$FINDR" -E .git -E .jj -H "$TARGET" 2>/dev/null | wc -l)"
echo
printf " fd -a -E .git -E .jj -HI . : %8d\n" "$(fd -a -E .git -E .jj -HI . "$TARGET" 2>/dev/null | wc -l)"
printf " findr -E .git -E .jj -HI : %8d\n" "$("$FINDR" -E .git -E .jj -HI "$TARGET" 2>/dev/null | wc -l)"
echo
printf " fd -a -E .git -E .jj . : %8d\n" "$(fd -a -E .git -E .jj . "$TARGET" 2>/dev/null | wc -l)"
printf " findr -E .git -E .jj : %8d\n" "$("$FINDR" -E .git -E .jj "$TARGET" 2>/dev/null | wc -l)"
echo

# --- benchmarks ---
# hyperfine runs each string through a shell, so both the binary path and the
# target are quoted inside the command to survive spaces in either.
echo "=== Benchmarks (hyperfine, 5 runs, 2 warmups) ==="
echo
hyperfine \
    --warmup 2 \
    --runs 5 \
    --export-markdown "$RESULTS_FILE" \
    "fd -a -E .jj . \"$TARGET\" > /dev/null" \
    "\"$FINDR\" -E .jj \"$TARGET\" > /dev/null" \
    "fd -a -E .git -E .jj -H . \"$TARGET\" > /dev/null" \
    "\"$FINDR\" -E .git -E .jj -H \"$TARGET\" > /dev/null" \
    "fd -a -E .git -E .jj -HI . \"$TARGET\" > /dev/null" \
    "\"$FINDR\" -E .git -E .jj -HI \"$TARGET\" > /dev/null" \
    "fd -a -E .git -E .jj . \"$TARGET\" > /dev/null" \
    "\"$FINDR\" -E .git -E .jj \"$TARGET\" > /dev/null"
echo
echo "=== Results written to $RESULTS_FILE ==="

27
findr/f.nu Executable file
View File

@@ -0,0 +1,27 @@
#!/usr/bin/env nu
# Print every `.env`-matching path under ~/ that fd only reports when ignore
# rules are disabled, i.e. the paths hidden by ignore files.
def main [] {
    # Both runs must search the same root; an extra `.` argument here would make
    # fd walk the current directory as a second search path.
    let all = (fd -HI -a .env ~/ | lines | sort)
    let unignored = (fd -H -a .env ~/ | lines | sort)
    $all | filter { |it| not ($it in $unignored) } | str join "\n"
    # sorted_list_intersect $all $unignored | str join "\n"
}
# Return the elements common to two lists that are both sorted ascending.
# Walks both lists once with two cursors; each matching pair contributes one
# element to the result.
def sorted_list_intersect [xs1: list, xs2: list] {
    let len1 = ($xs1 | length)
    let len2 = ($xs2 | length)
    mut i = 0
    mut j = 0
    # Values produced inside a `while` body are discarded, so matches are
    # collected explicitly and returned after the loop.
    mut common = []
    while ($i < $len1 and $j < $len2) {
        let a = ($xs1 | get $i)
        let b = ($xs2 | get $j)
        if $a < $b {
            $i = $i + 1
        } else if $b < $a {
            $j = $j + 1
        } else {
            $common = ($common | append $b)
            $i = $i + 1
            $j = $j + 1
        }
    }
    $common
}

91
findr/findr.odin Normal file
View File

@@ -0,0 +1,91 @@
package findr
import "core:bufio"
import "core:os"
import "core:strings"
// Command-line entry point.
//
// Recognised arguments:
//   --ignored      set ignore_mode to .Ignored
//   -E NAME / -ENAME   add NAME to the exclude list
//   -H             include hidden entries
//   -I             set ignore_mode to .All
//   -a             accepted and ignored (fd compatibility)
// Short flags may be bundled (e.g. -HI); unrecognised flag letters are ignored.
// The first positional argument is the pattern, the rest are search paths.
// Results are written to stdout, one per line.
main :: proc() {
	prof_init()
	defer prof_destroy()

	opts := WalkOptions{
		include_hidden = false,
		ignore_mode    = .Respected,
	}

	excludes := make([dynamic]string)
	defer delete(excludes)
	paths := make([dynamic]string)
	defer delete(paths)
	pattern := ""

	argv := os.args
	for idx := 1; idx < len(argv); idx += 1 {
		arg := argv[idx]
		if arg == "--ignored" {
			opts.ignore_mode = .Ignored
		} else if arg == "-E" {
			// Separate-argument form: consume the next argument if there is one.
			idx += 1
			if idx < len(argv) {
				append(&excludes, argv[idx])
			}
		} else if strings.has_prefix(arg, "-E") {
			// Attached form: everything after "-E" is the exclude name.
			append(&excludes, arg[2:])
		} else if len(arg) > 1 && arg[0] == '-' {
			for flag in arg[1:] {
				if flag == 'H' {
					opts.include_hidden = true
				} else if flag == 'I' {
					opts.ignore_mode = .All
				}
				// 'a' is accepted for fd compatibility and does nothing;
				// any other letter is ignored as well.
			}
		} else if pattern == "" {
			pattern = arg
		} else {
			append(&paths, arg)
		}
	}

	// A lone positional argument that names an existing filesystem entry is
	// taken as the search path rather than as a pattern.
	if len(paths) == 0 && pattern != "" && os.exists(pattern) {
		append(&paths, pattern)
		pattern = ""
	}
	opts.pattern = pattern

	if len(excludes) > 0 {
		opts.excludes = excludes[:]
	}
	if len(paths) == 0 {
		append(&paths, ".")
	}

	results := make([dynamic]string)
	defer {
		for entry in results {delete(entry)}
		delete(results)
	}

	walk(paths[:], &results, opts, os.get_processor_core_count())

	// Buffer stdout (8 KiB) so output is not written one line at a time.
	out: bufio.Writer
	bufio.writer_init(&out, os.to_stream(os.stdout), 1 << 13)
	defer bufio.writer_destroy(&out)
	for line in results {
		bufio.writer_write_string(&out, line)
		bufio.writer_write_byte(&out, '\n')
	}
	bufio.writer_flush(&out)
}

View File

@@ -7,7 +7,7 @@ import "core:sys/linux"
import "core:testing" import "core:testing"
// ============================================================================ // ============================================================================
// Gitignored file emission tests (emit ONLY gitignored files, descend everywhere) // .Ignored mode tests (original findr behavior — emit ONLY gitignored files)
// ============================================================================ // ============================================================================
@(test) @(test)
@@ -21,7 +21,7 @@ test_basic_gitignored :: proc(t: ^testing.T) {
create_file(env, "repo/secrets.env") create_file(env, "repo/secrets.env")
create_file(env, "repo/normal.txt") create_file(env, "repo/normal.txt")
assert_output(t, env, nil, {}, { assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {
"repo/.env", "repo/secrets.env", "repo/.env", "repo/secrets.env",
}) })
} }
@@ -35,7 +35,7 @@ test_non_repo_not_scanned :: proc(t: ^testing.T) {
create_file(env, "norepo/.gitignore", "*.env\n") create_file(env, "norepo/.gitignore", "*.env\n")
create_file(env, "norepo/.env") create_file(env, "norepo/.env")
assert_output_empty(t, env, nil, {}) assert_output_empty(t, env, nil, {include_hidden = true, ignore_mode = .Ignored})
} }
@(test) @(test)
@@ -49,11 +49,28 @@ test_negation_pattern :: proc(t: ^testing.T) {
create_file(env, "repo/secrets.env") create_file(env, "repo/secrets.env")
create_file(env, "repo/prod.env") create_file(env, "repo/prod.env")
assert_output(t, env, nil, {}, { assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {
"repo/.env", "repo/secrets.env", "repo/.env", "repo/secrets.env",
}) })
} }
// A trailing-slash pattern ("name/") applies to directories only: the regular
// file named `node_modules` must not be reported, the directory `ignored_dir`
// must be.
@(test)
test_dir_only_pattern :: proc(t: ^testing.T) {
	env := create_test_env()
	defer destroy_test_env(&env)

	create_git_repo(env, "repo")
	// Both patterns live in ONE .gitignore. Writing the file a second time
	// would (presumably) replace the first pattern, leaving the node_modules
	// half of this test checking nothing.
	create_file(env, "repo/.gitignore", "node_modules/\nignored_dir/\n")
	create_file(env, "repo/node_modules", "should not match (it's a file)")
	create_dir(env, "repo/ignored_dir")

	assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {
		"repo/ignored_dir/",
	})
}
@(test) @(test)
test_multiple_repos :: proc(t: ^testing.T) { test_multiple_repos :: proc(t: ^testing.T) {
env := create_test_env() env := create_test_env()
@@ -67,7 +84,7 @@ test_multiple_repos :: proc(t: ^testing.T) {
create_file(env, "repo2/.gitignore", "*.key\n") create_file(env, "repo2/.gitignore", "*.key\n")
create_file(env, "repo2/secret.key") create_file(env, "repo2/secret.key")
assert_output(t, env, nil, {}, { assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {
"repo1/a.env", "repo2/secret.key", "repo1/a.env", "repo2/secret.key",
}) })
} }
@@ -85,7 +102,7 @@ test_nested_repos :: proc(t: ^testing.T) {
create_file(env, "parent/child/.gitignore", "*.key\n") create_file(env, "parent/child/.gitignore", "*.key\n")
create_file(env, "parent/child/api.key") create_file(env, "parent/child/api.key")
assert_output(t, env, nil, {}, { assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {
"parent/top.env", "parent/child/api.key", "parent/top.env", "parent/child/api.key",
}) })
} }
@@ -102,7 +119,10 @@ test_nested_gitignore_read :: proc(t: ^testing.T) {
create_file(env, "repo/sub/secret.txt") create_file(env, "repo/sub/secret.txt")
create_file(env, "repo/sub/.env") create_file(env, "repo/sub/.env")
assert_output(t, env, nil, {}, { // Both root and nested .gitignore are read.
// secret.txt: ignored by sub/.gitignore (*.txt)
// .env: ignored by root .gitignore (*.env)
assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {
"repo/sub/secret.txt", "repo/sub/.env", "repo/sub/secret.txt", "repo/sub/.env",
}) })
} }
@@ -119,11 +139,34 @@ test_nested_gitignore_negation :: proc(t: ^testing.T) {
create_file(env, "repo/sub/important.log") create_file(env, "repo/sub/important.log")
create_file(env, "repo/sub/debug.log") create_file(env, "repo/sub/debug.log")
assert_output(t, env, nil, {}, { // Nested negation overrides root pattern.
// important.log: un-ignored by sub/.gitignore → NOT emitted in .Ignored mode
// debug.log: still ignored by root → emitted
assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {
"repo/sub/debug.log", "repo/sub/debug.log",
}) })
} }
// .Respected-mode counterpart of the nested-negation case: the root
// .gitignore ignores `*.log`, and `sub/.gitignore` re-includes `important.log`.
@(test)
test_nested_gitignore_respected_mode :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "*.log\n")
create_dir(env, "repo/sub")
create_file(env, "repo/sub/.gitignore", "!important.log\n")
create_file(env, "repo/sub/important.log")
create_file(env, "repo/sub/debug.log")
// In .Respected mode:
// important.log: un-ignored by nested negation → emitted
// debug.log: ignored by root → skipped
assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Respected}, {
"repo/", "repo/.gitignore", "repo/sub/", "repo/sub/.gitignore", "repo/sub/important.log",
})
}
@(test) @(test)
test_multisegment_pattern :: proc(t: ^testing.T) { test_multisegment_pattern :: proc(t: ^testing.T) {
env := create_test_env() env := create_test_env()
@@ -136,7 +179,11 @@ test_multisegment_pattern :: proc(t: ^testing.T) {
create_file(env, "repo/build/other.txt") create_file(env, "repo/build/other.txt")
create_file(env, "repo/output.txt") create_file(env, "repo/output.txt")
assert_output(t, env, nil, {}, { // Multi-segment pattern matches relative path, not just basename.
// build/output.txt: matches → ignored
// build/other.txt: doesn't match → not ignored
// output.txt: doesn't match (needs build/ prefix) → not ignored
assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {
"repo/build/output.txt", "repo/build/output.txt",
}) })
} }
@@ -149,7 +196,7 @@ test_no_gitignore_file :: proc(t: ^testing.T) {
create_git_repo(env, "repo") create_git_repo(env, "repo")
create_file(env, "repo/.env") create_file(env, "repo/.env")
assert_output_empty(t, env, nil, {}) assert_output_empty(t, env, nil, {include_hidden = true, ignore_mode = .Ignored})
} }
@(test) @(test)
@@ -161,7 +208,7 @@ test_empty_gitignore :: proc(t: ^testing.T) {
create_file(env, "repo/.gitignore", "\n\n# comment\n\n") create_file(env, "repo/.gitignore", "\n\n# comment\n\n")
create_file(env, "repo/.env") create_file(env, "repo/.env")
assert_output_empty(t, env, nil, {}) assert_output_empty(t, env, nil, {include_hidden = true, ignore_mode = .Ignored})
} }
@(test) @(test)
@@ -189,7 +236,7 @@ test_multiple_search_dirs :: proc(t: ^testing.T) {
delete(results) delete(results)
} }
opts := WalkOptions{} opts := WalkOptions{include_hidden = true, ignore_mode = .Ignored}
thread_count := os.get_processor_core_count() thread_count := os.get_processor_core_count()
walk({dir1, dir2}, &results, opts, thread_count) walk({dir1, dir2}, &results, opts, thread_count)
@@ -219,46 +266,78 @@ test_multiple_search_dirs :: proc(t: ^testing.T) {
} }
// ============================================================================ // ============================================================================
// Ignored directory recursion tests // .All mode tests (fd -HI parity — ignore gitignore entirely)
// ============================================================================ // ============================================================================
@(test) @(test)
test_ignored_dir_descended :: proc(t: ^testing.T) { test_all_mode_emits_all_files :: proc(t: ^testing.T) {
env := create_test_env() env := create_test_env()
defer destroy_test_env(&env) defer destroy_test_env(&env)
create_git_repo(env, "repo") create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "secrets/\n") create_file(env, "repo/.gitignore", "*.env\n")
create_dir(env, "repo/secrets") create_file(env, "repo/.env")
create_file(env, "repo/secrets/.env") create_file(env, "repo/secrets.env")
create_file(env, "repo/secrets/api.key") create_file(env, "repo/normal.txt")
// Ignored dir's contents are emitted AND descended into assert_output(t, env, nil, {include_hidden = true, ignore_mode = .All}, {
assert_output(t, env, nil, {}, { "repo/", "repo/.env", "repo/.gitignore", "repo/secrets.env", "repo/normal.txt",
"repo/secrets/", "repo/secrets/.env", "repo/secrets/api.key",
}) })
} }
@(test) @(test)
test_nested_ignored_dir :: proc(t: ^testing.T) { test_all_mode_descends_everywhere :: proc(t: ^testing.T) {
env := create_test_env() env := create_test_env()
defer destroy_test_env(&env) defer destroy_test_env(&env)
create_git_repo(env, "repo") create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "build/\n") create_file(env, "repo/.gitignore", "build/\n")
create_dir(env, "repo/build") create_dir(env, "repo/build")
create_dir(env, "repo/build/sub")
create_file(env, "repo/build/output.txt") create_file(env, "repo/build/output.txt")
create_file(env, "repo/build/sub/deep.env")
assert_output(t, env, nil, {}, { assert_output(t, env, nil, {include_hidden = true, ignore_mode = .All}, {
"repo/build/", "repo/build/output.txt", "repo/", "repo/.gitignore", "repo/build/", "repo/build/output.txt",
"repo/build/sub/", "repo/build/sub/deep.env",
}) })
} }
// ============================================================================ // ============================================================================
// Filter tests (excludes, pattern) // .Respected mode tests (fd -H parity — skip gitignored, prune ignored dirs)
// ============================================================================
// In .Respected mode, files matching the repo's `*.env` pattern are left out;
// the repo directory, the .gitignore itself and normal.txt are expected.
@(test)
test_respected_mode_skips_gitignored :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "*.env\n")
create_file(env, "repo/.env")
create_file(env, "repo/secrets.env")
create_file(env, "repo/normal.txt")
assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Respected}, {
"repo/", "repo/.gitignore", "repo/normal.txt",
})
}
// In .Respected mode, a directory matched by `build/` is expected to be absent
// from the output together with the file inside it.
@(test)
test_respected_mode_prunes_ignored_dirs :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "build/\n")
create_dir(env, "repo/build")
create_file(env, "repo/build/output.txt")
create_file(env, "repo/main.txt")
assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Respected}, {
"repo/", "repo/.gitignore", "repo/main.txt",
})
}
// ============================================================================
// Filter tests (excludes, pattern, hidden)
// ============================================================================ // ============================================================================
@(test) @(test)
@@ -273,7 +352,7 @@ test_excludes_prune_dirs :: proc(t: ^testing.T) {
create_file(env, "repo/vendor/lib.env") create_file(env, "repo/vendor/lib.env")
assert_output(t, env, nil, assert_output(t, env, nil,
{excludes = {"vendor"}}, {include_hidden = true, ignore_mode = .Ignored, excludes = {"vendor"}},
{"repo/.env"}, {"repo/.env"},
) )
} }
@@ -290,13 +369,30 @@ test_pattern_filters_results :: proc(t: ^testing.T) {
create_file(env, "repo/master.key") create_file(env, "repo/master.key")
assert_output(t, env, nil, assert_output(t, env, nil,
{pattern = "\\.env$"}, {pattern = "\\.env$", include_hidden = true, ignore_mode = .Ignored},
{"repo/.env", "repo/secrets.env"}, {"repo/.env", "repo/secrets.env"},
) )
} }
// With include_hidden = false, dot-prefixed files are left out even though
// they match the ignore pattern: of the three `*.env` files only the
// non-hidden `secrets.env` is expected.
@(test)
test_no_hidden_skips_dotfiles :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "*.env\n")
create_file(env, "repo/.env")
create_file(env, "repo/secrets.env")
create_file(env, "repo/.hidden.env")
assert_output(t, env, nil,
{include_hidden = false, ignore_mode = .Ignored},
{"repo/secrets.env"},
)
}
// ============================================================================ // ============================================================================
// Special file type tests // Special file type tests (SOCK, FIFO, CHR, BLK parity with fd)
// ============================================================================ // ============================================================================
@(test) @(test)
@@ -305,7 +401,7 @@ test_fifo_emitted :: proc(t: ^testing.T) {
defer destroy_test_env(&env) defer destroy_test_env(&env)
create_git_repo(env, "repo") create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "*.env\n*.fifo\n") create_file(env, "repo/.gitignore", "*.env\n")
fifo_path := join_path(env.temp_dir, "repo/test.fifo") fifo_path := join_path(env.temp_dir, "repo/test.fifo")
defer delete(fifo_path) defer delete(fifo_path)
@@ -314,7 +410,65 @@ test_fifo_emitted :: proc(t: ^testing.T) {
linux.mknod(cpath, linux.S_IFIFO | linux.Mode{.IRUSR, .IWUSR}, 0) linux.mknod(cpath, linux.S_IFIFO | linux.Mode{.IRUSR, .IWUSR}, 0)
assert_output(t, env, nil, assert_output(t, env, nil,
{pattern = "\\.fifo$"}, {include_hidden = true, ignore_mode = .All},
{"repo/test.fifo"}, {"repo/", "repo/.gitignore", "repo/test.fifo"},
)
}
// ============================================================================
// in_repo propagation tests
// ============================================================================
// The repo has no .gitignore at its root, only one in `sub/`. The nested
// `*.tmp` rule must still apply: file.tmp is omitted, file.txt is emitted.
@(test)
test_repo_without_root_gitignore :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_dir(env, "repo/sub")
create_file(env, "repo/sub/.gitignore", "*.tmp\n")
create_file(env, "repo/sub/file.tmp")
create_file(env, "repo/sub/file.txt")
assert_output(t, env, nil,
{include_hidden = true, ignore_mode = .Respected},
{"repo/", "repo/sub/", "repo/sub/.gitignore", "repo/sub/file.txt"},
)
}
// ============================================================================
// .ignore file support tests (fd respects .ignore in addition to .gitignore)
// ============================================================================
// Patterns in a `.ignore` file are honoured in .Respected mode: file.tmp is
// omitted while the `.ignore` file itself and file.txt are emitted.
@(test)
test_ignore_file_respected :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.ignore", "*.tmp\n")
create_file(env, "repo/file.tmp")
create_file(env, "repo/file.txt")
assert_output(t, env, nil,
{include_hidden = true, ignore_mode = .Respected},
{"repo/", "repo/.ignore", "repo/file.txt"},
)
}
@(test)
test_ignore_overrides_gitignore :: proc(t: ^testing.T) {
env := create_test_env()
defer destroy_test_env(&env)
create_git_repo(env, "repo")
create_file(env, "repo/.gitignore", "*.log\n")
create_file(env, "repo/.ignore", "important.log\n")
create_file(env, "repo/debug.log")
create_file(env, "repo/important.log")
assert_output(t, env, nil,
{include_hidden = true, ignore_mode = .Respected},
{"repo/", "repo/.gitignore", "repo/.ignore"},
) )
} }

View File

@@ -2,6 +2,107 @@ package findr
import "core:testing" import "core:testing"
// Unanchored literal: "foo" matches a path component named exactly "foo" at
// any depth, but not a longer name ("foobar") or a parent directory ("foo/bar").
@(test)
test_glob_simple :: proc(t: ^testing.T) {
testing.expect(t, glob_match("foo", "foo", false))
testing.expect(t, glob_match("foo", "bar/foo", false))
testing.expect(t, !glob_match("foo", "foobar", false))
testing.expect(t, !glob_match("foo", "foo/bar", false))
}
// Anchored literal (third argument true): "foo" matches only the path "foo",
// not "bar/foo" or "foobar".
@(test)
test_glob_anchored :: proc(t: ^testing.T) {
testing.expect(t, glob_match("foo", "foo", true))
testing.expect(t, !glob_match("foo", "bar/foo", true))
testing.expect(t, !glob_match("foo", "foobar", true))
}
// `*` matches any run of characters, including none (".log" matches "*.log");
// the literal suffix is still required.
@(test)
test_glob_star :: proc(t: ^testing.T) {
testing.expect(t, glob_match("*.log", "test.log", false))
testing.expect(t, glob_match("*.log", ".log", false))
testing.expect(t, !glob_match("*.log", "test.txt", false))
testing.expect(t, !glob_match("*.log", "dir/test", false))
}
// `?` matches exactly one character: neither two ("ab.log") nor zero (".log").
@(test)
test_glob_question :: proc(t: ^testing.T) {
testing.expect(t, glob_match("?.log", "a.log", false))
testing.expect(t, !glob_match("?.log", "ab.log", false))
testing.expect(t, !glob_match("?.log", ".log", false))
}
// `[abc]` matches one character from the listed set and rejects others.
@(test)
test_glob_char_class :: proc(t: ^testing.T) {
testing.expect(t, glob_match("[abc].log", "a.log", false))
testing.expect(t, glob_match("[abc].log", "b.log", false))
testing.expect(t, !glob_match("[abc].log", "d.log", false))
}
// `[!abc]` matches one character that is NOT in the listed set.
@(test)
test_glob_negated_class :: proc(t: ^testing.T) {
testing.expect(t, glob_match("[!abc].log", "d.log", false))
testing.expect(t, !glob_match("[!abc].log", "a.log", false))
}
// A leading dot is an ordinary required character: ".env" matches ".env" at
// any depth, but not "env" (dot missing) or "x.env" (extra prefix).
@(test)
test_glob_dot_literal :: proc(t: ^testing.T) {
testing.expect(t, glob_match(".env", ".env", false))
testing.expect(t, glob_match(".env", "dir/.env", false))
testing.expect(t, !glob_match(".env", "env", false))
testing.expect(t, !glob_match(".env", "x.env", false))
}
// Leading `**/` matches zero or more directories: "foo" and "a/b/foo" match,
// names that merely start with "foo" do not.
@(test)
test_glob_globstar_prefix :: proc(t: ^testing.T) {
testing.expect(t, glob_match("**/foo", "foo", false))
testing.expect(t, glob_match("**/foo", "a/b/foo", false))
testing.expect(t, !glob_match("**/foo", "foobar", false))
testing.expect(t, !glob_match("**/foo", "a/foobar", false))
}
// Trailing `/**` matches everything below the directory, at any depth, but not
// the directory itself ("abc") nor a sibling sharing the prefix ("abcd/x").
@(test)
test_glob_globstar_suffix :: proc(t: ^testing.T) {
testing.expect(t, glob_match("abc/**", "abc/x", false))
testing.expect(t, glob_match("abc/**", "abc/x/y", false))
testing.expect(t, !glob_match("abc/**", "abc", false))
testing.expect(t, !glob_match("abc/**", "abcd/x", false))
}
// `/**/` in the middle matches zero or more intermediate directories; the
// final component must still be exactly "bar".
@(test)
test_glob_globstar_middle :: proc(t: ^testing.T) {
testing.expect(t, glob_match("foo/**/bar", "foo/bar", false))
testing.expect(t, glob_match("foo/**/bar", "foo/x/bar", false))
testing.expect(t, !glob_match("foo/**/bar", "foo/barx", false))
testing.expect(t, !glob_match("foo/**/bar", "foo/x/y/baz", false))
}
// A backslash makes the next character literal: `\!foo` matches the name
// "!foo" and does not match "foo".
@(test)
test_glob_backslash_escape :: proc(t: ^testing.T) {
testing.expect(t, glob_match("\\!foo", "!foo", false))
testing.expect(t, !glob_match("\\!foo", "foo", false))
}
// Inside a glob pattern `#` is an ordinary character: "#foo" matches "#foo"
// and not "foo".
@(test)
test_glob_hash_literal :: proc(t: ^testing.T) {
testing.expect(t, glob_match("#foo", "#foo", false))
testing.expect(t, !glob_match("#foo", "foo", false))
}
// `#` combined with `*`: both the leading and the trailing `#` are required,
// and the `*` between them may match nothing ("##").
@(test)
test_glob_hash_pattern :: proc(t: ^testing.T) {
testing.expect(t, glob_match("#*#", "#test#", false))
testing.expect(t, glob_match("#*#", "##", false))
testing.expect(t, !glob_match("#*#", "test", false))
testing.expect(t, !glob_match("#*#", "#test", false))
}
// The empty pattern matches only the empty path.
@(test)
test_glob_empty :: proc(t: ^testing.T) {
testing.expect(t, glob_match("", "", false))
testing.expect(t, !glob_match("", "foo", false))
}
@(test) @(test)
test_is_ignored_basic :: proc(t: ^testing.T) { test_is_ignored_basic :: proc(t: ^testing.T) {
gi := parse("*.env\n") gi := parse("*.env\n")

View File

@@ -201,3 +201,10 @@ glob_destroy :: proc(gp: ^GlobPattern) {
delete(gp.classes) delete(gp.classes)
delete(gp.tokens) delete(gp.tokens)
} }
// One-shot convenience wrapper: compiles `pattern` (anchored or not), matches
// it against `path`, and releases the compiled pattern before returning.
glob_match :: proc(pattern: string, path: string, anchored: bool) -> bool {
	compiled := glob_compile(pattern, anchored)
	defer glob_destroy(&compiled)
	return glob_match_compiled(&compiled, path)
}

View File

@@ -1,111 +0,0 @@
package findr
import "core:testing"
// One-shot convenience wrapper: compiles `pattern` (anchored or not), matches
// it against `path`, and releases the compiled pattern before returning.
glob_match :: proc(pattern: string, path: string, anchored: bool) -> bool {
	compiled := glob_compile(pattern, anchored)
	defer glob_destroy(&compiled)
	return glob_match_compiled(&compiled, path)
}
// Unanchored literal: "foo" matches a path component named exactly "foo" at
// any depth, but not a longer name ("foobar") or a parent directory ("foo/bar").
@(test)
test_glob_simple :: proc(t: ^testing.T) {
testing.expect(t, glob_match("foo", "foo", false))
testing.expect(t, glob_match("foo", "bar/foo", false))
testing.expect(t, !glob_match("foo", "foobar", false))
testing.expect(t, !glob_match("foo", "foo/bar", false))
}
// Anchored literal (third argument true): "foo" matches only the path "foo",
// not "bar/foo" or "foobar".
@(test)
test_glob_anchored :: proc(t: ^testing.T) {
testing.expect(t, glob_match("foo", "foo", true))
testing.expect(t, !glob_match("foo", "bar/foo", true))
testing.expect(t, !glob_match("foo", "foobar", true))
}
// `*` matches any run of characters, including none (".log" matches "*.log");
// the literal suffix is still required.
@(test)
test_glob_star :: proc(t: ^testing.T) {
testing.expect(t, glob_match("*.log", "test.log", false))
testing.expect(t, glob_match("*.log", ".log", false))
testing.expect(t, !glob_match("*.log", "test.txt", false))
testing.expect(t, !glob_match("*.log", "dir/test", false))
}
// `?` matches exactly one character: neither two ("ab.log") nor zero (".log").
@(test)
test_glob_question :: proc(t: ^testing.T) {
testing.expect(t, glob_match("?.log", "a.log", false))
testing.expect(t, !glob_match("?.log", "ab.log", false))
testing.expect(t, !glob_match("?.log", ".log", false))
}
// `[abc]` matches one character from the listed set and rejects others.
@(test)
test_glob_char_class :: proc(t: ^testing.T) {
testing.expect(t, glob_match("[abc].log", "a.log", false))
testing.expect(t, glob_match("[abc].log", "b.log", false))
testing.expect(t, !glob_match("[abc].log", "d.log", false))
}
// `[!abc]` matches one character that is NOT in the listed set.
@(test)
test_glob_negated_class :: proc(t: ^testing.T) {
testing.expect(t, glob_match("[!abc].log", "d.log", false))
testing.expect(t, !glob_match("[!abc].log", "a.log", false))
}
// A leading dot is an ordinary required character: ".env" matches ".env" at
// any depth, but not "env" (dot missing) or "x.env" (extra prefix).
@(test)
test_glob_dot_literal :: proc(t: ^testing.T) {
testing.expect(t, glob_match(".env", ".env", false))
testing.expect(t, glob_match(".env", "dir/.env", false))
testing.expect(t, !glob_match(".env", "env", false))
testing.expect(t, !glob_match(".env", "x.env", false))
}
// Leading `**/` matches zero or more directories: "foo" and "a/b/foo" match,
// names that merely start with "foo" do not.
@(test)
test_glob_globstar_prefix :: proc(t: ^testing.T) {
testing.expect(t, glob_match("**/foo", "foo", false))
testing.expect(t, glob_match("**/foo", "a/b/foo", false))
testing.expect(t, !glob_match("**/foo", "foobar", false))
testing.expect(t, !glob_match("**/foo", "a/foobar", false))
}
// Trailing `/**` matches everything below the directory, at any depth, but not
// the directory itself ("abc") nor a sibling sharing the prefix ("abcd/x").
@(test)
test_glob_globstar_suffix :: proc(t: ^testing.T) {
testing.expect(t, glob_match("abc/**", "abc/x", false))
testing.expect(t, glob_match("abc/**", "abc/x/y", false))
testing.expect(t, !glob_match("abc/**", "abc", false))
testing.expect(t, !glob_match("abc/**", "abcd/x", false))
}
// `/**/` in the middle matches zero or more intermediate directories; the
// final component must still be exactly "bar".
@(test)
test_glob_globstar_middle :: proc(t: ^testing.T) {
testing.expect(t, glob_match("foo/**/bar", "foo/bar", false))
testing.expect(t, glob_match("foo/**/bar", "foo/x/bar", false))
testing.expect(t, !glob_match("foo/**/bar", "foo/barx", false))
testing.expect(t, !glob_match("foo/**/bar", "foo/x/y/baz", false))
}
// A backslash makes the next character literal: `\!foo` matches the name
// "!foo" and does not match "foo".
@(test)
test_glob_backslash_escape :: proc(t: ^testing.T) {
testing.expect(t, glob_match("\\!foo", "!foo", false))
testing.expect(t, !glob_match("\\!foo", "foo", false))
}
// Inside a glob pattern `#` is an ordinary character: "#foo" matches "#foo"
// and not "foo".
@(test)
test_glob_hash_literal :: proc(t: ^testing.T) {
testing.expect(t, glob_match("#foo", "#foo", false))
testing.expect(t, !glob_match("#foo", "foo", false))
}
// `#` combined with `*`: both the leading and the trailing `#` are required,
// and the `*` between them may match nothing ("##").
@(test)
test_glob_hash_pattern :: proc(t: ^testing.T) {
testing.expect(t, glob_match("#*#", "#test#", false))
testing.expect(t, glob_match("#*#", "##", false))
testing.expect(t, !glob_match("#*#", "test", false))
testing.expect(t, !glob_match("#*#", "#test", false))
}
// The empty pattern matches only the empty path.
@(test)
test_glob_empty :: proc(t: ^testing.T) {
testing.expect(t, glob_match("", "", false))
testing.expect(t, !glob_match("", "foo", false))
}

64
findr/prof.odin Normal file
View File

@@ -0,0 +1,64 @@
package findr
import "base:runtime"
import "core:prof/spall"
import "core:sync"
SPALL_ENABLED :: #config(SPALL_ENABLED, ODIN_DEBUG)
spall_ctx: spall.Context
@(thread_local) spall_buffer: spall.Buffer
@(thread_local) spall_backing: []u8
// Instrumentation-entry hook (marked `@(instrumentation_enter)`).
// When SPALL_ENABLED is true it records a begin event for `loc` in the calling
// thread's `spall_buffer`; when false the body is compiled out.
// `proc_address` and `call_site_return_address` are not used here.
@(instrumentation_enter)
spall_enter :: proc "contextless" (
proc_address, call_site_return_address: rawptr,
loc: runtime.Source_Code_Location,
) {
when SPALL_ENABLED {
spall._buffer_begin(&spall_ctx, &spall_buffer, "", "", loc)
}
}
// Instrumentation-exit hook (marked `@(instrumentation_exit)`).
// When SPALL_ENABLED is true it records an end event in the calling thread's
// `spall_buffer`; when false the body is compiled out.
// None of the three parameters is used here.
@(instrumentation_exit)
spall_exit :: proc "contextless" (
proc_address, call_site_return_address: rawptr,
loc: runtime.Source_Code_Location,
) {
when SPALL_ENABLED {
spall._buffer_end(&spall_ctx, &spall_buffer)
}
}
// Process-wide profiler setup, to be called once from the main thread.
// When SPALL_ENABLED: creates the shared trace context for "findr.spall",
// allocates the calling thread's backing storage and buffer, and labels the
// thread "main". Compiled to an empty proc otherwise.
prof_init :: proc() {
when SPALL_ENABLED {
// The file name is relative, so it resolves against the current working directory.
// NOTE(review): the `false` / `1.0` arguments are passed through as-is; confirm
// their meaning against the core:prof/spall documentation.
spall_ctx = spall.context_create_with_scale("findr.spall", false, 1.0)
spall_backing = make([]u8, spall.BUFFER_DEFAULT_SIZE)
spall_buffer = spall.buffer_create(spall_backing, u32(sync.current_thread_id()))
spall._buffer_name_thread(&spall_ctx, &spall_buffer, "main")
}
}
// Counterpart of prof_init. When SPALL_ENABLED: destroys the calling thread's
// buffer, frees its backing storage, then destroys the shared context.
// The buffer goes first because destroying it still takes the context.
prof_destroy :: proc() {
when SPALL_ENABLED {
spall.buffer_destroy(&spall_ctx, &spall_buffer)
delete(spall_backing)
spall.context_destroy(&spall_ctx)
}
}
// Per-thread profiler setup. When SPALL_ENABLED: allocates backing storage and
// a buffer for the calling thread (both are thread-local variables) and labels
// the thread `name` in the trace. Uses the shared `spall_ctx`, which this proc
// does not create.
prof_thread_init :: proc(name: string) {
when SPALL_ENABLED {
spall_backing = make([]u8, spall.BUFFER_DEFAULT_SIZE)
spall_buffer = spall.buffer_create(spall_backing, u32(sync.current_thread_id()))
spall._buffer_name_thread(&spall_ctx, &spall_buffer, name)
}
}
// Counterpart of prof_thread_init. When SPALL_ENABLED: destroys the calling
// thread's buffer and frees its backing storage. The shared context is left
// untouched.
prof_thread_destroy :: proc() {
when SPALL_ENABLED {
spall.buffer_destroy(&spall_ctx, &spall_buffer)
delete(spall_backing)
}
}

13
findr/profile.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Build a debug (profiling-enabled) findr and run it once to produce a Spall trace.
#
# Usage: profile.sh [TARGET]
#   TARGET  directory tree to walk (default: ~/git.verticalaxion.com)
set -euo pipefail

DIR="$(cd "$(dirname "$0")" && pwd)"
TARGET="${1:-$HOME/git.verticalaxion.com}"

echo "Building findr-prof..."
odin build "$DIR" -debug -out:"$DIR/findr-prof"

echo "Running profiler..."
"$DIR/findr-prof" -E .git -E .jj -HI "$TARGET"
echo
# The binary opens "findr.spall" by relative name, so the trace is written to
# the directory this script was invoked from, not necessarily to $DIR.
echo "Spall trace: $PWD/findr.spall"

View File

@@ -1,128 +0,0 @@
package findr
import "core:strings"
import "core:sync"
import "core:sys/linux"
import "core:thread"
// Shared state for the find_repos worker pool.
RepoPool :: struct {
// Directory paths waiting to be scanned; workers take from the end.
queue: [dynamic]string,
// Guards `queue`.
queue_mutex: sync.Mutex,
// Posted once per queued path, and once per worker at shutdown to wake it.
queue_sema: sync.Atomic_Sema,
// Caller-provided output list; receives a cloned path for each directory
// where has_git_dir reports true.
results: ^[dynamic]string,
// Guards appends to `results`.
results_lock: sync.Mutex,
// Number of directories queued or currently being processed; starts at
// len(roots), incremented per queued child, decremented after each directory.
active: i64,
// Signalled when `active` drops to zero.
done: sync.One_Shot_Event,
// Handles of the worker threads started by find_repos.
threads: []^thread.Thread,
}
// Walks every directory tree under `roots` with a pool of worker threads and
// appends to `results` a cloned path for each directory where has_git_dir
// reports true. Blocks until the whole traversal has finished.
//
// Inputs:
// - roots: starting directories; nothing happens when empty
// - results: caller-owned list that receives the cloned paths
// - thread_count: number of workers to start; values below 1 are treated as 1
find_repos :: proc(roots: []string, results: ^[dynamic]string, thread_count: int) {
	if len(roots) == 0 do return

	// Without at least one worker nothing would ever signal `done`, and the
	// wait below would block forever.
	worker_count := max(thread_count, 1)

	pool := new(RepoPool)
	pool.queue = make([dynamic]string)
	pool.results = results
	pool.active = i64(len(roots))
	pool.threads = make([]^thread.Thread, worker_count)

	// Seed the queue: one semaphore post per queued path.
	for root in roots {
		root_clone, _ := strings.clone(root)
		append(&pool.queue, root_clone)
		sync.atomic_sema_post(&pool.queue_sema)
	}

	for i in 0 ..< worker_count {
		t := thread.create(repo_worker)
		t.data = rawptr(pool)
		t.init_context = context
		thread.start(t)
		pool.threads[i] = t
	}

	sync.one_shot_event_wait(&pool.done)

	// Wake every worker still blocked on the semaphore so it can see the empty
	// queue and exit.
	for _ in 0 ..< worker_count {
		sync.atomic_sema_post(&pool.queue_sema)
	}
	for t in pool.threads {
		thread.destroy(t)
	}
	delete(pool.threads)

	// Free any paths that were never taken by a worker.
	for path in pool.queue {
		delete(path)
	}
	delete(pool.queue)
	free(pool)
}
// Worker thread body for find_repos. Repeatedly waits on the queue semaphore,
// takes the most recently queued directory and scans it. A wake-up that finds
// the queue empty ends the worker.
repo_worker :: proc(t: ^thread.Thread) {
	pool := cast(^RepoPool)t.data

	for {
		sync.atomic_sema_wait(&pool.queue_sema)

		// Take the last queued path, if any, while holding the queue lock.
		dir_path: string
		got_work := false
		sync.mutex_lock(&pool.queue_mutex)
		if count := len(pool.queue); count > 0 {
			dir_path = pool.queue[count - 1]
			ordered_remove(&pool.queue, count - 1)
			got_work = true
		}
		sync.mutex_unlock(&pool.queue_mutex)

		if !got_work {
			if sync.atomic_load_explicit(&pool.active, .Acquire) == 0 {
				sync.one_shot_event_signal(&pool.done)
			}
			return
		}

		process_repo_dir(pool, dir_path)
		delete(dir_path)

		// atomic_sub returns the previous value: 1 means this was the last
		// outstanding directory.
		if sync.atomic_sub_explicit(&pool.active, 1, .Release) == 1 {
			sync.one_shot_event_signal(&pool.done)
		}
	}
}
// Scans one directory: records it in pool.results when has_git_dir reports
// true, then queues every child entry of type .DIR (other than ".", ".." and
// ".git") for later processing. Directories that cannot be opened are
// silently skipped.
process_repo_dir :: proc(pool: ^RepoPool, dir_path: string) {
	c_dir := strings.clone_to_cstring(dir_path)
	if c_dir == nil {
		return
	}
	defer delete(c_dir)

	dir_fd, open_errno := linux.open(c_dir, {.DIRECTORY, .CLOEXEC})
	if open_errno != .NONE {
		return
	}
	defer linux.close(dir_fd)

	if has_git_dir(dir_fd) {
		repo_path, _ := strings.clone(dir_path)
		sync.mutex_lock(&pool.results_lock)
		append(pool.results, repo_path)
		sync.mutex_unlock(&pool.results_lock)
	}

	dirent_buf: [32 * 1024]u8
	for {
		read, errno := linux.getdents(dir_fd, dirent_buf[:])
		if read <= 0 || errno != .NONE {
			break
		}

		offset := 0
		for entry in linux.dirent_iterate_buf(dirent_buf[:read], &offset) {
			name := linux.dirent_name(entry)
			if name == "." || name == ".." || name == ".git" {
				continue
			}
			if entry.type != .DIR {
				continue
			}

			child_path := join_path(dir_path, name)
			// Count the child as outstanding before it becomes visible to workers.
			sync.atomic_add_explicit(&pool.active, 1, .Relaxed)
			sync.mutex_lock(&pool.queue_mutex)
			append(&pool.queue, child_path)
			sync.mutex_unlock(&pool.queue_mutex)
			sync.atomic_sema_post(&pool.queue_sema)
		}
	}
}

View File

@@ -4,16 +4,26 @@ import "core:fmt"
import "core:os" import "core:os"
import "core:strings" import "core:strings"
import "core:sync" import "core:sync"
import "core:sync/chan"
import "core:sys/linux" import "core:sys/linux"
import "core:text/regex" import "core:text/regex"
import "core:thread" import "core:thread"
OUTPUT_BUF_SIZE :: 64 * 1024 IgnoreMode :: enum {
Respected, // skip gitignored, prune ignored dirs (fd -H default)
All, // ignore .gitignore entirely, descend everywhere (fd -HI)
Ignored, // emit ONLY gitignored files, prune ignored dirs (findr original)
}
WalkOptions :: struct { WalkOptions :: struct {
pattern: string, // regex on basename; "" = match all pattern: string, // regex on basename; "" = match all
excludes: []string, // glob patterns to skip entirely excludes: []string, // glob patterns to skip entirely (fd -E)
include_hidden: bool, // true = include dotfiles (fd -H)
ignore_mode: IgnoreMode,
}
RawEntry :: struct {
name: string,
type: linux.Dirent_Type,
} }
GIContext :: struct { GIContext :: struct {
@@ -27,17 +37,17 @@ WorkItem :: struct {
rel: string, // relative path from repo root ("" = root) rel: string, // relative path from repo root ("" = root)
gi_ctx: ^GIContext, // gitignore chain (nil = outside any repo) gi_ctx: ^GIContext, // gitignore chain (nil = outside any repo)
in_repo: bool, // true if inside a git repo in_repo: bool, // true if inside a git repo
in_ignored: bool, // true if inside a gitignored directory
} }
WalkerPool :: struct { WalkerPool :: struct {
queue: [dynamic]WorkItem, queue: [dynamic]WorkItem,
queue_mutex: sync.Mutex, queue_mutex: sync.Mutex,
queue_sema: sync.Atomic_Sema, queue_sema: sync.Atomic_Sema,
result_chan: chan.Chan([]u8), results: ^[dynamic]string,
results_mutex: sync.Mutex,
active: i64, active: i64,
done: sync.One_Shot_Event, done: sync.One_Shot_Event,
threads: []^thread.Thread, threads: [dynamic]^thread.Thread,
opts: WalkOptions, opts: WalkOptions,
pattern_re: regex.Regular_Expression, pattern_re: regex.Regular_Expression,
has_pattern: bool, has_pattern: bool,
@@ -46,51 +56,14 @@ WalkerPool :: struct {
contexts_lock: sync.Mutex, contexts_lock: sync.Mutex,
} }
Collector_Data :: struct {
ch: chan.Chan([]u8),
results: ^[dynamic]string,
}
collect_worker :: proc(t: ^thread.Thread) {
data := cast(^Collector_Data)t.data
for {
batch, ok := chan.recv(data.ch)
if !ok do break
start := 0
for i in 0 ..< len(batch) {
if batch[i] == '\n' {
if i > start {
s, _ := strings.clone(string(batch[start:i]))
append(data.results, s)
}
start = i + 1
}
}
delete(batch)
}
}
walk :: proc(roots: []string, results: ^[dynamic]string, opts: WalkOptions, thread_count: int) { walk :: proc(roots: []string, results: ^[dynamic]string, opts: WalkOptions, thread_count: int) {
if len(roots) == 0 do return if len(roots) == 0 do return
ch, _ := chan.create(chan.Chan([]u8), max(2 * thread_count, 2), context.allocator)
defer chan.destroy(ch)
data := new(Collector_Data)
data.ch = ch
data.results = results
defer free(data)
collector := thread.create(collect_worker)
collector.data = rawptr(data)
collector.init_context = context
thread.start(collector)
pool := new(WalkerPool) pool := new(WalkerPool)
pool.queue = make([dynamic]WorkItem) pool.queue = make([dynamic]WorkItem)
pool.result_chan = ch pool.results = results
pool.active = i64(len(roots)) pool.active = i64(len(roots))
pool.threads = make([]^thread.Thread, thread_count) pool.threads = make([dynamic]^thread.Thread)
pool.all_contexts = make([dynamic]^GIContext) pool.all_contexts = make([dynamic]^GIContext)
pool.opts = opts pool.opts = opts
pool.exclude_gi = nil pool.exclude_gi = nil
@@ -127,7 +100,7 @@ walk :: proc(roots: []string, results: ^[dynamic]string, opts: WalkOptions, thre
t.data = rawptr(pool) t.data = rawptr(pool)
t.init_context = context t.init_context = context
thread.start(t) thread.start(t)
pool.threads[i] = t append(&pool.threads, t)
} }
sync.one_shot_event_wait(&pool.done) sync.one_shot_event_wait(&pool.done)
@@ -167,47 +140,16 @@ walk :: proc(roots: []string, results: ^[dynamic]string, opts: WalkOptions, thre
} }
free(pool) free(pool)
chan.close(ch)
thread.join(collector)
thread.destroy(collector)
}
flush_buf :: proc(ch: chan.Chan([]u8), local: ^[dynamic]u8) {
if len(local) == 0 do return
batch := local[:]
local^ = make([dynamic]u8, 0, OUTPUT_BUF_SIZE)
chan.send(ch, batch)
}
append_path :: proc(buf: ^[dynamic]u8, parent, name: string, trailing_slash: bool) {
need_sep := len(parent) > 0 && parent[len(parent) - 1] != '/'
size := len(parent) + len(name) + 1
if need_sep do size += 1
if trailing_slash do size += 1
old_len := len(buf)
reserve(buf, old_len + size)
resize(buf, old_len + size)
pos := old_len
pos += copy(buf[pos:], parent)
if need_sep {buf[pos] = '/'; pos += 1}
pos += copy(buf[pos:], name)
if trailing_slash {buf[pos] = '/'; pos += 1}
buf[pos] = '\n'
} }
walk_worker :: proc(t: ^thread.Thread) { walk_worker :: proc(t: ^thread.Thread) {
pool := cast(^WalkerPool)t.data pool := cast(^WalkerPool)t.data
local_buf := make([dynamic]u8, 0, OUTPUT_BUF_SIZE) prof_thread_init("walker")
defer { defer prof_thread_destroy()
if len(local_buf) > 0 {
flush_buf(pool.result_chan, &local_buf) local_results := make([dynamic]string, 0, 256)
} defer delete(local_results)
delete(local_buf)
}
for { for {
sync.atomic_sema_wait(&pool.queue_sema) sync.atomic_sema_wait(&pool.queue_sema)
@@ -225,36 +167,30 @@ walk_worker :: proc(t: ^thread.Thread) {
ordered_remove(&pool.queue, last) ordered_remove(&pool.queue, last)
sync.mutex_unlock(&pool.queue_mutex) sync.mutex_unlock(&pool.queue_mutex)
process_dir(pool, item, &local_buf) process_dir(pool, item, &local_results)
delete(item.path) delete(item.path)
if len(item.rel) > 0 {delete(item.rel)} if len(item.rel) > 0 {delete(item.rel)}
if len(local_buf) >= OUTPUT_BUF_SIZE {
flush_buf(pool.result_chan, &local_buf)
}
old := sync.atomic_sub_explicit(&pool.active, 1, .Release) old := sync.atomic_sub_explicit(&pool.active, 1, .Release)
if old == 1 { if old == 1 {
sync.one_shot_event_signal(&pool.done) sync.one_shot_event_signal(&pool.done)
} }
} }
if len(local_results) > 0 {
sync.mutex_lock(&pool.results_mutex)
for res in local_results {
append(pool.results, res)
}
sync.mutex_unlock(&pool.results_mutex)
}
} }
process_dir :: proc(pool: ^WalkerPool, item: WorkItem, local_buf: ^[dynamic]u8) { process_dir :: proc(pool: ^WalkerPool, item: WorkItem, local_results: ^[dynamic]string) {
dir_path := item.path dir_path := item.path
cpath := strings.clone_to_cstring(dir_path)
if cpath == nil do return
defer delete(cpath)
fd, open_err := linux.open(cpath, {.DIRECTORY, .CLOEXEC})
if open_err != .NONE do return
defer linux.close(fd)
has_git := false has_git := false
if !item.in_ignored { entries := read_dir_entries(dir_path, &has_git)
has_git = has_git_dir(fd) defer free_entries(&entries)
}
gi_ctx := item.gi_ctx gi_ctx := item.gi_ctx
rel := item.rel rel := item.rel
@@ -266,10 +202,7 @@ process_dir :: proc(pool: ^WalkerPool, item: WorkItem, local_buf: ^[dynamic]u8)
child_in_repo := has_git || item.in_repo child_in_repo := has_git || item.in_repo
gi: ^Gitignore = nil gi := load_ignore_patterns(dir_path, child_in_repo)
if !item.in_ignored {
gi = load_ignore_patterns(dir_path, child_in_repo)
}
if gi != nil { if gi != nil {
new_ctx := new(GIContext) new_ctx := new(GIContext)
new_ctx.gi = gi new_ctx.gi = gi
@@ -285,41 +218,44 @@ process_dir :: proc(pool: ^WalkerPool, item: WorkItem, local_buf: ^[dynamic]u8)
gi_ctx = new_ctx gi_ctx = new_ctx
} }
buf: [32 * 1024]u8
rel_buf: [4096]u8 rel_buf: [4096]u8
for { for entry in entries {
n, errno := linux.getdents(fd, buf[:]) if entry.name == ".git" do continue
if n <= 0 || errno != .NONE do break
offs := 0 is_dir := entry.type == .DIR
for d in linux.dirent_iterate_buf(buf[:n], &offs) { is_nondir := entry.type != .DIR
name := linux.dirent_name(d)
if name == "." || name == ".." do continue
if name == ".git" do continue
is_dir := d.type == .DIR if pool.exclude_gi != nil && is_ignored(pool.exclude_gi, entry.name, is_dir) {
is_nondir := d.type != .DIR
if pool.exclude_gi != nil && is_ignored(pool.exclude_gi, name, is_dir) {
continue continue
} }
entry_rel := build_rel(rel_buf[:], rel, name) if !pool.opts.include_hidden && len(entry.name) > 0 && entry.name[0] == '.' {
continue
}
entry_rel := build_rel(rel_buf[:], rel, entry.name)
ignored := false ignored := false
if item.in_ignored { if gi_ctx != nil && pool.opts.ignore_mode != .All {
ignored = true
} else if gi_ctx != nil {
ignored = check_chain(gi_ctx, entry_rel, is_dir) ignored = check_chain(gi_ctx, entry_rel, is_dir)
} }
if is_dir { should_emit: bool
if ignored && matches_pattern(pool, name) { if ignored {
append_path(local_buf, dir_path, name, true) should_emit = pool.opts.ignore_mode == .Ignored
} else {
should_emit = pool.opts.ignore_mode != .Ignored
} }
if is_dir {
if should_emit && matches_pattern(pool, entry.name) {
dir_path_out := join_path_dir(dir_path, entry.name)
append(local_results, dir_path_out)
}
if !ignored {
child_rel, _ := strings.clone(entry_rel) child_rel, _ := strings.clone(entry_rel)
child_path := join_path(dir_path, name) child_path := join_path(dir_path, entry.name)
push_work( push_work(
pool, pool,
WorkItem { WorkItem {
@@ -327,13 +263,13 @@ process_dir :: proc(pool: ^WalkerPool, item: WorkItem, local_buf: ^[dynamic]u8)
rel = child_rel, rel = child_rel,
gi_ctx = gi_ctx, gi_ctx = gi_ctx,
in_repo = child_in_repo, in_repo = child_in_repo,
in_ignored = ignored,
}, },
) )
} else if is_nondir {
if ignored && matches_pattern(pool, name) {
append_path(local_buf, dir_path, name, false)
} }
} else if is_nondir {
if should_emit && matches_pattern(pool, entry.name) {
full_path := join_path(dir_path, entry.name)
append(local_results, full_path)
} }
} }
} }
@@ -391,13 +327,46 @@ push_work :: proc(pool: ^WalkerPool, item: WorkItem) {
sync.atomic_sema_post(&pool.queue_sema) sync.atomic_sema_post(&pool.queue_sema)
} }
has_git_dir :: proc(fd: linux.Fd) -> bool { read_dir_entries :: proc(dir_path: string, has_git: ^bool) -> [dynamic]RawEntry {
git_fd, err := linux.openat(fd, ".git", {.DIRECTORY, .CLOEXEC}) entries := make([dynamic]RawEntry)
if err == .NONE {
linux.close(git_fd) cpath := strings.clone_to_cstring(dir_path)
return true if cpath == nil do return entries
fd, err := linux.open(cpath, {.DIRECTORY, .CLOEXEC})
delete(cpath)
if err != .NONE do return entries
buf: [8192]u8
has_git^ = false
for {
n, errno := linux.getdents(fd, buf[:])
if n <= 0 || errno != .NONE do break
offs := 0
for d in linux.dirent_iterate_buf(buf[:n], &offs) {
name := linux.dirent_name(d)
if name == "." || name == ".." do continue
if name == ".git" && d.type == .DIR {
has_git^ = true
} }
return false
cloned := strings.clone(name)
append(&entries, RawEntry{name = cloned, type = d.type})
}
}
linux.close(fd)
return entries
}
free_entries :: proc(entries: ^[dynamic]RawEntry) {
for &entry in entries {
delete(entry.name)
}
delete(entries^)
} }
load_ignore_patterns :: proc(dir_path: string, in_repo: bool) -> ^Gitignore { load_ignore_patterns :: proc(dir_path: string, in_repo: bool) -> ^Gitignore {
@@ -447,3 +416,19 @@ join_path :: proc(parent, child: string) -> string {
copy(buf[pos:], child) copy(buf[pos:], child)
return string(buf) return string(buf)
} }
// Joins `parent` and `child` into a newly allocated "parent/child/" string,
// always ending in '/'. A separator is inserted unless `parent` already ends
// with '/'; note that an empty `parent` therefore yields "/child/".
// The result is allocated with context.allocator and owned by the caller.
join_path_dir :: proc(parent, child: string) -> string {
	ends_with_slash := len(parent) > 0 && parent[len(parent) - 1] == '/'
	sep_len := 0 if ends_with_slash else 1

	// parent + optional separator + child + trailing '/'
	out := make([]u8, len(parent) + sep_len + len(child) + 1, context.allocator)

	n := copy(out, parent)
	if !ends_with_slash {
		out[n] = '/'
		n += 1
	}
	n += copy(out[n:], child)
	out[n] = '/'
	return string(out)
}

View File

@@ -95,6 +95,7 @@
devShells.default = pkgs.mkShell { devShells.default = pkgs.mkShell {
buildInputs = with pkgs; [ buildInputs = with pkgs; [
fd
nushell nushell
libsodium libsodium

View File

@@ -18,6 +18,8 @@ main :: proc() {
cmd_init(&cmd) cmd_init(&cmd)
case "version": case "version":
cmd_version(&cmd) cmd_version(&cmd)
case "deps":
cmd_deps(&cmd)
case "list": case "list":
cmd_list(&cmd) cmd_list(&cmd)
case "backup", "add": case "backup", "add":

127
scan.odin
View File

@@ -1,21 +1,137 @@
package main package main
import "core:fmt"
import "core:os" import "core:os"
import "core:strings"
import "core:sync"
import "core:terminal"
import "findr" fd_counter: sync.Atomic_Mutex
fd_seq: int
// Caller is responsible for freeing paths // Caller is responsible for freeing paths
scan_path :: proc(search_path: string, cfg: Config) -> (paths: [dynamic]string, ok: bool) { scan_path :: proc(search_path: string, cfg: Config) -> (paths: [dynamic]string, ok: bool) {
opts := findr.WalkOptions { if terminal.is_terminal(os.stdout) {
pattern = cfg.ScanConfig.Matcher, fmt.printf("Searching for all files in \"%s\"...\n", search_path)
excludes = cfg.ScanConfig.Exclude[:],
} }
findr.walk({search_path}, &paths, opts, os.get_processor_core_count()) all_files, all_ok := run_fd(build_fd_args(search_path, cfg, true))
if !all_ok {
return
}
// Progress message, printed only when stdout is a terminal.
// Fixed the "fies" typo and aligned the wording with the earlier
// "Searching for all files" message.
if terminal.is_terminal(os.stdout) {
	fmt.printf("Searching for unignored files in \"%s\"...\n", search_path)
}
unignored_files, unignored_ok := run_fd(build_fd_args(search_path, cfg, false))
if !unignored_ok {
return
}
unignored_set := make(map[string]bool, len(unignored_files), context.temp_allocator)
for file in unignored_files {
unignored_set[file] = true
}
for file in all_files {
if !(file in unignored_set) {
append(&paths, file)
}
}
ok = true ok = true
return return
} }
// Builds the argument vector for one `fd` invocation:
//   fd -a <matcher> [-E <exclude>]... (-HI | -H) <search_path>
// `include_ignored` selects "-HI" instead of "-H".
// The returned slice is allocated with context.temp_allocator.
@(private = "file")
build_fd_args :: proc(search_path: string, cfg: Config, include_ignored: bool) -> []string {
	// "fd", "-a", matcher + an "-E <pattern>" pair per exclude + flag and path.
	capacity := 3 + 2 * len(cfg.ScanConfig.Exclude) + 2
	argv := make([dynamic]string, 0, capacity, context.temp_allocator)

	append(&argv, "fd", "-a", cfg.ScanConfig.Matcher)
	for pattern in cfg.ScanConfig.Exclude {
		append(&argv, "-E", pattern)
	}
	append(&argv, "-HI" if include_ignored else "-H")
	append(&argv, search_path)

	return argv[:]
}
// Runs the command in `args` with its stdout redirected to a temporary file,
// then returns the non-empty, whitespace-trimmed lines of that output.
//
// Returns:
// - lines: output lines, allocated with context.temp_allocator
// - ok: false when the temp file cannot be opened, the process fails to start
//   or wait, exits with a non-zero code, or its output cannot be read back
run_fd :: proc(args: []string) -> (lines: []string, ok: bool) {
	tmp_path := next_fd_tmp_path()
	out_file, open_err := os.open(tmp_path, os.O_CREATE | os.O_WRONLY | os.O_TRUNC)
	if open_err != nil {
		// TODO: Log a message here
		return
	}
	// From here on the temp file exists; remove it on every exit path.
	defer os.remove(tmp_path)

	child, start_err := os.process_start(
		os.Process_Desc{command = args, stdout = out_file, stderr = nil},
	)
	// This process's handle to the temp file is no longer needed once the
	// start attempt has been made.
	os.close(out_file)
	if start_err != nil {
		return
	}

	state, wait_err := os.process_wait(child)
	if wait_err != nil || state.exit_code != 0 {
		return
	}

	data, read_err := os.read_entire_file_from_path(tmp_path, context.temp_allocator)
	if read_err != nil {
		return
	}

	output := strings.trim_space(string(data))
	if len(output) == 0 {
		ok = true
		return
	}

	pieces := strings.split(output, "\n", context.temp_allocator)
	kept := make([dynamic]string, 0, len(pieces), context.temp_allocator)
	for piece in pieces {
		if trimmed := strings.trim_space(piece); len(trimmed) > 0 {
			append(&kept, trimmed)
		}
	}
	return kept[:], true
}
// Returns a temp-file path of the form "/tmp/envr-fd-<pid>-<n>", where <n> is
// a counter incremented under `fd_counter` on each call.
// The string is produced by fmt.tprintf.
@(private = "file")
next_fd_tmp_path :: proc() -> string {
	seq: int
	{
		sync.atomic_mutex_lock(&fd_counter)
		defer sync.atomic_mutex_unlock(&fd_counter)
		seq = fd_seq
		fd_seq += 1
	}
	return fmt.tprintf("/tmp/envr-fd-%d-%d", os.get_pid(), seq)
}
// Reports whether scanning is unavailable, i.e. the Fd feature is missing
// from `feats`.
cant_scan :: proc(feats: AvailableFeatures) -> bool {
	has_fd := Feature.Fd in feats
	return !has_fd
}
find_unbacked :: proc(local_files: []string, db_files: []EnvFile) -> []string { find_unbacked :: proc(local_files: []string, db_files: []EnvFile) -> []string {
// Lives until the end of the function
backed_set := make(map[string]bool, len(db_files), context.temp_allocator) backed_set := make(map[string]bool, len(db_files), context.temp_allocator)
for file in db_files { for file in db_files {
backed_set[file.Path] = true backed_set[file.Path] = true
@@ -29,3 +145,4 @@ find_unbacked :: proc(local_files: []string, db_files: []EnvFile) -> []string {
} }
return unbacked[:] return unbacked[:]
} }

View File

@@ -3,10 +3,14 @@ package main
import "core:fmt" import "core:fmt"
import "core:os" import "core:os"
import "core:path/filepath" import "core:path/filepath"
import "core:strings"
import "core:testing" import "core:testing"
@(test) @(test)
test_scan_path_finds_gitignored_env_files :: proc(t: ^testing.T) { test_scan_path_finds_gitignored_env_files :: proc(t: ^testing.T) {
feats := check_features()
testing.expect(t, cant_scan(feats) == false)
base := fmt.tprintf("/tmp/envr-scan-test-%d", os.get_pid()) base := fmt.tprintf("/tmp/envr-scan-test-%d", os.get_pid())
os.mkdir_all(base) os.mkdir_all(base)
defer os.remove_all(base) defer os.remove_all(base)
@@ -38,12 +42,7 @@ test_scan_path_finds_gitignored_env_files :: proc(t: ^testing.T) {
} }
results, ok := scan_path(base, cfg) results, ok := scan_path(base, cfg)
defer { defer delete(results)
for path in results {
delete(path)
}
delete(results)
}
testing.expect(t, ok, "scan_path should succeed") testing.expect(t, ok, "scan_path should succeed")
found_env := false found_env := false
@@ -70,6 +69,9 @@ test_scan_path_finds_gitignored_env_files :: proc(t: ^testing.T) {
@(test) @(test)
test_scan_path_empty_dir :: proc(t: ^testing.T) { test_scan_path_empty_dir :: proc(t: ^testing.T) {
feats := check_features()
testing.expect(t, cant_scan(feats) == false)
base := fmt.tprintf("/tmp/envr-scan-empty-%d", os.get_pid()) base := fmt.tprintf("/tmp/envr-scan-empty-%d", os.get_pid())
os.mkdir_all(base) os.mkdir_all(base)
defer os.remove_all(base) defer os.remove_all(base)
@@ -83,3 +85,12 @@ test_scan_path_empty_dir :: proc(t: ^testing.T) {
testing.expect(t, ok, "scan_path should succeed") testing.expect(t, ok, "scan_path should succeed")
testing.expect(t, len(results) == 0, fmt.tprintf("expected 0 results, got %d", len(results))) testing.expect(t, len(results) == 0, fmt.tprintf("expected 0 results, got %d", len(results)))
} }
// Checks cant_scan against each combination of the Fd and Git features:
// scanning is possible exactly when Fd is present.
@(test)
test_scan_meets_expectations :: proc(t: ^testing.T) {
	no_features := AvailableFeatures{}
	git_only := AvailableFeatures{.Git}
	fd_only := AvailableFeatures{.Fd}
	fd_and_git := AvailableFeatures{.Fd, .Git}

	testing.expect(t, cant_scan(no_features), "no features should mean can't scan")
	testing.expect(t, cant_scan(git_only), "Git alone should mean can't scan")
	testing.expect(t, !cant_scan(fd_only), "having Fd should mean can scan")
	testing.expect(t, !cant_scan(fd_and_git), "both Fd and Git should mean can scan")
}