diff --git a/PLAN.md b/PLAN.md index 28bdb90..5892cb7 100644 --- a/PLAN.md +++ b/PLAN.md @@ -1,99 +1,115 @@ -# findr — Gitignored File Finder +# findr — Native Odin File Finder (fd Replacement) ## Overview -findr is a native Odin tool that finds **gitignored files** within git repositories. It replaces envr's current approach of running `fd` twice (all files vs. unignored files) and diffing the results. - -**Simplified scope:** findr does one thing — walks directories, finds git repos, reads each repo's `.gitignore`, and prints every gitignored file. No flags, no filtering, no pattern matching. envr handles result filtering itself. - -## Current fd Usage in envr (being replaced) - -1. **`scan.odin:13-43`** (`scan_path`) — runs `fd` twice per search path: - - Run 1: `fd -a [-E ]... -HI ` → all files including gitignored - - Run 2: `fd -a [-E ]... -H ` → hidden but NOT gitignored - - Diff = gitignored files only -2. Both go through `run_fd` (`scan.odin:68-118`), which spawns a subprocess and captures output via temp files. - -After findr integration, `scan_path` calls `findr.walk(path)` directly — no subprocess, no double-run, no diff. +findr is a native Odin file finder that replaces `fd` in envr. It supports three ignore modes for A/B benchmarking against specific fd commands, plus a unique "emit ONLY gitignored files" mode that gives envr a single-pass advantage over fd's double-run-and-diff approach. 
## Directory Structure ``` findr/ - findr.odin # main + CLI (positional dir args only) - walker.odin # recursive directory walker using core:sys/linux getdents + findr.odin # main + CLI (hand-rolled arg parsing) + walker.odin # parallel directory walker (getdents + thread pool) gitignore.odin # .gitignore parsing + glob→regex transpilation + matching test_env.odin # test harness: temp dir, mock filesystem, assert helpers - findr_test.odin # integration tests (10 tests) + findr_test.odin # integration tests gitignore_test.odin # transpilation + matching unit tests (22 tests) ``` -## Decisions - -- **Scope**: findr prints ALL gitignored files. No regex filtering, no exclude patterns, no type filters. envr post-processes the output. -- **Gitignore matching**: Transpile gitignore glob patterns to regex, then use `core:text/regex`. No dedicated glob matcher. -- **Stat avoidance**: Use `core:sys/linux` getdents directly — read `dirent.type` from the kernel, never call stat. -- **Architecture**: Separate directory with its own `main`. Core logic (`walk` proc + `gitignore` package) designed to be importable into envr later. - ## CLI Interface ``` -findr [dir1] [dir2] ... +findr [-I] [--ignored] [--no-hidden] [-E <glob>]... [pattern] [path]... ``` -No flags. Defaults to `.` if no dirs given. Prints absolute or relative paths (as given) to stdout, one per line. +Defaults: `include_hidden=true, ignore_mode=.Respected` (matches fd's `-H` behavior). + +| fd command | findr equivalent | +|---|---| +| `fd -a \.env -E ... -HI ~/` | `findr -I -E ... \.env ~/` | +| `fd -a \.env -E ... -H ~/` | `findr -E ... \.env ~/` | +| `fd . -H ~/` | `findr ~/` | +| `fd . -HI ~/` | `findr -I ~/` | +| `fd . 
~/` (no flags) | `findr --no-hidden ~/` | +| *(findr original)* | `findr --ignored ~/` | ## Build ```bash odin build findr -o:speed -out:findr/findr +odin test findr ``` -## How It Works +## Architecture -``` -walk(dir): - entries = getdents(dir) # via core:sys/linux, zero stat calls - if entries contains ".git/": - gi = parse(.gitignore) # if present - for entry in entries: - if entry is gitignored file: - emit entry path - if entry is dir (not ignored): - walk(entry) # recurse to find nested repos - else: - for entry in entries: - if entry is dir: - walk(entry) # descend looking for repos +### Two Orthogonal Axes (matching fd's semantics) + +1. **Hidden files** (`.` prefix): `include_hidden=true` includes them, `false` excludes them +2. **Gitignore**: three modes (see `IgnoreMode` below) + +### Types + +```odin +IgnoreMode :: enum { + Respected, // skip gitignored, prune ignored dirs (fd -H default) + All, // ignore .gitignore entirely, descend everywhere (fd -HI) + Ignored, // emit ONLY gitignored files, prune ignored dirs (findr original) +} + +WalkOptions :: struct { + pattern: string, // regex on basename; "" = match all + excludes: []string, // glob patterns to skip entirely (fd -E) + include_hidden: bool, // true = include dotfiles (fd -H) + ignore_mode: IgnoreMode, +} ``` -Key behaviors: -- **Nested repos**: When a repo is found, subdirectories are still traversed to find nested repos. Gitignored directories are pruned (not descended into). -- **Flat gitignore**: Only the root `.gitignore` is read. `.gitignore` files in subdirectories of a repo are ignored. -- **Non-repo dirs**: Traversed recursively to find repos. No gitignore rules apply. +### process_dir Filtering Order Per Entry -## Performance Architecture +Each directory traversal carries a `WorkItem` with the absolute path, a relative path from repo root, and a `^GIContext` linked list of gitignore contexts (one per ancestor directory with a `.gitignore`). -### Implemented +1. 
Skip `.git` directory +2. **Load nested `.gitignore`**: If this directory has a `.gitignore`, push a new `GIContext` onto the chain (tracked in `pool.all_contexts` for cleanup) +3. **Per entry**: + - Skip non-regular files (symlinks, sockets, etc. — parity with `fd -t f`) + - **Excludes**: if entry matches any exclude glob → skip entirely + - **Hidden**: if `!include_hidden && name[0] == '.'` → skip entirely + - **Gitignore status**: check `GIContext` chain deepest-to-root via `check_chain`, passing the **relative path** (not basename). First match wins (correct gitignore precedence). Nested negation overrides parent rules. + - **Mode-based decision**: -- **Stat avoidance via `dirent.type`** — Uses `core:sys/linux` getdents directly, bypassing `core:os` which calls `openat` + `fstat` per entry. File type comes free from the directory entry. -- **Prune ignored directories** — When a directory matches a gitignore pattern, it is not descended into. Skips potentially thousands of readdir calls. -- **Parallel traversal** — 8-worker thread pool with shared LIFO queue and futex-based semaphore signaling. 5.4x speedup over serial on home directory. +| Mode | gitignored file | gitignored dir | normal file | normal dir | +|---|---|---|---|---| +| `.All` | emit if pattern matches | descend | emit if pattern matches | descend | +| `.Respected` | skip | prune | emit if pattern matches | descend | +| `.Ignored` | emit if pattern matches | prune | skip | descend | -### Future (if needed) +**Nested repos**: When a directory contains `.git/`, the gitignore context chain is reset (new repo root). The relative path resets to `""`. Nested repos are always traversed to find deeper repos. -- BufWriter on stdout for large result sets -- Arena allocators for path strings +### Performance Architecture + +- **Stat avoidance via `dirent.type`** — Uses `core:sys/linux` getdents directly, bypassing `core:os` which calls `openat` + `fstat` per entry. 
+- **Prune ignored directories** — When a directory matches a gitignore/exclude pattern, it is not descended into. +- **Parallel traversal** — Worker thread pool with shared LIFO queue and futex-based semaphore signaling. 5.4x speedup over serial on home directory. + +## Decisions + +- **Gitignore matching**: Transpile gitignore glob patterns to regex, then use `core:text/regex`. No dedicated glob matcher. +- **Pattern matching**: Pattern is a regex (same as fd), matched against basename via `regex.match` (unanchored search). +- **Excludes**: Glob patterns compiled via the same gitignore transpiler (`parse()`). Reuses tested transpilation logic. +- **Nested gitignore**: Every `.gitignore` file within a repo is read, not just the root. Each directory's rules are scoped relative to that directory's path. Negation in a child overrides parent rules (correct gitignore precedence). +- **Stat avoidance**: Use `core:sys/linux` getdents directly — read `dirent.type` from the kernel, never call stat. `DT_UNKNOWN` treated as regular file (correct for ext4/tmpfs; may miss dirs on XFS/BTRFS/FUSE — tracked as Phase 6 Step 3). ## Testing Strategy - **In-process integration tests** — Tests call `walk()` directly (not via subprocess), build mock filesystems in temp dirs, and compare sorted output. - **Unit tests** — Pure-function tests for glob→regex transpilation and gitignore matching. - **Output sorting for determinism** — Always sort output lines before comparison. -- **Memory tracking** — Odin's test runner reports leaks automatically. All 32 tests pass with zero leaks. +- **Memory tracking** — Odin's test runner reports leaks automatically. 
### Test Coverage (findr_test.odin) +**`.Ignored` mode (original findr behavior):** + | Test | What it covers | |---|---| | `test_basic_gitignored` | Repo with `.gitignore`, gitignored files emitted, normal files skipped | @@ -102,14 +118,35 @@ Key behaviors: | `test_dir_only_pattern` | `node_modules/` pattern doesn't emit file results | | `test_multiple_repos` | Multiple repos in one tree, each with its own `.gitignore` | | `test_nested_repos` | Repo inside a repo, both scanned independently | -| `test_gitignore_in_subdir_ignored` | Subdirectory `.gitignore` files are not read | | `test_no_gitignore_file` | Repo with `.git/` but no `.gitignore` produces nothing | | `test_empty_gitignore` | Comments and blank lines only → no results | | `test_multiple_search_dirs` | Multiple top-level search dirs in one call | +| `test_nested_gitignore_read` | Nested `.gitignore` rules applied (subdir patterns work) | +| `test_nested_gitignore_negation` | Nested negation overrides parent pattern | +| `test_multisegment_pattern` | `build/output.txt` matches relative path, not just basename | -### Gitignore Unit Tests (gitignore_test.odin) +**`.All` mode (fd -HI parity):** -22 tests covering: simple/anchored patterns, `*`, `?`, `[abc]`, `[!abc]`, dot escaping, globstar variants, backslash escapes, empty patterns, basic matching, negation, dir-only, comments, blank lines, last-match-wins, env patterns. 
+| Test | What it covers | +|---|---| +| `test_all_mode_emits_all_files` | All files emitted regardless of gitignore | +| `test_all_mode_descends_everywhere` | Gitignored dirs still descended | + +**`.Respected` mode (fd -H parity):** + +| Test | What it covers | +|---|---| +| `test_respected_mode_skips_gitignored` | Gitignored files skipped | +| `test_respected_mode_prunes_ignored_dirs` | Gitignored dirs pruned | +| `test_nested_gitignore_respected_mode` | Nested negation respected in `.Respected` mode | + +**Filters:** + +| Test | What it covers | +|---|---| +| `test_excludes_prune_dirs` | Excluded dirs not descended | +| `test_pattern_filters_results` | Only pattern-matching files emitted | +| `test_no_hidden_skips_dotfiles` | Hidden files skipped when include_hidden=false | ## Glob→Regex Transpilation Rules @@ -130,108 +167,152 @@ Key behaviors: ### Phase 1: Gitignore Transpiler + Tests ✅ -**Goal:** Isolated, fully-tested glob→regex transpiler. - -**Result:** 22 tests, all passing, zero leaks. - ---- +22 tests, all passing, zero leaks. ### Phase 2: findr Walker + Tests ✅ -**Goal:** Working tool that finds gitignored files in git repos. - -**Built:** -- `walker.odin` — Parallel DFS using `core:sys/linux` getdents with 8-worker thread pool. Finds repos, reads `.gitignore`, emits gitignored files, recurses into subdirs for nested repos. -- `findr.odin` — Minimal CLI: `findr [dirs...]`, no flags. -- `test_env.odin` — Test harness with temp dirs and mock filesystems. -- `findr_test.odin` — 10 integration tests. - -**Result:** All 32 tests pass (22 gitignore + 10 walker), zero leaks. - ---- +Parallel DFS using getdents with worker thread pool. 32 total tests pass, zero leaks. ### Phase 3: Parallel Traversal ✅ -**Goal:** Parallelize directory descent for large trees. - -**Result:** Worker pool with shared LIFO queue, 8 threads, futex-based semaphore signaling. 852ms vs 4.57s serial (5.4x speedup) on `~`. 
Serial code has been removed — parallel is the only implementation. - ---- +8-worker thread pool, shared LIFO queue, futex-based semaphore. 852ms vs 4.57s serial (5.4x speedup). Serial code removed — parallel is the only implementation. ### Phase 4: Benchmark ✅ -**Goal:** Quantify performance vs fd on large directory trees. +findr found 227 gitignored files on `~` in 852ms. fd's double-run walked ~1.1M entries. -**Result:** findr found 227 gitignored files on `~` in 852ms. fd's double-run (all vs unignored) walked ~1.1M entries. findr's pruning of ignored directories (node_modules, dist, etc.) gives a massive advantage. +### Phase 5: fd-Parity API ✅ ---- +**Goal:** Make findr replicate specific fd commands for A/B benchmarking, plus keep the unique gitignored-only mode. -### Phase 5: Integrate into envr (future) +**Built:** +- `IgnoreMode` enum (`.Respected`, `.All`, `.Ignored`) and `WalkOptions` struct +- New `walk` signature: `walk(root, results, opts: WalkOptions, thread_count)` +- Rewritten `process_dir` with centralized mode-based filtering +- Pattern matching via `core:text/regex` on basenames +- Exclude patterns compiled via existing `gitignore.parse()` +- CLI arg parsing: `-I`, `--ignored`, `--no-hidden`, `-E <glob>` +- 7 new integration tests (17 total) covering all three modes, excludes, pattern, and hidden filtering -**Goal:** Replace ALL `fd` subprocess usage in envr with in-process findr calls. Remove `Feature.Fd` entirely. +**Result:** All tests pass with zero leaks (22 gitignore + 17 walker = 39 at the end of this phase; Phase 6 adds 3 nested-gitignore tests, for 20 walker / 42 total). -#### Part A: Extend findr API (`findr/walker.odin`) +### Phase 6: Parity (partially done) -1. **Add `WalkMode` enum** and `mode` field to `WalkerPool`: +**Goal:** Achieve file-count parity with fd. An invalid benchmark (different result sets) is useless. + +#### Steps 1-2: Nested gitignore + relative path matching ✅ + +**What was done:** + +1. 
**`Match` enum + `check_match`** in `gitignore.odin` — Tri-state return (`None`/`Ignored`/`Unignored`) so nested negation overrides work correctly. `is_ignored` wraps it as before. + +2. **`GIContext` linked list** in `walker.odin` — Each context holds a `^Gitignore`, `base_rel` (relative path from repo root to this dir), and `parent: ^GIContext`. `process_dir` loads `.gitignore` in every directory within a repo (not just roots). `check_chain` walks deepest-to-root, first match wins (correct gitignore precedence). + +3. **`WorkItem` struct** replaced plain `string` in the work queue: ```odin - WalkMode :: enum { GitignoredFiles, GitRepos } - ``` - -2. **Extract `run_pool`** helper — shared pool setup/teardown (create threads, wait for done, cleanup). Both `walk` and `find_repos` call it. - -3. **New `walk` signature with filtering:** - ```odin - walk :: proc(root: string, results: ^[dynamic]string, matcher: string = "", exclude: []string = nil) - ``` - - Compiles `matcher` into a regex (stored as `pool.matcher_re`); tested against each file's basename via `regex.find`. Empty = emit all. - - Parses `exclude` patterns into a `^Gitignore` via existing `parse()` (stored as `pool.exclude_gi`). Entries matching any exclude pattern are skipped entirely (not emitted, not descended into). - - Sets `pool.mode = .GitignoredFiles` - -4. **`process_dir` filtering logic** (in the `has_git` branch): - - Exclude check first: `is_ignored(exclude_gi, entry.name, is_dir)` → skip entirely (prune dirs, skip files) - - Gitignore check: if ignored, emit file only if `matcher_re` is nil or matches basename - - Not excluded/ignored: descend if dir - - Non-repo branch also prunes dirs matching exclude patterns - -5. **New `find_repos` function:** - ```odin - find_repos :: proc(root: string) -> [dynamic]string - ``` - - Creates pool with `mode = .GitRepos`, calls `run_pool`, returns collected repo roots - - Parallel (reuses worker pool architecture) - -6. 
**New `process_dir_repos`** — simpler than `process_dir`: - - If `has_git`: record `dir_path` as repo root - - Always descend into subdirs (except `.git` itself) to find nested repos - - No gitignore/exclude/matcher processing - -7. **`walk_worker` switch** — centralized control flow per AGENTS.md convention: - ```odin - switch pool.mode { - case .GitignoredFiles: process_dir(pool, dir_path) - case .GitRepos: process_dir_repos(pool, dir_path) + WorkItem :: struct { + path: string, // absolute directory path + rel: string, // relative path from repo root ("" = root) + gi_ctx: ^GIContext, // gitignore chain (nil = outside any repo) } ``` -8. **Cleanup in `walk`:** destroy `matcher_re` and `exclude_gi` after `run_pool` completes. +4. **Relative path matching** — `check_chain` strips each context's `base_rel` prefix to get the locally-scoped relative path. Multi-segment patterns like `build/output.txt` now match correctly. -9. **Add `import "core:text/regex"`** to walker.odin. +5. **Symlink filtering** — Only `DT_REG` and `DT_UNKNOWN` entries are emitted (matching `fd -t f`). Symlinks (`DT_LNK`) are skipped. -**No changes to:** `findr.odin`, `test_env.odin`, `gitignore.odin` (default params preserve existing behavior). +6. **`DT_UNKNOWN` handling** — Treated as regular files (no stat fallback). Correct for ext4/tmpfs; may miss directories on XFS/BTRFS/FUSE. -#### Part B: Rewrite `scan_path` (`scan.odin`) +**Memory management:** All `GIContext` objects tracked in `pool.all_contexts` (mutex-protected append). Gitignore objects and context structs freed in bulk when `walk` completes. 
-- Add `import "findr"` -- `scan_path` becomes ~3 lines: call `findr.walk(search_path, &paths, cfg.ScanConfig.Matcher, cfg.ScanConfig.Exclude[:])` -- **Delete:** `build_fd_args`, `run_fd`, `next_fd_tmp_path`, `fd_counter`, `fd_seq`, `cant_scan` -- Remove unused imports (`core:sync`, `core:terminal`) +**Parity achieved** (`~`, 5M+ files): -#### Part C: Rewrite `find_git_roots` (`config.odin`) +| Mode | findr | fd equivalent | diff | +|---|---|---|---| +| `.All` (-I) | 5,426,451 | `fd -HI -t f --exclude .git` | **0 (exact)** | +| `.Respected` | 4,442,505 | `fd -H -t f --exclude .git` | +1,417 (0.03%) | +| `--no-hidden` | 393,605 | `fd -t f --exclude .git` | +17 (0.004%) | -- Add `import "findr"` -- Replace `run_fd` call with `findr.find_repos(sp)` — no more `filepath.dir` post-processing needed (find_repos returns repo roots directly) +On the envr repo itself, all three modes are **exact match (0 diffs)**. The tiny residual diffs on `~` are likely from global gitignore (`~/.config/git/ignore`) and `.git/info/exclude` which fd reads but findr doesn't. -#### Part D: Remove `Feature.Fd` everywhere +#### Step 3: DT_UNKNOWN stat fallback (TODO) + +On XFS/BTRFS/FUSE filesystems, `dirent.type` returns `DT_UNKNOWN`. Currently findr treats these as regular files, which means directories may be missed (not descended into). Add a stat fallback in `read_dir_entries` when `d.type == .UNKNOWN` to determine the real type before proceeding. This is not needed for ext4/tmpfs (what tests and most Linux systems use). + +### Phase 7: Performance Optimization (next) + +**Goal:** Make findr competitive with or faster than fd across all modes. 
Current benchmark (`~`, hyperfine 5 runs): + +| Command | Mean | vs fd equivalent | +|---|---|---| +| `findr --ignored` | 984ms | *(no fd equivalent)* | +| `findr --no-hidden` | 542ms | 3.2x slower than `fd -t f` (170ms) | +| `findr` (respected) | 4.134s | 2.4x slower than `fd -H -t f` (1.745s) | +| `findr -I` (all) | 3.821s | 1.9x slower than `fd -HI -t f` (1.972s) | + +**Bottleneck analysis:** + +1. **Mutex contention on result collection** — Every file append goes through `sync.mutex_lock(&pool.results_mutex)` → `append` → `sync.mutex_unlock`. With 5M+ files across 16 threads, workers serialize on the mutex. + +2. **`--ignored` regression** — Was 402ms before nested gitignore support, now 984ms. The overhead comes from loading `.gitignore` in every directory and checking the context chain per entry. Since `--ignored` mode prunes gitignored dirs, many of these `.gitignore` loads are wasted (the dir won't be descended into anyway). Optimization: skip loading `.gitignore` for directories that will be pruned. + +3. **Per-string heap allocation** — Every path string is individually `strings.clone`'d and `delete`'d. Millions of alloc/free calls. + +**Optimization plan:** + +1. **Per-thread result buffers** — Each worker accumulates results in a thread-local `[dynamic]string`. Merge into shared array once at the end (single-threaded concat). + +2. **Lazy gitignore loading for `.Ignored` mode** — Only load `.gitignore` when we need to decide whether to emit or descend. In `.Ignored` mode, we can check the parent context first and skip loading if the directory itself is already ignored. + +3. **Arena allocator for paths** — Replace per-string `strings.clone` with a bump allocator. Free everything in one `arena_destroy` at the end. + +4. **Larger getdents buffer** — Increase from 8KB to 64KB to reduce syscall count. + +5. **BufWriter on stdout** — Batch `write` syscalls instead of per-line `fmt.println`. 
+ +**Success criteria:** +- `.All` mode faster than `fd -HI -t f --exclude .git` +- `.Respected` mode faster than `fd -H -t f --exclude .git` +- `--ignored` mode faster than `fd -HI -t f --exclude .git` (restore pre-regression advantage) +- Re-benchmark after each step using `findr/bench.sh` + +### Phase 8: Integrate into envr + +**Goal:** Replace ALL `fd` subprocess usage in envr with in-process findr calls. Remove `Feature.Fd` entirely. + +#### Part A: Rewrite `scan_path` (`scan.odin`) + +Replace the double-run-and-diff approach with a single `findr.walk` call using `.Ignored` mode: + +```odin +// Before: fd -HI + fd -H, then diff +// After: +findr.walk(search_path, &paths, WalkOptions{ + pattern = cfg.ScanConfig.Matcher, + excludes = cfg.ScanConfig.Exclude[:], + include_hidden = true, + ignore_mode = .Ignored, +}, thread_count) +``` + +**Delete:** `build_fd_args`, `run_fd`, `next_fd_tmp_path`, `fd_counter`, `fd_seq`, `cant_scan`. + +#### Part B: Add `find_repos` and rewrite `find_git_roots` (`config.odin`) + +Add a `find_repos` proc to findr that walks a tree and collects directories containing `.git/`: + +```odin +find_repos :: proc(root: string, results: ^[dynamic]string, thread_count: int) +``` + +- Reuses worker pool architecture +- `process_dir` emits `dir_path` when `has_git == true` +- Always descends into subdirs (except `.git`) to find nested repos +- No gitignore/exclude/pattern processing + +Replace `find_git_roots`'s `run_fd` call with `findr.find_repos`. 
+ +#### Part C: Remove `Feature.Fd` everywhere | File | Change | |---|---| @@ -240,9 +321,9 @@ Key behaviors: | `cmd_check.odin` | Same removal | | `cmd_deps.odin` | Remove fd table row | | `db.odin` | Change check to `.Git not_in feats` only; update error message | -| `scan_test.odin` | Remove `test_scan_meets_expectations` (cant_scan test); remove `cant_scan` assertions from other tests | +| `scan_test.odin` | Remove `cant_scan` tests and assertions | -#### Part E: Verification +#### Part D: Verification ```bash odin build findr -o:speed -out:findr/findr @@ -251,20 +332,11 @@ odin build . -o:speed -out:envr odin test . ``` -#### Execution order - -1. **findr API changes** → build + test findr (32 tests should pass with default params) -2. **Rewrite scan_path** + delete dead code -3. **Rewrite find_git_roots** -4. **Remove Feature.Fd** across all files -5. **Update tests** → build + test everything - ## Risks | Risk | Mitigation | |---|---| -| Single-threaded may be slow on huge trees | Resolved — parallel traversal implemented (Phase 3) | | Gitignore edge cases (`**/foo`, `foo/**/bar`) | Comprehensive gitignore_test.odin with spec examples | -| dirent.type may be UNKNOWN on some filesystems | Fall back to stat only when type is UNKNOWN | -| Missing nested `.env` files in monorepos | Accepted limitation — flat gitignore model | -| Memory allocation churn from path strings | Use thread-local arena allocators in Phase 3 | +| `DT_UNKNOWN` on XFS/BTRFS/FUSE | Phase 6 Step 3: stat fallback for unknown types | +| Global gitignore (`~/.config/git/ignore`) and `.git/info/exclude` not read | Causes ~0.03% delta vs fd. Acceptable for envr's use case (finds `.env` files in repos). 
| +| Thread safety of `regex.match` on shared `Regular_Expression` | Odin regex is read-only after compilation; `match` returns per-call `Captures` | diff --git a/bench.sh b/bench.sh new file mode 100755 index 0000000..a4e8542 --- /dev/null +++ b/bench.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +set -euo pipefail + +BENCH_DIR="$(cd "$(dirname "$0")" && pwd)" +TARGET="${1:-$HOME}" +RESULTS_FILE="$BENCH_DIR/bench-results.md" +FINDR="$BENCH_DIR/findr" + +echo "=== findr benchmark suite ===" +echo "Target: $TARGET" +echo + +# --- pre-flight checks --- +if ! command -v fd &>/dev/null; then + echo "ERROR: fd is not on PATH" >&2 + exit 1 +fi +if ! command -v hyperfine &>/dev/null; then + echo "ERROR: hyperfine is not on PATH" >&2 + exit 1 +fi + +# --- build findr if missing or stale --- +NEEDS_BUILD=false +if [[ ! -f "$BENCH_DIR/findr" ]]; then + NEEDS_BUILD=true +else + # rebuild if any .odin source is newer than the binary + if find "$BENCH_DIR" -name '*.odin' -newer "$BENCH_DIR/findr" | grep -q .; then + NEEDS_BUILD=true + fi +fi +if $NEEDS_BUILD; then + echo "Building findr..." + odin build "$BENCH_DIR" -o:speed -out:"$BENCH_DIR/findr" +fi +echo + +# --- file counts --- +echo "=== File counts ===" +printf " findr --ignored : %8d\n" "$("$FINDR" --ignored "$TARGET" 2>/dev/null | wc -l)" +echo +printf " fd . -t f --exclude .git : %8d\n" "$(fd . -t f --exclude .git "$TARGET" 2>/dev/null | wc -l)" +printf " findr --no-hidden : %8d\n" "$("$FINDR" --no-hidden "$TARGET" 2>/dev/null | wc -l)" +echo +printf " fd . -H -t f --exclude .git : %8d\n" "$(fd . -H -t f --exclude .git "$TARGET" 2>/dev/null | wc -l)" +printf " findr (respect) : %8d\n" "$("$FINDR" "$TARGET" 2>/dev/null | wc -l)" +echo +printf " fd . -HI -t f --exclude .git: %8d\n" "$(fd . 
-HI -t f --exclude .git "$TARGET" 2>/dev/null | wc -l)" +printf " findr -I (all) : %8d\n" "$("$FINDR" -I "$TARGET" 2>/dev/null | wc -l)" +echo + +# --- benchmarks --- +echo "=== Benchmarks (hyperfine, 5 runs, 2 warmups) ===" +echo +hyperfine \ + --warmup 2 \ + --runs 5 \ + --export-markdown "$RESULTS_FILE" \ + "$FINDR --ignored \"$TARGET\" > /dev/null" \ + "fd . -t f --exclude .git \"$TARGET\" > /dev/null" \ + "$FINDR --no-hidden \"$TARGET\" > /dev/null" \ + "fd . -H -t f --exclude .git \"$TARGET\" > /dev/null" \ + "$FINDR \"$TARGET\" > /dev/null" \ + "fd . -HI -t f --exclude .git \"$TARGET\" > /dev/null" \ + "$FINDR -I \"$TARGET\" > /dev/null" +echo + +echo "=== Results written to $RESULTS_FILE ===" diff --git a/findr.odin b/findr.odin index 93fdb3e..66ee957 100644 --- a/findr.odin +++ b/findr.odin @@ -2,19 +2,63 @@ package findr import "core:fmt" import "core:os" +import "core:strings" main :: proc() { args := os.args - search_dirs := make([dynamic]string) - defer delete(search_dirs) + opts: WalkOptions + opts.include_hidden = true + opts.ignore_mode = .Respected - for i in 1 ..< len(args) { - append(&search_dirs, args[i]) + excludes := make([dynamic]string) + defer delete(excludes) + + pattern := "" + paths := make([dynamic]string) + defer delete(paths) + + i := 1 + for i < len(args) { + arg := args[i] + switch { + case arg == "-I": + opts.ignore_mode = .All + case arg == "--ignored": + opts.ignore_mode = .Ignored + case arg == "--no-hidden": + opts.include_hidden = false + case arg == "-E": + i += 1 + if i < len(args) { + append(&excludes, args[i]) + } + case strings.has_prefix(arg, "-E"): + append(&excludes, arg[2:]) + case len(arg) > 0 && arg[0] == '-': + // unknown flag, skip + case: + if pattern == "" { + pattern = arg + } else { + append(&paths, arg) + } + } + i += 1 } - if len(search_dirs) == 0 { - append(&search_dirs, ".") + if len(paths) == 0 && pattern != "" && os.exists(pattern) { + append(&paths, pattern) + pattern = "" + } + + opts.pattern = pattern 
+ if len(excludes) > 0 { + opts.excludes = excludes[:] + } + + if len(paths) == 0 { + append(&paths, ".") } results := make([dynamic]string) @@ -24,12 +68,11 @@ main :: proc() { } thread_count := os.get_processor_core_count() - for dir in search_dirs { - walk(dir, &results, thread_count) + for dir in paths { + walk(dir, &results, opts, thread_count) } for r in results { fmt.println(r) } } - diff --git a/findr_test.odin b/findr_test.odin index a3c7e18..5a88247 100644 --- a/findr_test.odin +++ b/findr_test.odin @@ -3,6 +3,10 @@ package findr import "core:os" import "core:testing" +// ============================================================================ +// .Ignored mode tests (original findr behavior — emit ONLY gitignored files) +// ============================================================================ + @(test) test_basic_gitignored :: proc(t: ^testing.T) { env := create_test_env() @@ -14,7 +18,9 @@ test_basic_gitignored :: proc(t: ^testing.T) { create_file(env, "repo/secrets.env") create_file(env, "repo/normal.txt") - assert_output(t, env, nil, {"repo/.env", "repo/secrets.env"}) + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, { + "repo/.env", "repo/secrets.env", + }) } @(test) @@ -26,7 +32,7 @@ test_non_repo_not_scanned :: proc(t: ^testing.T) { create_file(env, "norepo/.gitignore", "*.env\n") create_file(env, "norepo/.env") - assert_output_empty(t, env, nil) + assert_output_empty(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}) } @(test) @@ -40,7 +46,9 @@ test_negation_pattern :: proc(t: ^testing.T) { create_file(env, "repo/secrets.env") create_file(env, "repo/prod.env") - assert_output(t, env, nil, {"repo/.env", "repo/secrets.env"}) + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, { + "repo/.env", "repo/secrets.env", + }) } @(test) @@ -55,8 +63,7 @@ test_dir_only_pattern :: proc(t: ^testing.T) { create_dir(env, "repo/ignored_dir") create_file(env, "repo/.gitignore", 
"ignored_dir/\n") - // dir-only patterns don't produce file results - assert_output(t, env, nil, {}) + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, {}) } @(test) @@ -72,7 +79,9 @@ test_multiple_repos :: proc(t: ^testing.T) { create_file(env, "repo2/.gitignore", "*.key\n") create_file(env, "repo2/secret.key") - assert_output(t, env, nil, {"repo1/a.env", "repo2/secret.key"}) + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, { + "repo1/a.env", "repo2/secret.key", + }) } @(test) @@ -88,11 +97,13 @@ test_nested_repos :: proc(t: ^testing.T) { create_file(env, "parent/child/.gitignore", "*.key\n") create_file(env, "parent/child/api.key") - assert_output(t, env, nil, {"parent/top.env", "parent/child/api.key"}) + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, { + "parent/top.env", "parent/child/api.key", + }) } @(test) -test_gitignore_in_subdir_ignored :: proc(t: ^testing.T) { +test_nested_gitignore_read :: proc(t: ^testing.T) { env := create_test_env() defer destroy_test_env(&env) @@ -103,10 +114,73 @@ test_gitignore_in_subdir_ignored :: proc(t: ^testing.T) { create_file(env, "repo/sub/secret.txt") create_file(env, "repo/sub/.env") - // .gitignore in subdir is not read (flat model). - // secret.txt should NOT appear (subdir .gitignore ignored). - // .env should NOT appear (it's nested, not top-level of repo). - assert_output(t, env, nil, {}) + // Both root and nested .gitignore are read. 
+ // secret.txt: ignored by sub/.gitignore (*.txt) + // .env: ignored by root .gitignore (*.env) + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, { + "repo/sub/secret.txt", "repo/sub/.env", + }) +} + +@(test) +test_nested_gitignore_negation :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.log\n") + create_dir(env, "repo/sub") + create_file(env, "repo/sub/.gitignore", "!important.log\n") + create_file(env, "repo/sub/important.log") + create_file(env, "repo/sub/debug.log") + + // Nested negation overrides root pattern. + // important.log: un-ignored by sub/.gitignore → NOT emitted in .Ignored mode + // debug.log: still ignored by root → emitted + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, { + "repo/sub/debug.log", + }) +} + +@(test) +test_nested_gitignore_respected_mode :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.log\n") + create_dir(env, "repo/sub") + create_file(env, "repo/sub/.gitignore", "!important.log\n") + create_file(env, "repo/sub/important.log") + create_file(env, "repo/sub/debug.log") + + // In .Respected mode: + // important.log: un-ignored by nested negation → emitted + // debug.log: ignored by root → skipped + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Respected}, { + "repo/.gitignore", "repo/sub/.gitignore", "repo/sub/important.log", + }) +} + +@(test) +test_multisegment_pattern :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "build/output.txt\n") + create_dir(env, "repo/build") + create_file(env, "repo/build/output.txt") + create_file(env, "repo/build/other.txt") + create_file(env, "repo/output.txt") + + // Multi-segment pattern matches relative 
path, not just basename. + // build/output.txt: matches → ignored + // build/other.txt: doesn't match → not ignored + // output.txt: doesn't match (needs build/ prefix) → not ignored + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}, { + "repo/build/output.txt", + }) } @(test) @@ -117,7 +191,7 @@ test_no_gitignore_file :: proc(t: ^testing.T) { create_git_repo(env, "repo") create_file(env, "repo/.env") - assert_output_empty(t, env, nil) + assert_output_empty(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}) } @(test) @@ -129,7 +203,7 @@ test_empty_gitignore :: proc(t: ^testing.T) { create_file(env, "repo/.gitignore", "\n\n# comment\n\n") create_file(env, "repo/.env") - assert_output_empty(t, env, nil) + assert_output_empty(t, env, nil, {include_hidden = true, ignore_mode = .Ignored}) } @(test) @@ -156,9 +230,135 @@ test_multiple_search_dirs :: proc(t: ^testing.T) { delete(results) } + opts := WalkOptions{include_hidden = true, ignore_mode = .Ignored} thread_count := os.get_processor_core_count() - walk(dir1, &results, thread_count) - walk(dir2, &results, thread_count) + walk(dir1, &results, opts, thread_count) + walk(dir2, &results, opts, thread_count) testing.expect_value(t, len(results), 2) } +// ============================================================================ +// .All mode tests (fd -HI parity — ignore gitignore entirely) +// ============================================================================ + +@(test) +test_all_mode_emits_all_files :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n") + create_file(env, "repo/.env") + create_file(env, "repo/secrets.env") + create_file(env, "repo/normal.txt") + + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .All}, { + "repo/.env", "repo/.gitignore", "repo/secrets.env", "repo/normal.txt", + }) +} + +@(test) +test_all_mode_descends_everywhere 
:: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "build/\n") + create_dir(env, "repo/build") + create_file(env, "repo/build/output.txt") + + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .All}, { + "repo/.gitignore", "repo/build/output.txt", + }) +} + +// ============================================================================ +// .Respected mode tests (fd -H parity — skip gitignored, prune ignored dirs) +// ============================================================================ + +@(test) +test_respected_mode_skips_gitignored :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n") + create_file(env, "repo/.env") + create_file(env, "repo/secrets.env") + create_file(env, "repo/normal.txt") + + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Respected}, { + "repo/.gitignore", "repo/normal.txt", + }) +} + +@(test) +test_respected_mode_prunes_ignored_dirs :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "build/\n") + create_dir(env, "repo/build") + create_file(env, "repo/build/output.txt") + create_file(env, "repo/main.txt") + + assert_output(t, env, nil, {include_hidden = true, ignore_mode = .Respected}, { + "repo/.gitignore", "repo/main.txt", + }) +} + +// ============================================================================ +// Filter tests (excludes, pattern, hidden) +// ============================================================================ + +@(test) +test_excludes_prune_dirs :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n") + create_file(env, "repo/.env") + create_dir(env, 
"repo/vendor") + create_file(env, "repo/vendor/lib.env") + + assert_output(t, env, nil, + {include_hidden = true, ignore_mode = .Ignored, excludes = {"vendor"}}, + {"repo/.env"}, + ) +} + +@(test) +test_pattern_filters_results :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n*.key\n") + create_file(env, "repo/.env") + create_file(env, "repo/secrets.env") + create_file(env, "repo/master.key") + + assert_output(t, env, nil, + {pattern = "\\.env$", include_hidden = true, ignore_mode = .Ignored}, + {"repo/.env", "repo/secrets.env"}, + ) +} + +@(test) +test_no_hidden_skips_dotfiles :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n") + create_file(env, "repo/.env") + create_file(env, "repo/secrets.env") + create_file(env, "repo/.hidden.env") + + assert_output(t, env, nil, + {include_hidden = false, ignore_mode = .Ignored}, + {"repo/secrets.env"}, + ) +} diff --git a/gitignore.odin b/gitignore.odin index d1acd6a..6af5536 100644 --- a/gitignore.odin +++ b/gitignore.odin @@ -161,17 +161,23 @@ parse :: proc(content: string) -> Gitignore { return gi } -is_ignored :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> bool { - matched := false +Match :: enum { None, Ignored, Unignored } + +check_match :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> Match { + result := Match.None for rule in gi.rules { if rule.dir_only && !is_dir do continue cap, ok := regex.match(rule.regex, path) regex.destroy(cap) if ok { - matched = !rule.negated + result = rule.negated ? 
.Unignored : .Ignored } } - return matched + return result +} + +is_ignored :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> bool { + return check_match(gi, path, is_dir) == .Ignored } destroy :: proc(gi: ^Gitignore) { diff --git a/test_env.odin b/test_env.odin index 5947240..f45adec 100644 --- a/test_env.odin +++ b/test_env.odin @@ -60,8 +60,14 @@ create_git_repo :: proc(env: TestEnv, path: string) { create_dir(env, sub) } -assert_output :: proc(t: ^testing.T, env: TestEnv, args: []string, expected: []string) { - results := collect_results(env, args) +assert_output :: proc( + t: ^testing.T, + env: TestEnv, + args: []string, + opts: WalkOptions, + expected: []string, +) { + results := collect_results(env, args, opts) defer { for r in results {delete(r)} delete(results) @@ -99,8 +105,13 @@ assert_output :: proc(t: ^testing.T, env: TestEnv, args: []string, expected: []s } } -assert_output_empty :: proc(t: ^testing.T, env: TestEnv, args: []string) { - results := collect_results(env, args) +assert_output_empty :: proc( + t: ^testing.T, + env: TestEnv, + args: []string, + opts: WalkOptions, +) { + results := collect_results(env, args, opts) defer { for r in results {delete(r)} delete(results) @@ -114,7 +125,7 @@ assert_output_empty :: proc(t: ^testing.T, env: TestEnv, args: []string) { } } -collect_results :: proc(env: TestEnv, args: []string) -> [dynamic]string { +collect_results :: proc(env: TestEnv, args: []string, opts: WalkOptions) -> [dynamic]string { results := make([dynamic]string) full_args := make([dynamic]string, 0, len(args) + 1, context.temp_allocator) @@ -123,7 +134,7 @@ collect_results :: proc(env: TestEnv, args: []string) -> [dynamic]string { thread_count := os.get_processor_core_count() for dir in full_args { - walk(dir, &results, thread_count) + walk(dir, &results, opts, thread_count) } for i in 0 ..< len(results) { @@ -141,4 +152,3 @@ collect_results :: proc(env: TestEnv, args: []string) -> [dynamic]string { return results } - diff --git 
a/walker.odin b/walker.odin index 163c154..678c2a0 100644 --- a/walker.odin +++ b/walker.odin @@ -5,15 +5,41 @@ import "core:os" import "core:strings" import "core:sync" import "core:sys/linux" +import "core:text/regex" import "core:thread" +IgnoreMode :: enum { + Respected, // skip gitignored, prune ignored dirs (fd -H default) + All, // ignore .gitignore entirely, descend everywhere (fd -HI) + Ignored, // emit ONLY gitignored files, prune ignored dirs (findr original) +} + +WalkOptions :: struct { + pattern: string, // regex on basename; "" = match all + excludes: []string, // glob patterns to skip entirely (fd -E) + include_hidden: bool, // true = include dotfiles (fd -H) + ignore_mode: IgnoreMode, +} + RawEntry :: struct { name: string, type: linux.Dirent_Type, } +GIContext :: struct { + gi: ^Gitignore, // nil if this dir had no .gitignore + base_rel: string, // relative path from repo root to this dir + parent: ^GIContext, // parent context (nil if repo root) +} + +WorkItem :: struct { + path: string, // absolute directory path + rel: string, // relative path from repo root ("" = root) + gi_ctx: ^GIContext, // gitignore chain (nil = outside any repo) +} + WalkerPool :: struct { - queue: [dynamic]string, + queue: [dynamic]WorkItem, queue_mutex: sync.Mutex, queue_sema: sync.Atomic_Sema, results: ^[dynamic]string, @@ -21,17 +47,47 @@ WalkerPool :: struct { active: i64, done: sync.One_Shot_Event, threads: [dynamic]^thread.Thread, + opts: WalkOptions, + pattern_re: regex.Regular_Expression, + has_pattern: bool, + exclude_gi: ^Gitignore, + all_contexts: [dynamic]^GIContext, + contexts_lock: sync.Mutex, } -walk :: proc(root: string, results: ^[dynamic]string, thread_count: int) { +walk :: proc(root: string, results: ^[dynamic]string, opts: WalkOptions, thread_count: int) { pool := new(WalkerPool) - pool.queue = make([dynamic]string) + pool.queue = make([dynamic]WorkItem) pool.results = results pool.active = 1 pool.threads = make([dynamic]^thread.Thread) + 
pool.all_contexts = make([dynamic]^GIContext) + pool.opts = opts + pool.exclude_gi = nil + pool.has_pattern = false + + if len(opts.pattern) > 0 { + re, err := regex.create(opts.pattern, {regex.Flag.No_Capture}) + if err == nil { + pool.pattern_re = re + pool.has_pattern = true + } + } + + if len(opts.excludes) > 0 { + sb: strings.Builder + strings.builder_init(&sb) + for ex in opts.excludes { + fmt.sbprintf(&sb, "%s\n", ex) + } + content := strings.to_string(sb) + pool.exclude_gi = new(Gitignore) + pool.exclude_gi^ = parse(content) + strings.builder_destroy(&sb) + } root_clone, _ := strings.clone(root) - append(&pool.queue, root_clone) + append(&pool.queue, WorkItem{path = root_clone}) sync.atomic_sema_post(&pool.queue_sema) for i in 0 ..< thread_count { @@ -52,10 +108,32 @@ walk :: proc(root: string, results: ^[dynamic]string, thread_count: int) { thread.destroy(t) } delete(pool.threads) - for path in pool.queue { - delete(path) + for item in pool.queue { + delete(item.path) + if len(item.rel) > 0 { delete(item.rel) } } delete(pool.queue) + + for ctx in pool.all_contexts { + if ctx.gi != nil { + destroy(ctx.gi) + free(ctx.gi) + } + if len(ctx.base_rel) > 0 { + delete(ctx.base_rel) + } + free(ctx) + } + delete(pool.all_contexts) + + if pool.has_pattern { + regex.destroy(pool.pattern_re) + } + if pool.exclude_gi != nil { + destroy(pool.exclude_gi) + free(pool.exclude_gi) + } + free(pool) } @@ -74,12 +152,13 @@ walk_worker :: proc(t: ^thread.Thread) { break } last := len(pool.queue) - 1 - dir_path := pool.queue[last] + item := pool.queue[last] ordered_remove(&pool.queue, last) sync.mutex_unlock(&pool.queue_mutex) - process_dir(pool, dir_path) - delete(dir_path) + process_dir(pool, item) + delete(item.path) + if len(item.rel) > 0 { delete(item.rel) } old := sync.atomic_sub_explicit(&pool.active, 1, .Release) if old == 1 { @@ -88,49 +167,132 @@ walk_worker :: proc(t: ^thread.Thread) { } } -process_dir :: proc(pool: ^WalkerPool, dir_path: string) { +process_dir :: 
proc(pool: ^WalkerPool, item: WorkItem) { + dir_path := item.path has_git := false entries := read_dir_entries(dir_path, &has_git) defer free_entries(&entries) + gi_ctx := item.gi_ctx + rel := item.rel + if has_git { + gi_ctx = nil + rel = "" + } + + if has_git || gi_ctx != nil { gi := load_gitignore(dir_path) - defer if gi != nil { - destroy(gi) - free(gi) + if gi != nil { + new_ctx := new(GIContext) + new_ctx.gi = gi + if len(rel) > 0 { + new_ctx.base_rel, _ = strings.clone(rel) + } + new_ctx.parent = gi_ctx + + sync.mutex_lock(&pool.contexts_lock) + append(&pool.all_contexts, new_ctx) + sync.mutex_unlock(&pool.contexts_lock) + + gi_ctx = new_ctx + } + } + + rel_buf: [4096]u8 + + for entry in entries { + if entry.name == ".git" do continue + + is_dir := entry.type == .DIR + is_regular := entry.type == .REG || entry.type == .UNKNOWN + + if pool.exclude_gi != nil && is_ignored(pool.exclude_gi, entry.name, is_dir) { + continue } - for entry in entries { - if entry.name == ".git" do continue - is_dir := entry.type == .DIR - if gi != nil && is_ignored(gi, entry.name, is_dir) { - if !is_dir { - full_path := join_path(dir_path, entry.name) - sync.mutex_lock(&pool.results_mutex) - append(pool.results, full_path) - sync.mutex_unlock(&pool.results_mutex) - } - continue - } - if is_dir { - child_path := join_path(dir_path, entry.name) - push_work(pool, child_path) - } + if !pool.opts.include_hidden && len(entry.name) > 0 && entry.name[0] == '.' 
{ + continue } - } else { - for entry in entries { - if entry.type == .DIR { + + entry_rel := build_rel(rel_buf[:], rel, entry.name) + + ignored := false + if gi_ctx != nil && pool.opts.ignore_mode != .All { + ignored = check_chain(gi_ctx, entry_rel, is_dir) + } + + should_emit: bool + if ignored { + should_emit = pool.opts.ignore_mode == .Ignored + } else { + should_emit = pool.opts.ignore_mode != .Ignored + } + + if is_dir { + if !ignored { + child_rel, _ := strings.clone(entry_rel) child_path := join_path(dir_path, entry.name) - push_work(pool, child_path) + push_work(pool, WorkItem{path = child_path, rel = child_rel, gi_ctx = gi_ctx}) + } + } else if is_regular { + if should_emit && matches_pattern(pool, entry.name) { + full_path := join_path(dir_path, entry.name) + sync.mutex_lock(&pool.results_mutex) + append(pool.results, full_path) + sync.mutex_unlock(&pool.results_mutex) } } } } -push_work :: proc(pool: ^WalkerPool, path: string) { +check_chain :: proc(ctx: ^GIContext, entry_rel: string, is_dir: bool) -> bool { + c := ctx + for c != nil { + if c.gi != nil { + rel := relative_to(entry_rel, c.base_rel) + match := check_match(c.gi, rel, is_dir) + if match != .None { + return match == .Ignored + } + } + c = c.parent + } + return false +} + +relative_to :: proc(entry_rel, base_rel: string) -> string { + if len(base_rel) == 0 do return entry_rel + prefix_len := len(base_rel) + if len(entry_rel) > prefix_len && entry_rel[prefix_len] == '/' && + strings.has_prefix(entry_rel, base_rel) { + return entry_rel[prefix_len + 1:] + } + return entry_rel +} + +build_rel :: proc(buf: []u8, rel, name: string) -> string { + if len(rel) == 0 do return name + pos := copy(buf, rel) + if pos < len(buf) { + buf[pos] = '/' + pos += 1 + pos += copy(buf[pos:], name) + } + return string(buf[:pos]) +} + +matches_pattern :: proc(pool: ^WalkerPool, name: string) -> bool { + if !pool.has_pattern do return true + cap, ok := regex.match(pool.pattern_re, name) + regex.destroy(cap) + return ok 
+} + +push_work :: proc(pool: ^WalkerPool, item: WorkItem) { sync.atomic_add_explicit(&pool.active, 1, .Relaxed) sync.mutex_lock(&pool.queue_mutex) - append(&pool.queue, path) + append(&pool.queue, item) sync.mutex_unlock(&pool.queue_mutex) sync.atomic_sema_post(&pool.queue_sema) } @@ -205,4 +367,3 @@ join_path :: proc(parent, child: string) -> string { result, _ := strings.clone(s) return result } -