From 2ef733fe58594b0a0b6e3ef85142b74af445ccb8 Mon Sep 17 00:00:00 2001 From: Spencer Brower Date: Tue, 16 Jun 2026 20:57:38 -0400 Subject: [PATCH] perf: Replaced `fd` with custom internals. --- .gitignore | 4 + TODOS.md | 2 - cmd_check.odin | 11 - cmd_deps.odin | 6 - cmd_scan.odin | 10 - config.odin | 19 +- db.odin | 4 +- features.odin | 4 - findr/findr_test.odin | 320 +++++++++++++++++++++++++++ findr/gitignore.odin | 88 ++++++++ findr/gitignore_test.odin | 118 ++++++++++ findr/glob.odin | 203 +++++++++++++++++ findr/glob_test.odin | 111 ++++++++++ findr/repos.odin | 128 +++++++++++ findr/test_env.odin | 152 +++++++++++++ findr/walker.odin | 449 ++++++++++++++++++++++++++++++++++++++ flake.nix | 5 +- scan.odin | 127 +---------- scan_test.odin | 23 +- 19 files changed, 1593 insertions(+), 191 deletions(-) create mode 100644 findr/findr_test.odin create mode 100644 findr/gitignore.odin create mode 100644 findr/gitignore_test.odin create mode 100644 findr/glob.odin create mode 100644 findr/glob_test.odin create mode 100644 findr/repos.odin create mode 100644 findr/test_env.odin create mode 100644 findr/walker.odin diff --git a/.gitignore b/.gitignore index f49b1b1..f8f5015 100644 --- a/.gitignore +++ b/.gitignore @@ -7,8 +7,12 @@ list.json man # build artifacts +*.spall builds envr envr-go +findr/findr +findr/findr-prof +findr/bench-*.md result version.odin diff --git a/TODOS.md b/TODOS.md index eb3b7a5..59e58d8 100644 --- a/TODOS.md +++ b/TODOS.md @@ -32,8 +32,6 @@ 18. 2 scan tests silently skip when fd isn't installed, tests pass without actually testing anything. These should use #assert to be sure that fd is in path. -19. Try to do all encryption / decryption in memory - only read / write encrypted data to disk. - 20. add --format -f flag to commands that draw tables. 21. Replace `testing.expect` calls with `testing.expect_value` calls where appropriate. 
diff --git a/cmd_check.odin b/cmd_check.odin index 1dd0f94..b1b705e 100644 --- a/cmd_check.odin +++ b/cmd_check.odin @@ -5,8 +5,6 @@ import "core:os" import "core:path/filepath" cmd_check :: proc(cmd: ^Command) { - feats := check_features() - check_path: string if len(cmd.args) > 0 { check_path = cmd.args[0] @@ -42,15 +40,6 @@ cmd_check :: proc(cmd: ^Command) { files_in_path: [dynamic]string if is_dir { - if cant_scan(feats) { - fmt.wprintln( - cmd.err, - "Error: please install fd to use the check command (https://github.com/sharkdp/fd)", - flush = false, - ) - return - } - scanned, scan_ok := scan_path(abs_path, db.cfg) if !scan_ok { fmt.wprintln(cmd.err, "Error scanning directory for .env files", flush = false) diff --git a/cmd_deps.odin b/cmd_deps.odin index 9680d96..71aa218 100644 --- a/cmd_deps.odin +++ b/cmd_deps.odin @@ -17,12 +17,6 @@ cmd_deps :: proc(cmd: ^Command) { append(&rows, []string{"Git", "\u2717 Missing"}) } - if .Fd in feats { - append(&rows, []string{"fd", "\u2713 Available"}) - } else { - append(&rows, []string{"fd", "\u2717 Missing"}) - } - if terminal.is_terminal(os.stdout) { render_table(cmd.out, headers, rows[:]) } else { diff --git a/cmd_scan.odin b/cmd_scan.odin index 4ef5ed6..921de8c 100644 --- a/cmd_scan.odin +++ b/cmd_scan.odin @@ -6,16 +6,6 @@ import "core:os" import "core:terminal" cmd_scan :: proc(cmd: ^Command) { - feats := check_features() - if cant_scan(feats) { - fmt.wprintln( - cmd.err, - "Error: please install fd to use the scan command (https://github.com/sharkdp/fd)", - flush = false, - ) - return - } - db, db_ok := db_open(cmd.config_path) if !db_ok { return diff --git a/config.odin b/config.odin index 22bd25d..2bfa99c 100644 --- a/config.odin +++ b/config.odin @@ -6,6 +6,8 @@ import "core:os" import "core:path/filepath" import "core:strings" +import "findr" + SshKeyPair :: struct { Private: string `json:"private"`, Public: string `json:"public"`, @@ -218,22 +220,7 @@ search_paths :: proc(cfg: Config) -> (paths: 
[dynamic]string) { find_git_roots :: proc(cfg: Config) -> (roots: [dynamic]string, ok: bool) { paths := search_paths(cfg) - - for sp in paths { - args := []string{"fd", "-H", "-t", "d", "^\\.git$", sp} - lines, fd_ok := run_fd(args) - if !fd_ok { - return - } - - for line in lines { - cleaned, _ := filepath.clean(line) - parent := filepath.dir(cleaned) - cloned, _ := strings.clone(parent) - append(&roots, cloned) - } - } - + findr.find_repos(paths[:], &roots, os.get_processor_core_count()) ok = true return } diff --git a/db.odin b/db.odin index c3a9ffb..cceb75a 100644 --- a/db.odin +++ b/db.odin @@ -517,8 +517,8 @@ update_dir :: proc(f: ^EnvFile, new_dir: string) { find_moved_dirs :: proc(d: ^Db, f: ^EnvFile) -> ([dynamic]string, bool) { feats := check_features() - if .Fd not_in feats || .Git not_in feats { - fmt.println("Error: fd and git are required for moved dir detection") + if .Git not_in feats { + fmt.println("Error: git is required for moved dir detection") return {}, false } diff --git a/features.odin b/features.odin index 72e2102..eaf2876 100644 --- a/features.odin +++ b/features.odin @@ -7,7 +7,6 @@ import "core:strings" Feature :: enum { Git, - Fd, } AvailableFeatures :: bit_set[Feature] @@ -27,9 +26,6 @@ check_features :: proc() -> AvailableFeatures { if find_binary(paths, "git") != "" { feats += {.Git} } - if find_binary(paths, "fd") != "" { - feats += {.Fd} - } return feats } diff --git a/findr/findr_test.odin b/findr/findr_test.odin new file mode 100644 index 0000000..22c22ec --- /dev/null +++ b/findr/findr_test.odin @@ -0,0 +1,320 @@ +package findr + +import "core:os" +import "core:sort" +import "core:strings" +import "core:sys/linux" +import "core:testing" + +// ============================================================================ +// Gitignored file emission tests (emit ONLY gitignored files, descend everywhere) +// ============================================================================ + +@(test) +test_basic_gitignored :: proc(t: 
^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n") + create_file(env, "repo/.env") + create_file(env, "repo/secrets.env") + create_file(env, "repo/normal.txt") + + assert_output(t, env, nil, {}, { + "repo/.env", "repo/secrets.env", + }) +} + +@(test) +test_non_repo_not_scanned :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_dir(env, "norepo") + create_file(env, "norepo/.gitignore", "*.env\n") + create_file(env, "norepo/.env") + + assert_output_empty(t, env, nil, {}) +} + +@(test) +test_negation_pattern :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n!prod.env\n") + create_file(env, "repo/.env") + create_file(env, "repo/secrets.env") + create_file(env, "repo/prod.env") + + assert_output(t, env, nil, {}, { + "repo/.env", "repo/secrets.env", + }) +} + +@(test) +test_multiple_repos :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo1") + create_file(env, "repo1/.gitignore", "*.env\n") + create_file(env, "repo1/a.env") + + create_git_repo(env, "repo2") + create_file(env, "repo2/.gitignore", "*.key\n") + create_file(env, "repo2/secret.key") + + assert_output(t, env, nil, {}, { + "repo1/a.env", "repo2/secret.key", + }) +} + +@(test) +test_nested_repos :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "parent") + create_file(env, "parent/.gitignore", "*.env\n") + create_file(env, "parent/top.env") + + create_git_repo(env, "parent/child") + create_file(env, "parent/child/.gitignore", "*.key\n") + create_file(env, "parent/child/api.key") + + assert_output(t, env, nil, {}, { + "parent/top.env", "parent/child/api.key", + }) +} + +@(test) +test_nested_gitignore_read :: proc(t: ^testing.T) { + env := 
create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n") + create_dir(env, "repo/sub") + create_file(env, "repo/sub/.gitignore", "*.txt\n") + create_file(env, "repo/sub/secret.txt") + create_file(env, "repo/sub/.env") + + assert_output(t, env, nil, {}, { + "repo/sub/secret.txt", "repo/sub/.env", + }) +} + +@(test) +test_nested_gitignore_negation :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.log\n") + create_dir(env, "repo/sub") + create_file(env, "repo/sub/.gitignore", "!important.log\n") + create_file(env, "repo/sub/important.log") + create_file(env, "repo/sub/debug.log") + + assert_output(t, env, nil, {}, { + "repo/sub/debug.log", + }) +} + +@(test) +test_multisegment_pattern :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "build/output.txt\n") + create_dir(env, "repo/build") + create_file(env, "repo/build/output.txt") + create_file(env, "repo/build/other.txt") + create_file(env, "repo/output.txt") + + assert_output(t, env, nil, {}, { + "repo/build/output.txt", + }) +} + +@(test) +test_no_gitignore_file :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.env") + + assert_output_empty(t, env, nil, {}) +} + +@(test) +test_empty_gitignore :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "\n\n# comment\n\n") + create_file(env, "repo/.env") + + assert_output_empty(t, env, nil, {}) +} + +@(test) +test_multiple_search_dirs :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "dir1/repo") + create_file(env, "dir1/repo/.gitignore", 
"*.env\n") + create_file(env, "dir1/repo/a.env") + create_file(env, "dir1/repo/normal.txt") + + create_git_repo(env, "dir2/repo") + create_file(env, "dir2/repo/.gitignore", "*.env\n") + create_file(env, "dir2/repo/b.env") + + dir1 := join_path(env.temp_dir, "dir1") + defer delete(dir1) + dir2 := join_path(env.temp_dir, "dir2") + defer delete(dir2) + + results := make([dynamic]string) + defer { + for r in results {delete(r)} + delete(results) + } + + opts := WalkOptions{} + thread_count := os.get_processor_core_count() + walk({dir1, dir2}, &results, opts, thread_count) + + testing.expect_value(t, len(results), 2) + + actual := make([dynamic]string, 0, len(results)) + for r in results { + stripped := r + if strings.has_prefix(stripped, env.temp_dir) { + stripped = stripped[len(env.temp_dir):] + if len(stripped) > 0 && stripped[0] == '/' { + stripped = stripped[1:] + } + } + append(&actual, stripped) + } + defer delete(actual) + + expected := []string{"dir1/repo/a.env", "dir2/repo/b.env"} + + sort.quick_sort(actual[:]) + sort.quick_sort(expected[:]) + + for i in 0 ..< len(expected) { + testing.expect_value(t, actual[i], expected[i]) + } +} + +// ============================================================================ +// Ignored directory recursion tests +// ============================================================================ + +@(test) +test_ignored_dir_descended :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "secrets/\n") + create_dir(env, "repo/secrets") + create_file(env, "repo/secrets/.env") + create_file(env, "repo/secrets/api.key") + + // Ignored dir's contents are emitted AND descended into + assert_output(t, env, nil, {}, { + "repo/secrets/", "repo/secrets/.env", "repo/secrets/api.key", + }) +} + +@(test) +test_nested_ignored_dir :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, 
"repo") + create_file(env, "repo/.gitignore", "build/\n") + create_dir(env, "repo/build") + create_dir(env, "repo/build/sub") + create_file(env, "repo/build/output.txt") + create_file(env, "repo/build/sub/deep.env") + + assert_output(t, env, nil, {}, { + "repo/build/", "repo/build/output.txt", + "repo/build/sub/", "repo/build/sub/deep.env", + }) +} + +// ============================================================================ +// Filter tests (excludes, pattern) +// ============================================================================ + +@(test) +test_excludes_prune_dirs :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n") + create_file(env, "repo/.env") + create_dir(env, "repo/vendor") + create_file(env, "repo/vendor/lib.env") + + assert_output(t, env, nil, + {excludes = {"vendor"}}, + {"repo/.env"}, + ) +} + +@(test) +test_pattern_filters_results :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n*.key\n") + create_file(env, "repo/.env") + create_file(env, "repo/secrets.env") + create_file(env, "repo/master.key") + + assert_output(t, env, nil, + {pattern = "\\.env$"}, + {"repo/.env", "repo/secrets.env"}, + ) +} + +// ============================================================================ +// Special file type tests +// ============================================================================ + +@(test) +test_fifo_emitted :: proc(t: ^testing.T) { + env := create_test_env() + defer destroy_test_env(&env) + + create_git_repo(env, "repo") + create_file(env, "repo/.gitignore", "*.env\n*.fifo\n") + + fifo_path := join_path(env.temp_dir, "repo/test.fifo") + defer delete(fifo_path) + cpath := strings.clone_to_cstring(fifo_path) + defer delete(cpath) + linux.mknod(cpath, linux.S_IFIFO | linux.Mode{.IRUSR, .IWUSR}, 0) + + 
assert_output(t, env, nil, + {pattern = "\\.fifo$"}, + {"repo/test.fifo"}, + ) +} diff --git a/findr/gitignore.odin b/findr/gitignore.odin new file mode 100644 index 0000000..680b9c0 --- /dev/null +++ b/findr/gitignore.odin @@ -0,0 +1,88 @@ +package findr + +import "core:strings" + +Gitignore :: struct { + rules: [dynamic]Rule, +} + +Rule :: struct { + pattern: GlobPattern, + negated: bool, + dir_only: bool, +} + +Match :: enum { + None, + Ignored, + Unignored, +} + +is_ignored :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> bool { + return check_match(gi, path, is_dir) == .Ignored +} + +check_match :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> Match { + result := Match.None + for &rule in gi.rules { + if rule.dir_only && !is_dir do continue + if glob_match_compiled(&rule.pattern, path) { + result = rule.negated ? .Unignored : .Ignored + } + } + return result +} + +parse :: proc(content: string) -> Gitignore { + gi: Gitignore + gi.rules = make([dynamic]Rule) + + remaining := content + for { + line, ok := strings.split_lines_iterator(&remaining) + if !ok do break + + s := strings.trim_space(line) + if len(s) == 0 do continue + if s[0] == '#' do continue + + negated := false + if s[0] == '!' 
{ + negated = true + s = s[1:] + } + + if len(s) > 0 && s[0] == '\\' { + if len(s) > 1 && (s[1] == '#' || s[1] == '!') { + s = s[1:] + } + } + + dir_only := false + if len(s) > 0 && s[len(s) - 1] == '/' { + dir_only = true + s = s[:len(s) - 1] + } + + anchored := false + if len(s) > 0 && s[0] == '/' { + anchored = true + s = s[1:] + } + + if len(s) == 0 do continue + + gp := glob_compile(s, anchored) + append(&gi.rules, Rule{pattern = gp, negated = negated, dir_only = dir_only}) + } + + return gi +} + +destroy :: proc(gi: ^Gitignore) { + for &rule in gi.rules { + glob_destroy(&rule.pattern) + } + delete(gi.rules) +} + diff --git a/findr/gitignore_test.odin b/findr/gitignore_test.odin new file mode 100644 index 0000000..32ca939 --- /dev/null +++ b/findr/gitignore_test.odin @@ -0,0 +1,118 @@ +package findr + +import "core:testing" + +@(test) +test_is_ignored_basic :: proc(t: ^testing.T) { + gi := parse("*.env\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, ".env", false), true) + testing.expect_value(t, is_ignored(&gi, "foo.env", false), true) + testing.expect_value(t, is_ignored(&gi, ".env.local", false), false) + testing.expect_value(t, is_ignored(&gi, "config.yaml", false), false) +} + +@(test) +test_is_ignored_negation :: proc(t: ^testing.T) { + gi := parse("*.env\n!.env.production\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, ".env", false), true) + testing.expect_value(t, is_ignored(&gi, ".env.production", false), false) +} + +@(test) +test_is_ignored_dir_only :: proc(t: ^testing.T) { + gi := parse("node_modules/\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, "node_modules", true), true) + testing.expect_value(t, is_ignored(&gi, "node_modules", false), false) +} + +@(test) +test_is_ignored_anchored :: proc(t: ^testing.T) { + gi := parse("/secret.key\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, "secret.key", false), true) +} + +@(test) +test_is_ignored_comments_skipped :: 
proc(t: ^testing.T) { + gi := parse("# this is a comment\n#another\n*.tmp\n") + defer destroy(&gi) + + testing.expect_value(t, len(gi.rules), 1) + testing.expect_value(t, is_ignored(&gi, "file.tmp", false), true) +} + +@(test) +test_is_ignored_blank_lines_skipped :: proc(t: ^testing.T) { + gi := parse("\n\n \n*.log\n\n") + defer destroy(&gi) + + testing.expect_value(t, len(gi.rules), 1) +} + +@(test) +test_is_ignored_last_match_wins :: proc(t: ^testing.T) { + gi := parse("*.env\n!*.env\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, ".env", false), false) +} + +@(test) +test_is_ignored_no_rules :: proc(t: ^testing.T) { + gi := parse("") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, "anything", false), false) +} + +@(test) +test_is_ignored_env_pattern :: proc(t: ^testing.T) { + gi := parse(".env*\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, ".env", false), true) + testing.expect_value(t, is_ignored(&gi, ".env.local", false), true) + testing.expect_value(t, is_ignored(&gi, ".envrc", false), true) +} + +@(test) +test_is_ignored_globstar :: proc(t: ^testing.T) { + gi := parse("**/cache\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, "cache", false), true) + testing.expect_value(t, is_ignored(&gi, "foo/cache", false), true) + testing.expect_value(t, is_ignored(&gi, "foo/bar/cache", false), true) +} + +@(test) +test_star_negation_subpath :: proc(t: ^testing.T) { + gi := parse("*\n!public/\n") + defer destroy(&gi) + + // public dir itself is un-ignored + testing.expect_value(t, is_ignored(&gi, "public", true), false) + // children of public/ should still be ignored by * + testing.expect_value(t, is_ignored(&gi, "public/uuid-dir", true), true) + testing.expect_value(t, is_ignored(&gi, "public/uuid-dir/file.txt", false), true) +} + +@(test) +test_is_ignored_hash_pattern :: proc(t: ^testing.T) { + gi := parse("\\#*\\#\n") + defer destroy(&gi) + + testing.expect_value(t, is_ignored(&gi, 
"#foo#", false), true) + testing.expect_value(t, is_ignored(&gi, "#test#", false), true) + testing.expect_value(t, is_ignored(&gi, "AUTHORS", false), false) + testing.expect_value(t, is_ignored(&gi, "build.zig", false), false) + testing.expect_value(t, is_ignored(&gi, "ChangeLog", false), false) +} + diff --git a/findr/glob.odin b/findr/glob.odin new file mode 100644 index 0000000..52dcc2f --- /dev/null +++ b/findr/glob.odin @@ -0,0 +1,203 @@ +package findr + +Range :: struct { + lo: u8, + hi: u8, +} + +Class_Data :: struct { + negated: bool, + ranges: [dynamic]Range, +} + +Token_Kind :: enum u8 { Char, Star, Globstar, Question, Class } + +Token :: struct { + kind: Token_Kind, + byte: u8, + class_idx: u16, +} + +GlobPattern :: struct { + tokens: [dynamic]Token, + classes: [dynamic]Class_Data, + anchored: bool, +} + +glob_compile :: proc(pattern: string, anchored: bool) -> GlobPattern { + gp: GlobPattern + gp.tokens = make([dynamic]Token) + gp.classes = make([dynamic]Class_Data) + gp.anchored = anchored + + i := 0 + for i < len(pattern) { + c := pattern[i] + + if c == '*' { + if i + 1 < len(pattern) && pattern[i + 1] == '*' { + prev_slash := i == 0 || pattern[i - 1] == '/' + at_end := i + 2 >= len(pattern) + next_slash := !at_end && pattern[i + 2] == '/' + + if prev_slash && (next_slash || at_end) { + append(&gp.tokens, Token{kind = .Globstar}) + if next_slash { + i += 3 + } else { + i += 2 + } + } else { + append(&gp.tokens, Token{kind = .Star}) + i += 2 + } + } else { + append(&gp.tokens, Token{kind = .Star}) + i += 1 + } + } else if c == '?' { + append(&gp.tokens, Token{kind = .Question}) + i += 1 + } else if c == '[' { + i += 1 + negated := false + if i < len(pattern) && pattern[i] == '!' 
{ + negated = true + i += 1 + } + + ranges := make([dynamic]Range) + + if i < len(pattern) && pattern[i] == ']' { + append(&ranges, Range{lo = ']', hi = ']'}) + i += 1 + } + + for i < len(pattern) && pattern[i] != ']' { + if i + 2 < len(pattern) && pattern[i + 1] == '-' && pattern[i + 2] != ']' { + append(&ranges, Range{lo = pattern[i], hi = pattern[i + 2]}) + i += 3 + } else { + append(&ranges, Range{lo = pattern[i], hi = pattern[i]}) + i += 1 + } + } + + if i < len(pattern) { + i += 1 + } + + class_idx := u16(len(gp.classes)) + append(&gp.classes, Class_Data{negated = negated, ranges = ranges}) + append(&gp.tokens, Token{kind = .Class, class_idx = class_idx}) + } else if c == '\\' { + i += 1 + if i < len(pattern) { + append(&gp.tokens, Token{kind = .Char, byte = pattern[i]}) + i += 1 + } + } else { + append(&gp.tokens, Token{kind = .Char, byte = c}) + i += 1 + } + } + + return gp +} + +match_tokens :: proc(tokens: []Token, classes: []Class_Data, ti: int, path: string, pi: int) -> bool { + if ti >= len(tokens) { + return pi == len(path) + } + + tok := tokens[ti] + switch tok.kind { + case .Char: + if pi < len(path) && path[pi] == tok.byte { + return match_tokens(tokens, classes, ti + 1, path, pi + 1) + } + return false + + case .Question: + if pi < len(path) && path[pi] != '/' { + return match_tokens(tokens, classes, ti + 1, path, pi + 1) + } + return false + + case .Star: + max_end := pi + for max_end < len(path) && path[max_end] != '/' { + max_end += 1 + } + for end := max_end; end >= pi; end -= 1 { + if match_tokens(tokens, classes, ti + 1, path, end) { + return true + } + } + return false + + case .Globstar: + if ti + 1 >= len(tokens) { + return true + } + if match_tokens(tokens, classes, ti + 1, path, pi) { + return true + } + for end := pi + 1; end <= len(path); end += 1 { + if path[end - 1] == '/' { + if match_tokens(tokens, classes, ti + 1, path, end) { + return true + } + } + } + return false + + case .Class: + if pi >= len(path) { + return false + } + cd 
:= classes[tok.class_idx] + ch := path[pi] + in_range := false + for r in cd.ranges { + if ch >= r.lo && ch <= r.hi { + in_range = true + break + } + } + if in_range != cd.negated { + return match_tokens(tokens, classes, ti + 1, path, pi + 1) + } + return false + } + return false +} + +glob_match_compiled :: proc(gp: ^GlobPattern, path: string) -> bool { + tokens := gp.tokens[:] + classes := gp.classes[:] + + if gp.anchored { + return match_tokens(tokens, classes, 0, path, 0) + } + + if match_tokens(tokens, classes, 0, path, 0) { + return true + } + for i := 1; i < len(path); i += 1 { + if path[i - 1] == '/' { + if match_tokens(tokens, classes, 0, path, i) { + return true + } + } + } + return false +} + +glob_destroy :: proc(gp: ^GlobPattern) { + for &cd in gp.classes { + delete(cd.ranges) + } + delete(gp.classes) + delete(gp.tokens) +} diff --git a/findr/glob_test.odin b/findr/glob_test.odin new file mode 100644 index 0000000..0fb40e8 --- /dev/null +++ b/findr/glob_test.odin @@ -0,0 +1,111 @@ +package findr + +import "core:testing" + +glob_match :: proc(pattern: string, path: string, anchored: bool) -> bool { + gp := glob_compile(pattern, anchored) + result := glob_match_compiled(&gp, path) + glob_destroy(&gp) + return result +} + +@(test) +test_glob_simple :: proc(t: ^testing.T) { + testing.expect(t, glob_match("foo", "foo", false)) + testing.expect(t, glob_match("foo", "bar/foo", false)) + testing.expect(t, !glob_match("foo", "foobar", false)) + testing.expect(t, !glob_match("foo", "foo/bar", false)) +} + +@(test) +test_glob_anchored :: proc(t: ^testing.T) { + testing.expect(t, glob_match("foo", "foo", true)) + testing.expect(t, !glob_match("foo", "bar/foo", true)) + testing.expect(t, !glob_match("foo", "foobar", true)) +} + +@(test) +test_glob_star :: proc(t: ^testing.T) { + testing.expect(t, glob_match("*.log", "test.log", false)) + testing.expect(t, glob_match("*.log", ".log", false)) + testing.expect(t, !glob_match("*.log", "test.txt", false)) + 
testing.expect(t, !glob_match("*.log", "dir/test", false)) +} + +@(test) +test_glob_question :: proc(t: ^testing.T) { + testing.expect(t, glob_match("?.log", "a.log", false)) + testing.expect(t, !glob_match("?.log", "ab.log", false)) + testing.expect(t, !glob_match("?.log", ".log", false)) +} + +@(test) +test_glob_char_class :: proc(t: ^testing.T) { + testing.expect(t, glob_match("[abc].log", "a.log", false)) + testing.expect(t, glob_match("[abc].log", "b.log", false)) + testing.expect(t, !glob_match("[abc].log", "d.log", false)) +} + +@(test) +test_glob_negated_class :: proc(t: ^testing.T) { + testing.expect(t, glob_match("[!abc].log", "d.log", false)) + testing.expect(t, !glob_match("[!abc].log", "a.log", false)) +} + +@(test) +test_glob_dot_literal :: proc(t: ^testing.T) { + testing.expect(t, glob_match(".env", ".env", false)) + testing.expect(t, glob_match(".env", "dir/.env", false)) + testing.expect(t, !glob_match(".env", "env", false)) + testing.expect(t, !glob_match(".env", "x.env", false)) +} + +@(test) +test_glob_globstar_prefix :: proc(t: ^testing.T) { + testing.expect(t, glob_match("**/foo", "foo", false)) + testing.expect(t, glob_match("**/foo", "a/b/foo", false)) + testing.expect(t, !glob_match("**/foo", "foobar", false)) + testing.expect(t, !glob_match("**/foo", "a/foobar", false)) +} + +@(test) +test_glob_globstar_suffix :: proc(t: ^testing.T) { + testing.expect(t, glob_match("abc/**", "abc/x", false)) + testing.expect(t, glob_match("abc/**", "abc/x/y", false)) + testing.expect(t, !glob_match("abc/**", "abc", false)) + testing.expect(t, !glob_match("abc/**", "abcd/x", false)) +} + +@(test) +test_glob_globstar_middle :: proc(t: ^testing.T) { + testing.expect(t, glob_match("foo/**/bar", "foo/bar", false)) + testing.expect(t, glob_match("foo/**/bar", "foo/x/bar", false)) + testing.expect(t, !glob_match("foo/**/bar", "foo/barx", false)) + testing.expect(t, !glob_match("foo/**/bar", "foo/x/y/baz", false)) +} + +@(test) +test_glob_backslash_escape :: 
proc(t: ^testing.T) { + testing.expect(t, glob_match("\\!foo", "!foo", false)) + testing.expect(t, !glob_match("\\!foo", "foo", false)) +} + +@(test) +test_glob_hash_literal :: proc(t: ^testing.T) { + testing.expect(t, glob_match("#foo", "#foo", false)) + testing.expect(t, !glob_match("#foo", "foo", false)) +} + +@(test) +test_glob_hash_pattern :: proc(t: ^testing.T) { + testing.expect(t, glob_match("#*#", "#test#", false)) + testing.expect(t, glob_match("#*#", "##", false)) + testing.expect(t, !glob_match("#*#", "test", false)) + testing.expect(t, !glob_match("#*#", "#test", false)) +} + +@(test) +test_glob_empty :: proc(t: ^testing.T) { + testing.expect(t, glob_match("", "", false)) + testing.expect(t, !glob_match("", "foo", false)) +} diff --git a/findr/repos.odin b/findr/repos.odin new file mode 100644 index 0000000..2cd54e6 --- /dev/null +++ b/findr/repos.odin @@ -0,0 +1,128 @@ +package findr + +import "core:strings" +import "core:sync" +import "core:sys/linux" +import "core:thread" + +RepoPool :: struct { + queue: [dynamic]string, + queue_mutex: sync.Mutex, + queue_sema: sync.Atomic_Sema, + results: ^[dynamic]string, + results_lock: sync.Mutex, + active: i64, + done: sync.One_Shot_Event, + threads: []^thread.Thread, +} + +find_repos :: proc(roots: []string, results: ^[dynamic]string, thread_count: int) { + if len(roots) == 0 do return + + pool := new(RepoPool) + pool.queue = make([dynamic]string) + pool.results = results + pool.active = i64(len(roots)) + pool.threads = make([]^thread.Thread, thread_count) + + for root in roots { + root_clone, _ := strings.clone(root) + append(&pool.queue, root_clone) + sync.atomic_sema_post(&pool.queue_sema) + } + + for i in 0 ..< thread_count { + t := thread.create(repo_worker) + t.data = rawptr(pool) + t.init_context = context + thread.start(t) + pool.threads[i] = t + } + + sync.one_shot_event_wait(&pool.done) + + for _ in 0 ..< thread_count { + sync.atomic_sema_post(&pool.queue_sema) + } + + for t in pool.threads { + 
thread.destroy(t) + } + delete(pool.threads) + + for path in pool.queue { + delete(path) + } + delete(pool.queue) + + free(pool) +} + +repo_worker :: proc(t: ^thread.Thread) { + pool := cast(^RepoPool)t.data + + for { + sync.atomic_sema_wait(&pool.queue_sema) + + sync.mutex_lock(&pool.queue_mutex) + if len(pool.queue) == 0 { + sync.mutex_unlock(&pool.queue_mutex) + if sync.atomic_load_explicit(&pool.active, .Acquire) == 0 { + sync.one_shot_event_signal(&pool.done) + } + break + } + last := len(pool.queue) - 1 + dir_path := pool.queue[last] + ordered_remove(&pool.queue, last) + sync.mutex_unlock(&pool.queue_mutex) + + process_repo_dir(pool, dir_path) + delete(dir_path) + + old := sync.atomic_sub_explicit(&pool.active, 1, .Release) + if old == 1 { + sync.one_shot_event_signal(&pool.done) + } + } +} + +process_repo_dir :: proc(pool: ^RepoPool, dir_path: string) { + cpath := strings.clone_to_cstring(dir_path) + if cpath == nil do return + defer delete(cpath) + + fd, open_err := linux.open(cpath, {.DIRECTORY, .CLOEXEC}) + if open_err != .NONE do return + defer linux.close(fd) + + if has_git_dir(fd) { + cloned, _ := strings.clone(dir_path) + sync.mutex_lock(&pool.results_lock) + append(pool.results, cloned) + sync.mutex_unlock(&pool.results_lock) + } + + buf: [32 * 1024]u8 + + for { + n, errno := linux.getdents(fd, buf[:]) + if n <= 0 || errno != .NONE do break + + offs := 0 + for d in linux.dirent_iterate_buf(buf[:n], &offs) { + name := linux.dirent_name(d) + if name == "." || name == ".." 
do continue
+			if name == ".git" do continue
+
+			if d.type == .DIR {
+				child_path := join_path(dir_path, name)
+				sync.atomic_add_explicit(&pool.active, 1, .Relaxed)
+				sync.mutex_lock(&pool.queue_mutex)
+				append(&pool.queue, child_path)
+				sync.mutex_unlock(&pool.queue_mutex)
+				sync.atomic_sema_post(&pool.queue_sema)
+			}
+		}
+	}
+}
diff --git a/findr/test_env.odin b/findr/test_env.odin
new file mode 100644
index 0000000..221c345
--- /dev/null
+++ b/findr/test_env.odin
@@ -0,0 +1,165 @@
+package findr
+
+import "core:fmt"
+import "core:log"
+import "core:os"
+import "core:sort"
+import "core:strings"
+import "core:testing"
+
+// TestEnv is a scratch directory tree used by the findr tests.
+TestEnv :: struct {
+	temp_dir: string,
+}
+
+// create_test_env creates a fresh temporary directory; it panics if that fails.
+create_test_env :: proc() -> (env: TestEnv) {
+	tmp, err := os.mkdir_temp("", "findr-test-*", context.allocator)
+	if err != nil {
+		log.error("Failed to create temp dir:", err)
+		panic("Failed to create temp dir")
+	}
+
+	env.temp_dir = tmp
+	return
+}
+
+// destroy_test_env removes the temporary tree and frees the stored path.
+destroy_test_env :: proc(env: ^TestEnv) {
+	os.remove_all(env.temp_dir)
+	delete(env.temp_dir)
+}
+
+// create_dir creates `path` (and any missing parents) under the test root.
+create_dir :: proc(env: TestEnv, path: string) {
+	full := join_path(env.temp_dir, path)
+	defer delete(full)
+	os.mkdir_all(full, os.Permissions_Default_Directory)
+}
+
+// create_file writes `content` to `path` under the test root, creating parent
+// directories first. A creation failure is logged and otherwise ignored.
+create_file :: proc(env: TestEnv, path: string, content: string = "") {
+	full := join_path(env.temp_dir, path)
+	defer delete(full)
+
+	dir_end := strings.last_index(full, "/")
+	if dir_end >= 0 {
+		dir_path := full[:dir_end]
+		os.mkdir_all(dir_path, os.Permissions_Default_Directory)
+	}
+
+	f, err := os.create(full)
+	if err != nil {
+		log.error("Failed to create file:", full, err)
+		return
+	}
+	if len(content) > 0 {
+		os.write_string(f, content)
+	}
+	os.close(f)
+}
+
+// create_git_repo marks `path` as a repository root by creating `path/.git`.
+create_git_repo :: proc(env: TestEnv, path: string) {
+	sub := join_path(path, ".git")
+	defer delete(sub)
+	create_dir(env, sub)
+}
+
+// assert_output walks the test tree and fails the test unless the results,
+// compared order-insensitively, equal `expected`.
+assert_output :: proc(
+	t: ^testing.T,
+	env: TestEnv,
+	args: []string,
+	opts: WalkOptions,
+	expected: []string,
+) {
+	results := collect_results(env, args, opts)
+	defer {
+		for r in results {delete(r)}
+		delete(results)
+	}
+
+	sorted_expected := make([dynamic]string, 0, len(expected))
+	for e in expected {append(&sorted_expected, e)}
+	defer delete(sorted_expected)
+
+	sorted_actual := make([dynamic]string, 0, len(results))
+	for a in results {append(&sorted_actual, a)}
+	defer delete(sorted_actual)
+
+	sort.quick_sort(sorted_expected[:])
+	sort.quick_sort(sorted_actual[:])
+
+	if len(sorted_expected) != len(sorted_actual) {
+		testing.fail(t)
+		log.error(
+			fmt.tprintf("Expected %d results, got %d", len(sorted_expected), len(sorted_actual)),
+		)
+		log.error("Expected:", sorted_expected[:])
+		log.error("Actual: ", sorted_actual[:])
+		return
+	}
+
+	for i in 0 ..< len(sorted_expected) {
+		if sorted_expected[i] != sorted_actual[i] {
+			testing.fail(t)
+			log.error(fmt.tprintf("Mismatch at index %d", i))
+			log.error("Expected:", sorted_expected[:])
+			log.error("Actual: ", sorted_actual[:])
+			return
+		}
+	}
+}
+
+// assert_output_empty fails the test if the walk yields any result.
+assert_output_empty :: proc(
+	t: ^testing.T,
+	env: TestEnv,
+	args: []string,
+	opts: WalkOptions,
+) {
+	results := collect_results(env, args, opts)
+	defer {
+		for r in results {delete(r)}
+		delete(results)
+	}
+	if len(results) > 0 {
+		testing.fail(t)
+		log.error(fmt.tprintf("Expected no results, got %d:", len(results)))
+		for r in results {
+			log.error(" ", r)
+		}
+	}
+}
+
+// collect_results runs `walk` over the test root plus `args` (passed as extra
+// roots) and strips the test-root prefix from each result. The caller owns
+// the returned strings and the array.
+collect_results :: proc(env: TestEnv, args: []string, opts: WalkOptions) -> [dynamic]string {
+	results := make([dynamic]string)
+
+	full_args := make([dynamic]string, 0, len(args) + 1, context.temp_allocator)
+	append(&full_args, env.temp_dir)
+	for a in args {append(&full_args, a)}
+
+	thread_count := os.get_processor_core_count()
+	walk(full_args[:], &results, opts, thread_count)
+
+	for i in 0 ..< len(results) {
+		r := results[i]
+		if strings.has_prefix(r, env.temp_dir) {
+			stripped := r[len(env.temp_dir):]
+			if len(stripped) > 0 && stripped[0] == '/' {
+				stripped = stripped[1:]
+			}
+			new_r, _ := strings.clone(stripped)
+			delete(r)
+			results[i] = new_r
+		}
+	}
+
+	return results
+}
diff --git a/findr/walker.odin b/findr/walker.odin
new file mode 100644
index 0000000..f32e74f
--- /dev/null
+++ b/findr/walker.odin
@@ -0,0 +1,492 @@
+package findr
+
+import "core:fmt"
+import "core:os"
+import "core:strings"
+import "core:sync"
+import "core:sync/chan"
+import "core:sys/linux"
+import "core:text/regex"
+import "core:thread"
+
+// Size at which a worker's local output buffer is flushed to the collector.
+OUTPUT_BUF_SIZE :: 64 * 1024
+
+// WalkOptions configures a walk.
+WalkOptions :: struct {
+	pattern: string, // regex on basename; "" = match all
+	excludes: []string, // glob patterns to skip entirely
+}
+
+// GIContext is one link in the chain of ignore files that apply to a directory.
+GIContext :: struct {
+	gi: ^Gitignore, // nil if this dir had no .gitignore
+	base_rel: string, // relative path from repo root to this dir
+	parent: ^GIContext, // parent context (nil if repo root)
+}
+
+// WorkItem describes one directory waiting to be scanned.
+WorkItem :: struct {
+	path: string, // absolute directory path
+	rel: string, // relative path from repo root ("" = root)
+	gi_ctx: ^GIContext, // gitignore chain (nil = outside any repo)
+	in_repo: bool, // true if inside a git repo
+	in_ignored: bool, // true if inside a gitignored directory
+}
+
+// WalkerPool is the state shared by all walker threads of one `walk` call.
+WalkerPool :: struct {
+	queue: [dynamic]WorkItem,
+	queue_mutex: sync.Mutex,
+	queue_sema: sync.Atomic_Sema, // posted once per queued item, plus once per worker at shutdown
+	result_chan: chan.Chan([]u8),
+	active: i64, // directories queued or being processed; the walk is done at 0
+	done: sync.One_Shot_Event,
+	threads: []^thread.Thread,
+	opts: WalkOptions,
+	pattern_re: regex.Regular_Expression,
+	has_pattern: bool,
+	exclude_gi: ^Gitignore,
+	all_contexts: [dynamic]^GIContext, // every GIContext allocated; freed by walk
+	contexts_lock: sync.Mutex,
+}
+
+// Collector_Data is the argument handed to the collector thread.
+Collector_Data :: struct {
+	ch: chan.Chan([]u8),
+	results: ^[dynamic]string,
+}
+
+// collect_worker receives newline-separated path batches until the channel is
+// closed, appending each non-empty line to `results` and freeing the batch.
+collect_worker :: proc(t: ^thread.Thread) {
+	data := cast(^Collector_Data)t.data
+	for {
+		batch, ok := chan.recv(data.ch)
+		if !ok do break
+		start := 0
+		for i in 0 ..< len(batch) {
+			if batch[i] == '\n' {
+				if i > start {
+					s, _ := strings.clone(string(batch[start:i]))
+					append(data.results, s)
+				}
+				start = i + 1
+			}
+		}
+		delete(batch)
+	}
+}
+
+// walk scans every directory in `roots` with `thread_count` worker threads and
+// appends the reported paths to `results`; the caller owns the appended strings.
+//
+// Only ignored entries are reported (matched by a .gitignore inside a repo, by
+// a .ignore file, or located under an ignored directory); directories get a
+// trailing '/'. `opts.pattern` filters on the basename, `opts.excludes` prunes.
+// NOTE(review): an invalid `opts.pattern` is silently treated as "match all".
+walk :: proc(roots: []string, results: ^[dynamic]string, opts: WalkOptions, thread_count: int) {
+	if len(roots) == 0 do return
+
+	// With no workers nothing would ever signal `done`, so run at least one.
+	thread_count := max(thread_count, 1)
+
+	ch, _ := chan.create(chan.Chan([]u8), max(2 * thread_count, 2), context.allocator)
+	defer chan.destroy(ch)
+
+	data := new(Collector_Data)
+	data.ch = ch
+	data.results = results
+	defer free(data)
+
+	collector := thread.create(collect_worker)
+	collector.data = rawptr(data)
+	collector.init_context = context
+	thread.start(collector)
+
+	pool := new(WalkerPool)
+	pool.queue = make([dynamic]WorkItem)
+	pool.result_chan = ch
+	pool.active = i64(len(roots))
+	pool.threads = make([]^thread.Thread, thread_count)
+	pool.all_contexts = make([dynamic]^GIContext)
+	pool.opts = opts
+	pool.exclude_gi = nil
+	pool.has_pattern = false
+
+	if len(opts.pattern) > 0 {
+		re, err := regex.create(opts.pattern, {regex.Flag.No_Capture})
+		if err == nil {
+			pool.pattern_re = re
+			pool.has_pattern = true
+		}
+	}
+
+	if len(opts.excludes) > 0 {
+		sb: strings.Builder
+		strings.builder_init(&sb)
+		for ex in opts.excludes {
+			fmt.sbprintf(&sb, "%s\n", ex)
+		}
+		content := strings.to_string(sb)
+		pool.exclude_gi = new(Gitignore)
+		pool.exclude_gi^ = parse(content)
+		strings.builder_destroy(&sb)
+	}
+
+	for root in roots {
+		root_clone, _ := strings.clone(root)
+		append(&pool.queue, WorkItem{path = root_clone})
+		sync.atomic_sema_post(&pool.queue_sema)
+	}
+
+	for i in 0 ..< thread_count {
+		t := thread.create(walk_worker)
+		t.data = rawptr(pool)
+		t.init_context = context
+		thread.start(t)
+		pool.threads[i] = t
+	}
+
+	sync.one_shot_event_wait(&pool.done)
+
+	for _ in 0 ..< thread_count {
+		sync.atomic_sema_post(&pool.queue_sema)
+	}
+
+	for t in pool.threads {
+		thread.destroy(t)
+	}
+	delete(pool.threads)
+	for item in pool.queue {
+		delete(item.path)
+		if len(item.rel) > 0 {delete(item.rel)}
+	}
+	delete(pool.queue)
+
+	for ctx in pool.all_contexts {
+		if ctx.gi != nil {
+			destroy(ctx.gi)
+			free(ctx.gi)
+		}
+		if len(ctx.base_rel) > 0 {
+			delete(ctx.base_rel)
+		}
+		free(ctx)
+	}
+	delete(pool.all_contexts)
+
+	if pool.has_pattern {
+		regex.destroy(pool.pattern_re)
+	}
+	if pool.exclude_gi != nil {
+		destroy(pool.exclude_gi)
+		free(pool.exclude_gi)
+	}
+
+	free(pool)
+
+	chan.close(ch)
+	thread.join(collector)
+	thread.destroy(collector)
+}
+
+// flush_buf hands the buffered bytes to the collector, which frees them, and
+// gives the worker a fresh buffer. An empty buffer is left untouched.
+flush_buf :: proc(ch: chan.Chan([]u8), local: ^[dynamic]u8) {
+	if len(local) == 0 do return
+	batch := local[:]
+	local^ = make([dynamic]u8, 0, OUTPUT_BUF_SIZE)
+	chan.send(ch, batch)
+}
+
+// append_path appends "parent/name" (plus '/' when `trailing_slash`) and a
+// terminating newline to `buf`.
+append_path :: proc(buf: ^[dynamic]u8, parent, name: string, trailing_slash: bool) {
+	need_sep := len(parent) > 0 && parent[len(parent) - 1] != '/'
+	size := len(parent) + len(name) + 1
+	if need_sep do size += 1
+	if trailing_slash do size += 1
+
+	old_len := len(buf)
+	reserve(buf, old_len + size)
+	resize(buf, old_len + size)
+
+	pos := old_len
+	pos += copy(buf[pos:], parent)
+	if need_sep {buf[pos] = '/'; pos += 1}
+	pos += copy(buf[pos:], name)
+	if trailing_slash {buf[pos] = '/'; pos += 1}
+	buf[pos] = '\n'
+}
+
+// walk_worker pops directories off the shared queue until the walk is done.
+walk_worker :: proc(t: ^thread.Thread) {
+	pool := cast(^WalkerPool)t.data
+
+	local_buf := make([dynamic]u8, 0, OUTPUT_BUF_SIZE)
+	defer {
+		if len(local_buf) > 0 {
+			flush_buf(pool.result_chan, &local_buf)
+		}
+		delete(local_buf)
+	}
+
+	for {
+		sync.atomic_sema_wait(&pool.queue_sema)
+
+		sync.mutex_lock(&pool.queue_mutex)
+		// An empty queue after a wake-up is treated as the signal to exit.
+		if len(pool.queue) == 0 {
+			sync.mutex_unlock(&pool.queue_mutex)
+			if sync.atomic_load_explicit(&pool.active, .Acquire) == 0 {
+				sync.one_shot_event_signal(&pool.done)
+			}
+			break
+		}
+		last := len(pool.queue) - 1
+		item := pool.queue[last]
+		ordered_remove(&pool.queue, last)
+		sync.mutex_unlock(&pool.queue_mutex)
+
+		process_dir(pool, item, &local_buf)
+		delete(item.path)
+		if len(item.rel) > 0 {delete(item.rel)}
+
+		if len(local_buf) >= OUTPUT_BUF_SIZE {
+			flush_buf(pool.result_chan, &local_buf)
+		}
+
+		old := sync.atomic_sub_explicit(&pool.active, 1, .Release)
+		if old == 1 {
+			sync.one_shot_event_signal(&pool.done)
+		}
+	}
+}
+
+// process_dir scans one directory: it resolves the ignore rules in effect,
+// reports ignored entries that match the pattern and queues every
+// subdirectory (ignored or not) for scanning.
+process_dir :: proc(pool: ^WalkerPool, item: WorkItem, local_buf: ^[dynamic]u8) {
+	dir_path := item.path
+
+	cpath := strings.clone_to_cstring(dir_path)
+	if cpath == nil do return
+	defer delete(cpath)
+
+	fd, open_err := linux.open(cpath, {.DIRECTORY, .CLOEXEC})
+	if open_err != .NONE do return
+	defer linux.close(fd)
+
+	has_git := false
+	if !item.in_ignored {
+		has_git = has_git_dir(fd)
+	}
+
+	gi_ctx := item.gi_ctx
+	rel := item.rel
+
+	if has_git {
+		gi_ctx = nil
+		rel = ""
+	}
+
+	child_in_repo := has_git || item.in_repo
+
+	gi: ^Gitignore = nil
+	if !item.in_ignored {
+		gi = load_ignore_patterns(dir_path, child_in_repo)
+	}
+	if gi != nil {
+		new_ctx := new(GIContext)
+		new_ctx.gi = gi
+		if len(rel) > 0 {
+			new_ctx.base_rel, _ = strings.clone(rel)
+		}
+		new_ctx.parent = gi_ctx
+
+		sync.mutex_lock(&pool.contexts_lock)
+		append(&pool.all_contexts, new_ctx)
+		sync.mutex_unlock(&pool.contexts_lock)
+
+		gi_ctx = new_ctx
+	}
+
+	buf: [32 * 1024]u8
+	rel_buf: [4096]u8
+
+	for {
+		n, errno := linux.getdents(fd, buf[:])
+		if n <= 0 || errno != .NONE do break
+
+		offs := 0
+		for d in linux.dirent_iterate_buf(buf[:n], &offs) {
+			name := linux.dirent_name(d)
+			if name == "." || name == ".." do continue
+			if name == ".git" do continue
+
+			// NOTE(review): every d.type other than .DIR is handled as a file below.
+			is_dir := d.type == .DIR
+			is_nondir := d.type != .DIR
+
+			if pool.exclude_gi != nil && is_ignored(pool.exclude_gi, name, is_dir) {
+				continue
+			}
+
+			entry_rel := build_rel(rel_buf[:], rel, name)
+
+			ignored := false
+			if item.in_ignored {
+				ignored = true
+			} else if gi_ctx != nil {
+				ignored = check_chain(gi_ctx, entry_rel, is_dir)
+			}
+
+			if is_dir {
+				if ignored && matches_pattern(pool, name) {
+					append_path(local_buf, dir_path, name, true)
+				}
+				child_rel, _ := strings.clone(entry_rel)
+				child_path := join_path(dir_path, name)
+				push_work(
+					pool,
+					WorkItem {
+						path = child_path,
+						rel = child_rel,
+						gi_ctx = gi_ctx,
+						in_repo = child_in_repo,
+						in_ignored = ignored,
+					},
+				)
+			} else if is_nondir {
+				if ignored && matches_pattern(pool, name) {
+					append_path(local_buf, dir_path, name, false)
+				}
+			}
+		}
+	}
+}
+
+// check_chain walks from the innermost ignore file outwards; the first file
+// with an opinion about the entry decides. No opinion anywhere = not ignored.
+check_chain :: proc(ctx: ^GIContext, entry_rel: string, is_dir: bool) -> bool {
+	c := ctx
+	for c != nil {
+		if c.gi != nil {
+			rel := relative_to(entry_rel, c.base_rel)
+			match := check_match(c.gi, rel, is_dir)
+			if match != .None {
+				return match == .Ignored
+			}
+		}
+		c = c.parent
+	}
+	return false
+}
+
+// relative_to strips the `base_rel` + '/' prefix from `entry_rel`; the input is
+// returned unchanged when `base_rel` is empty or is not such a prefix.
+relative_to :: proc(entry_rel, base_rel: string) -> string {
+	if len(base_rel) == 0 do return entry_rel
+	prefix_len := len(base_rel)
+	if len(entry_rel) > prefix_len &&
+	   entry_rel[prefix_len] == '/' &&
+	   strings.has_prefix(entry_rel, base_rel) {
+		return entry_rel[prefix_len + 1:]
+	}
+	return entry_rel
+}
+
+// build_rel joins `rel` and `name` with '/' inside `buf`. The result aliases
+// `buf` (or `name` when `rel` is empty) and is truncated if `buf` is too small.
+build_rel :: proc(buf: []u8, rel, name: string) -> string {
+	if len(rel) == 0 do return name
+	pos := copy(buf, rel)
+	if pos < len(buf) {
+		buf[pos] = '/'
+		pos += 1
+		pos += copy(buf[pos:], name)
+	}
+	return string(buf[:pos])
+}
+
+// matches_pattern reports whether `name` passes the basename regex, if any.
+matches_pattern :: proc(pool: ^WalkerPool, name: string) -> bool {
+	if !pool.has_pattern do return true
+	cap, ok := regex.match(pool.pattern_re, name)
+	regex.destroy(cap)
+	return ok
+}
+
+// push_work counts the directory as active, queues it and posts the semaphore.
+push_work :: proc(pool: ^WalkerPool, item: WorkItem) {
+	sync.atomic_add_explicit(&pool.active, 1, .Relaxed)
+	sync.mutex_lock(&pool.queue_mutex)
+	append(&pool.queue, item)
+	sync.mutex_unlock(&pool.queue_mutex)
+	sync.atomic_sema_post(&pool.queue_sema)
+}
+
+// has_git_dir reports whether the directory open at `fd` contains a .git directory.
+has_git_dir :: proc(fd: linux.Fd) -> bool {
+	git_fd, err := linux.openat(fd, ".git", {.DIRECTORY, .CLOEXEC})
+	if err == .NONE {
+		linux.close(git_fd)
+		return true
+	}
+	return false
+}
+
+// load_ignore_patterns parses the directory's .gitignore (only inside a repo)
+// and .ignore files into one rule set; returns nil when neither can be read.
+load_ignore_patterns :: proc(dir_path: string, in_repo: bool) -> ^Gitignore {
+	has_patterns := false
+	sb: strings.Builder
+	strings.builder_init(&sb)
+	defer strings.builder_destroy(&sb)
+
+	if in_repo {
+		gi_path := join_path(dir_path, ".gitignore")
+		data, err := os.read_entire_file_from_path(gi_path, context.allocator)
+		delete(gi_path)
+		if err == .NONE {
+			// Terminate the content so a .gitignore without a trailing newline
+			// cannot fuse its last pattern with the first line of .ignore.
+			fmt.sbprintf(&sb, "%s\n", string(data))
+			delete(data)
+			has_patterns = true
+		}
+	}
+
+	ig_path := join_path(dir_path, ".ignore")
+	idata, ierr := os.read_entire_file_from_path(ig_path, context.allocator)
+	delete(ig_path)
+	if ierr == .NONE {
+		fmt.sbprintf(&sb, "%s", string(idata))
+		delete(idata)
+		has_patterns = true
+	}
+
+	if !has_patterns do return nil
+
+	content := strings.to_string(sb)
+	gi := new(Gitignore)
+	gi^ = parse(content)
+	return gi
+}
+
+// join_path returns a newly allocated "parent/child", adding the separator
+// unless `parent` already ends in one. The caller frees the result.
+join_path :: proc(parent, child: string) -> string {
+	need_sep := len(parent) == 0 || parent[len(parent) - 1] != '/'
+	total := len(parent) + len(child)
+	if need_sep do total += 1
+	buf := make([]u8, total, context.allocator)
+	pos := copy(buf, parent)
+	if need_sep {
+		buf[pos] = '/'
+		pos += 1
+	}
+	copy(buf[pos:], child)
+	return string(buf)
+}
diff --git a/flake.nix b/flake.nix
index c3e14a2..8a7ac75 100644
--- a/flake.nix
+++ b/flake.nix
@@ -95,7 +95,6 @@ devShells.default = pkgs.mkShell { buildInputs = with pkgs; [ - fd nushell libsodium
@@ -106,6 +105,10 @@ # Build tools zip + # Helper tools + delta + hyperfine + # IDE unstable.helix typescript-language-server
diff --git a/scan.odin b/scan.odin
index a220949..fba31dd 100644
--- a/scan.odin
+++ b/scan.odin
@@ -1,137 +1,21 @@ package main -import "core:fmt" import "core:os" -import "core:strings" -import "core:sync" -import "core:terminal" -fd_counter: sync.Atomic_Mutex -fd_seq: int +import "findr" // Caller is responsible for freeing paths scan_path :: proc(search_path: string, cfg: Config) -> (paths: [dynamic]string, ok: bool) { - if terminal.is_terminal(os.stdout) { - fmt.printf("Searching for all files in \"%s\"...\n", search_path) + opts := findr.WalkOptions { + pattern = cfg.ScanConfig.Matcher, + excludes = cfg.ScanConfig.Exclude[:], } - all_files, all_ok := run_fd(build_fd_args(search_path, cfg, true)) - if !all_ok { - return - } - - if terminal.is_terminal(os.stdout) { - fmt.printf("Search for unignored fies in \"%s\"...\n", search_path) - } - unignored_files, unignored_ok := run_fd(build_fd_args(search_path, cfg, false)) - if !unignored_ok { - return - } - - unignored_set := make(map[string]bool, len(unignored_files), context.temp_allocator) - for file in unignored_files { - unignored_set[file] = true - } - - for file in all_files { - if !(file in unignored_set) { - append(&paths, file) - } - } - + findr.walk({search_path}, &paths, opts, os.get_processor_core_count()) ok = true return } -@(private = "file") -build_fd_args :: proc(search_path: string, cfg: Config, include_ignored: bool) -> []string { - args_len := 3 + 2 * len(cfg.ScanConfig.Exclude) + 2 - args := make([dynamic]string, 0, args_len, context.temp_allocator) - append(&args, "fd") - append(&args, "-a") - append(&args, cfg.ScanConfig.Matcher) - - for exclude in cfg.ScanConfig.Exclude { - append(&args, "-E") - append(&args, exclude) - } - - if include_ignored { - append(&args, "-HI") - } else { - append(&args, "-H") - } - - append(&args, search_path) - return args[:] -} - -run_fd :: proc(args: []string) -> (lines: []string, ok: bool) { - tmp_path := next_fd_tmp_path() - tmp_file, tmp_err := os.open(tmp_path, os.O_CREATE | os.O_WRONLY | os.O_TRUNC) - if tmp_err != nil { - // TODO: Log a message here - 
return - } - - desc := os.Process_Desc { - command = args, - stdout = tmp_file, - stderr = nil, - } - - p, start_err := os.process_start(desc) - os.close(tmp_file) - if start_err != nil { - os.remove(tmp_path) - return - } - - state, wait_err := os.process_wait(p) - if wait_err != nil || state.exit_code != 0 { - os.remove(tmp_path) - return - } - - data, read_err := os.read_entire_file_from_path(tmp_path, context.temp_allocator) - os.remove(tmp_path) - if read_err != nil { - return - } - - output := string(data) - output = strings.trim_space(output) - if len(output) == 0 { - ok = true - return - } - - raw_lines := strings.split(output, "\n", context.temp_allocator) - result := make([dynamic]string, 0, len(raw_lines), context.temp_allocator) - for line in raw_lines { - trimmed := strings.trim_space(line) - if len(trimmed) > 0 { - append(&result, trimmed) - } - } - - return result[:], true -} - -@(private = "file") -next_fd_tmp_path :: proc() -> string { - sync.atomic_mutex_lock(&fd_counter) - n := fd_seq - fd_seq += 1 - sync.atomic_mutex_unlock(&fd_counter) - return fmt.tprintf("/tmp/envr-fd-%d-%d", os.get_pid(), n) -} - -cant_scan :: proc(feats: AvailableFeatures) -> bool { - return Feature.Fd not_in feats -} - find_unbacked :: proc(local_files: []string, db_files: []EnvFile) -> []string { - // Lives until the end of the function backed_set := make(map[string]bool, len(db_files), context.temp_allocator) for file in db_files { backed_set[file.Path] = true @@ -145,4 +29,3 @@ find_unbacked :: proc(local_files: []string, db_files: []EnvFile) -> []string { } return unbacked[:] } - diff --git a/scan_test.odin b/scan_test.odin index f34db6c..35124c9 100644 --- a/scan_test.odin +++ b/scan_test.odin @@ -3,14 +3,10 @@ package main import "core:fmt" import "core:os" import "core:path/filepath" -import "core:strings" import "core:testing" @(test) test_scan_path_finds_gitignored_env_files :: proc(t: ^testing.T) { - feats := check_features() - testing.expect(t, cant_scan(feats) 
== false) - base := fmt.tprintf("/tmp/envr-scan-test-%d", os.get_pid()) os.mkdir_all(base) defer os.remove_all(base) @@ -42,7 +38,12 @@ test_scan_path_finds_gitignored_env_files :: proc(t: ^testing.T) { } results, ok := scan_path(base, cfg) - defer delete(results) + defer { + for path in results { + delete(path) + } + delete(results) + } testing.expect(t, ok, "scan_path should succeed") found_env := false @@ -69,9 +70,6 @@ test_scan_path_finds_gitignored_env_files :: proc(t: ^testing.T) { @(test) test_scan_path_empty_dir :: proc(t: ^testing.T) { - feats := check_features() - testing.expect(t, cant_scan(feats) == false) - base := fmt.tprintf("/tmp/envr-scan-empty-%d", os.get_pid()) os.mkdir_all(base) defer os.remove_all(base) @@ -85,12 +83,3 @@ test_scan_path_empty_dir :: proc(t: ^testing.T) { testing.expect(t, ok, "scan_path should succeed") testing.expect(t, len(results) == 0, fmt.tprintf("expected 0 results, got %d", len(results))) } - -@(test) -test_scan_meets_expectations :: proc(t: ^testing.T) { - testing.expect(t, cant_scan({}), "no features should mean can't scan") - testing.expect(t, cant_scan({.Git}), "Git alone should mean can't scan") - testing.expect(t, !cant_scan({.Fd}), "having Fd should mean can scan") - testing.expect(t, !cant_scan({.Fd, .Git}), "both Fd and Git should mean can scan") -} -