package findr import "core:fmt" import "core:os" import "core:strings" import "core:sync" import "core:sys/linux" import "core:text/regex" import "core:thread" IgnoreMode :: enum { Respected, // skip gitignored, prune ignored dirs (fd -H default) All, // ignore .gitignore entirely, descend everywhere (fd -HI) Ignored, // emit ONLY gitignored files, prune ignored dirs (findr original) } WalkOptions :: struct { pattern: string, // regex on basename; "" = match all excludes: []string, // glob patterns to skip entirely (fd -E) include_hidden: bool, // true = include dotfiles (fd -H) ignore_mode: IgnoreMode, } RawEntry :: struct { name: string, type: linux.Dirent_Type, } GIContext :: struct { gi: ^Gitignore, // nil if this dir had no .gitignore base_rel: string, // relative path from repo root to this dir parent: ^GIContext, // parent context (nil if repo root) } WorkItem :: struct { path: string, // absolute directory path rel: string, // relative path from repo root ("" = root) gi_ctx: ^GIContext, // gitignore chain (nil = outside any repo) in_repo: bool, // true if inside a git repo } WalkerPool :: struct { queue: [dynamic]WorkItem, queue_mutex: sync.Mutex, queue_sema: sync.Atomic_Sema, results: ^[dynamic]string, results_mutex: sync.Mutex, active: i64, done: sync.One_Shot_Event, threads: [dynamic]^thread.Thread, opts: WalkOptions, pattern_re: regex.Regular_Expression, has_pattern: bool, exclude_gi: ^Gitignore, all_contexts: [dynamic]^GIContext, contexts_lock: sync.Mutex, } walk :: proc(roots: []string, results: ^[dynamic]string, opts: WalkOptions, thread_count: int) { if len(roots) == 0 do return pool := new(WalkerPool) pool.queue = make([dynamic]WorkItem) pool.results = results pool.active = i64(len(roots)) pool.threads = make([dynamic]^thread.Thread) pool.all_contexts = make([dynamic]^GIContext) pool.opts = opts pool.exclude_gi = nil pool.has_pattern = false if len(opts.pattern) > 0 { re, err := regex.create(opts.pattern, {regex.Flag.No_Capture}) if err == nil { pool.pattern_re = re pool.has_pattern = true } } if len(opts.excludes) > 0 { sb: strings.Builder strings.builder_init(&sb) for ex in opts.excludes { fmt.sbprintf(&sb, "%s\n", ex) } content := strings.to_string(sb) pool.exclude_gi = new(Gitignore) pool.exclude_gi^ = parse(content) strings.builder_destroy(&sb) } for root in roots { root_clone, _ := strings.clone(root) append(&pool.queue, WorkItem{path = root_clone}) sync.atomic_sema_post(&pool.queue_sema) } for i in 0 ..< thread_count { t := thread.create(walk_worker) t.data = rawptr(pool) t.init_context = context thread.start(t) append(&pool.threads, t) } sync.one_shot_event_wait(&pool.done) for _ in 0 ..< thread_count { sync.atomic_sema_post(&pool.queue_sema) } for t in pool.threads { thread.destroy(t) } delete(pool.threads) for item in pool.queue { delete(item.path) if len(item.rel) > 0 { delete(item.rel) } } delete(pool.queue) for ctx in pool.all_contexts { if ctx.gi != nil { destroy(ctx.gi) free(ctx.gi) } if len(ctx.base_rel) > 0 { delete(ctx.base_rel) } free(ctx) } delete(pool.all_contexts) if pool.has_pattern { regex.destroy(pool.pattern_re) } if pool.exclude_gi != nil { destroy(pool.exclude_gi) free(pool.exclude_gi) } free(pool) } walk_worker :: proc(t: ^thread.Thread) { pool := cast(^WalkerPool)t.data for { sync.atomic_sema_wait(&pool.queue_sema) sync.mutex_lock(&pool.queue_mutex) if len(pool.queue) == 0 { sync.mutex_unlock(&pool.queue_mutex) if sync.atomic_load_explicit(&pool.active, .Acquire) == 0 { sync.one_shot_event_signal(&pool.done) } break } last := len(pool.queue) - 1 item := pool.queue[last] ordered_remove(&pool.queue, last) sync.mutex_unlock(&pool.queue_mutex) process_dir(pool, item) delete(item.path) if len(item.rel) > 0 { delete(item.rel) } old := sync.atomic_sub_explicit(&pool.active, 1, .Release) if old == 1 { sync.one_shot_event_signal(&pool.done) } } } process_dir :: proc(pool: ^WalkerPool, item: WorkItem) { dir_path := item.path has_git := false entries := read_dir_entries(dir_path, &has_git) defer free_entries(&entries) gi_ctx := item.gi_ctx rel := item.rel if has_git { gi_ctx = nil rel = "" } child_in_repo := has_git || item.in_repo gi := load_ignore_patterns(dir_path, child_in_repo) if gi != nil { new_ctx := new(GIContext) new_ctx.gi = gi if len(rel) > 0 { new_ctx.base_rel, _ = strings.clone(rel) } new_ctx.parent = gi_ctx sync.mutex_lock(&pool.contexts_lock) append(&pool.all_contexts, new_ctx) sync.mutex_unlock(&pool.contexts_lock) gi_ctx = new_ctx } rel_buf: [4096]u8 for entry in entries { if entry.name == ".git" do continue is_dir := entry.type == .DIR is_nondir := entry.type != .DIR if pool.exclude_gi != nil && is_ignored(pool.exclude_gi, entry.name, is_dir) { continue } if !pool.opts.include_hidden && len(entry.name) > 0 && entry.name[0] == '.' { continue } entry_rel := build_rel(rel_buf[:], rel, entry.name) ignored := false if gi_ctx != nil && pool.opts.ignore_mode != .All { ignored = check_chain(gi_ctx, entry_rel, is_dir) } should_emit: bool if ignored { should_emit = pool.opts.ignore_mode == .Ignored } else { should_emit = pool.opts.ignore_mode != .Ignored } if is_dir { if should_emit && matches_pattern(pool, entry.name) { dir_path_out := join_path_dir(dir_path, entry.name) sync.mutex_lock(&pool.results_mutex) append(pool.results, dir_path_out) sync.mutex_unlock(&pool.results_mutex) } if !ignored { child_rel, _ := strings.clone(entry_rel) child_path := join_path(dir_path, entry.name) push_work(pool, WorkItem{path = child_path, rel = child_rel, gi_ctx = gi_ctx, in_repo = child_in_repo}) } } else if is_nondir { if should_emit && matches_pattern(pool, entry.name) { full_path := join_path(dir_path, entry.name) sync.mutex_lock(&pool.results_mutex) append(pool.results, full_path) sync.mutex_unlock(&pool.results_mutex) } } } } check_chain :: proc(ctx: ^GIContext, entry_rel: string, is_dir: bool) -> bool { c := ctx for c != nil { if c.gi != nil { rel := relative_to(entry_rel, c.base_rel) match := check_match(c.gi, rel, is_dir) if match != .None { return match == .Ignored } } c = c.parent } return false } relative_to :: proc(entry_rel, base_rel: string) -> string { if len(base_rel) == 0 do return entry_rel prefix_len := len(base_rel) if len(entry_rel) > prefix_len && entry_rel[prefix_len] == '/' && strings.has_prefix(entry_rel, base_rel) { return entry_rel[prefix_len + 1:] } return entry_rel } build_rel :: proc(buf: []u8, rel, name: string) -> string { if len(rel) == 0 do return name pos := copy(buf, rel) if pos < len(buf) { buf[pos] = '/' pos += 1 pos += copy(buf[pos:], name) } return string(buf[:pos]) } matches_pattern :: proc(pool: ^WalkerPool, name: string) -> bool { if !pool.has_pattern do return true cap, ok := regex.match(pool.pattern_re, name) regex.destroy(cap) return ok } push_work :: proc(pool: ^WalkerPool, item: WorkItem) { sync.atomic_add_explicit(&pool.active, 1, .Relaxed) sync.mutex_lock(&pool.queue_mutex) append(&pool.queue, item) sync.mutex_unlock(&pool.queue_mutex) sync.atomic_sema_post(&pool.queue_sema) } read_dir_entries :: proc(dir_path: string, has_git: ^bool) -> [dynamic]RawEntry { entries := make([dynamic]RawEntry) cpath := strings.clone_to_cstring(dir_path) if cpath == nil do return entries fd, err := linux.open(cpath, {.DIRECTORY, .CLOEXEC}) delete(cpath) if err != .NONE do return entries buf: [8192]u8 has_git^ = false for { n, errno := linux.getdents(fd, buf[:]) if n <= 0 || errno != .NONE do break offs := 0 for d in linux.dirent_iterate_buf(buf[:n], &offs) { name := linux.dirent_name(d) if name == "." || name == ".." do continue if name == ".git" && d.type == .DIR { has_git^ = true } cloned := strings.clone(name) append(&entries, RawEntry{name = cloned, type = d.type}) } } linux.close(fd) return entries } free_entries :: proc(entries: ^[dynamic]RawEntry) { for &entry in entries { delete(entry.name) } delete(entries^) } load_ignore_patterns :: proc(dir_path: string, in_repo: bool) -> ^Gitignore { has_patterns := false sb: strings.Builder strings.builder_init(&sb) defer strings.builder_destroy(&sb) if in_repo { gi_path := join_path(dir_path, ".gitignore") data, err := os.read_entire_file_from_path(gi_path, context.allocator) delete(gi_path) if err == .NONE { fmt.sbprintf(&sb, "%s", string(data)) delete(data) has_patterns = true } } ig_path := join_path(dir_path, ".ignore") idata, ierr := os.read_entire_file_from_path(ig_path, context.allocator) delete(ig_path) if ierr == .NONE { fmt.sbprintf(&sb, "%s", string(idata)) delete(idata) has_patterns = true } if !has_patterns do return nil content := strings.to_string(sb) gi := new(Gitignore) gi^ = parse(content) return gi } join_path :: proc(parent, child: string) -> string { b: strings.Builder strings.builder_init(&b) defer strings.builder_destroy(&b) fmt.sbprintf(&b, "%s", parent) if len(parent) == 0 || parent[len(parent) - 1] != '/' { fmt.sbprintf(&b, "/") } fmt.sbprintf(&b, "%s", child) s := strings.to_string(b) result, _ := strings.clone(s) return result } join_path_dir :: proc(parent, child: string) -> string { b: strings.Builder strings.builder_init(&b) defer strings.builder_destroy(&b) fmt.sbprintf(&b, "%s", parent) if len(parent) == 0 || parent[len(parent) - 1] != '/' { fmt.sbprintf(&b, "/") } fmt.sbprintf(&b, "%s/", child) s := strings.to_string(b) result, _ := strings.clone(s) return result }