perf(findr): Replaced regex engine with glob.
This commit is contained in:
@@ -1,27 +1,34 @@
|
|||||||
findr is ~2.3x slower than fd (case 1: 547ms vs 241ms). Opportunities:
|
# Performance Ideas
|
||||||
|
|
||||||
1. Per-thread result buffers (DONE)
|
Current state after regex→glob migration. findr beats fd in 3/4 cases.
|
||||||
Each thread accumulates results locally, then merges once at exit. Eliminates per-result mutex contention.
|
|
||||||
|
|
||||||
2. Batched channel (fd's approach)
|
## Benchmark results (2026-06-17)
|
||||||
Replace global results array + merge with a buffered channel of batches. Each worker fills a local batch (~256 items), sends it to a `chan.Chan([]string)` (capacity = 2 × threads). A receiver thread drains batches and collects/prints. Provides backpressure, streaming output, and per-batch (not global) synchronization. Enables sorting like fd does (buffer first 1000 results or 100ms, then stream).
|
|
||||||
|
|
||||||
3. Path allocation waste (join_path/join_path_dir)
|
| Case | fd | findr | Ratio |
|
||||||
Every path construction spins up a strings.Builder, does fmt.sbprintf, to_string, clone, then builder_destroy — 2 heap allocs + 2 frees per path. Could be a simple memcpy into a stack buffer with a single alloc.
|
|------|------|-------|-------|
|
||||||
|
| 1 `-E .jj` | 172ms | 135ms | **1.27x faster** |
|
||||||
|
| 2 `-H` | 1.184s | 1.097s | **1.08x faster** |
|
||||||
|
| 3 `-HI` | 1.251s | 1.670s | **1.34x slower** |
|
||||||
|
| 4 `-E .git` | 274ms | 202ms | **1.36x faster** |
|
||||||
|
|
||||||
4. Larger getdents buffer
|
Case 3 (`-HI`) skips gitignore entirely, so it's pure I/O + allocation. System time is roughly 2.2x fd's (12.1s vs 5.5s), pointing to syscall/allocation overhead.
|
||||||
Currently 8KB. Increasing to 64KB+ means fewer syscalls per directory with many entries.
|
|
||||||
|
|
||||||
5. Eliminate entry name cloning
|
## Completed
|
||||||
strings.clone(name) in read_dir_entries heap-allocates per dirent. Names are valid in the getdents buffer during process_dir, so the clone may be unnecessary.
|
|
||||||
|
|
||||||
6. Arena allocator per thread
|
1. **Per-thread result buffers** — each thread accumulates locally, merges once at exit. Eliminates per-result mutex contention.
|
||||||
Replace the default allocator for transient strings with a bump allocator — allocate in bulk, free all at once.
|
2. **Lean path join** — `join_path`/`join_path_dir` use stack buffer + `copy` + single alloc instead of `strings.Builder` + `fmt.sbprintf` + `clone`.
|
||||||
2. Path allocation waste (join_path/join_path_dir)
|
3. **Regex→glob migration** — replaced regex NFA with backtracking glob matcher. Eliminated 27% of CPU spent on `add_thread`/`is_ignored`. Biggest win.
|
||||||
Every path construction spins up a strings.Builder, does fmt.sbprintf, to_string, clone, then builder_destroy — 2 heap allocs + 2 frees per path. Could be a simple memcpy into a stack buffer with a single alloc.
|
|
||||||
3. Larger getdents buffer
|
## Remaining ideas
|
||||||
Currently 8KB. Increasing to 64KB+ means fewer syscalls per directory with many entries.
|
|
||||||
4. Eliminate entry name cloning
|
1. **Larger getdents buffer** (8KB → 64KB+)
|
||||||
strings.clone(name) in read_dir_entries heap-allocates per dirent. Names are valid in the getdents buffer during process_dir, so the clone may be unnecessary.
|
Fewer syscalls per directory with many entries. Low effort.
|
||||||
5. Arena allocator per thread
|
|
||||||
Replace the default allocator for transient strings with a bump allocator — allocate in bulk, free all at once.
|
2. **Eliminate entry name cloning**
|
||||||
|
`strings.clone(name)` in `read_dir_entries` heap-allocates per dirent. Names are valid in the getdents buffer during `process_dir`, so the clone may be unnecessary. Low effort.
|
||||||
|
|
||||||
|
3. **Arena allocator per thread**
|
||||||
|
Bump allocator for all transient strings, free once at exit. Bigger change, helps everywhere.
|
||||||
|
|
||||||
|
4. **Batched channel** (fd's approach)
|
||||||
|
Replace global results array with buffered channel of batches. Enables streaming output and sorting like fd does.
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package findr
|
package findr
|
||||||
|
|
||||||
import "core:bufio"
|
import "core:bufio"
|
||||||
import "core:fmt"
|
|
||||||
import "core:os"
|
import "core:os"
|
||||||
import "core:strings"
|
import "core:strings"
|
||||||
|
|
||||||
@@ -89,3 +88,4 @@ main :: proc() {
|
|||||||
}
|
}
|
||||||
bufio.writer_flush(&w)
|
bufio.writer_flush(&w)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
155
gitignore.odin
155
gitignore.odin
@@ -1,112 +1,36 @@
|
|||||||
package findr
|
package findr
|
||||||
|
|
||||||
import "core:fmt"
|
|
||||||
import "core:strings"
|
import "core:strings"
|
||||||
import "core:text/regex"
|
|
||||||
|
|
||||||
// FIXME: Use a const bit_set[0..<128; u128] here when we start doing optimizations
|
Gitignore :: struct {
|
||||||
is_regex_meta :: proc(c: u8) -> bool {
|
rules: [dynamic]Rule,
|
||||||
switch c {
|
|
||||||
case '.', '+', '(', ')', '{', '}', '^', '$', '|', '#':
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
glob_to_regex :: proc(pattern: string, anchored: bool) -> string {
|
|
||||||
// TODO: Attempt to pre-allocate the string builder when we start doing optimizations
|
|
||||||
sb: strings.Builder
|
|
||||||
strings.builder_init(&sb)
|
|
||||||
defer strings.builder_destroy(&sb)
|
|
||||||
|
|
||||||
if anchored {
|
|
||||||
fmt.sbprintf(&sb, "^")
|
|
||||||
} else {
|
|
||||||
fmt.sbprintf(&sb, "(^|/)")
|
|
||||||
}
|
|
||||||
|
|
||||||
i := 0
|
|
||||||
for i < len(pattern) {
|
|
||||||
c := pattern[i]
|
|
||||||
|
|
||||||
if c == '*' {
|
|
||||||
if i + 1 < len(pattern) && pattern[i + 1] == '*' {
|
|
||||||
prev_slash := i == 0 || pattern[i - 1] == '/'
|
|
||||||
at_end := i + 2 >= len(pattern)
|
|
||||||
next_slash := !at_end && pattern[i + 2] == '/'
|
|
||||||
|
|
||||||
if prev_slash && (next_slash || at_end) {
|
|
||||||
if next_slash {
|
|
||||||
i += 3
|
|
||||||
fmt.sbprintf(&sb, "(.*/)?")
|
|
||||||
} else {
|
|
||||||
i += 2
|
|
||||||
fmt.sbprintf(&sb, ".*")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
fmt.sbprintf(&sb, "[^/]*")
|
|
||||||
i += 2
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
fmt.sbprintf(&sb, "[^/]*")
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
} else if c == '?' {
|
|
||||||
fmt.sbprintf(&sb, "[^/]")
|
|
||||||
i += 1
|
|
||||||
} else if c == '[' {
|
|
||||||
append(&sb.buf, '[')
|
|
||||||
i += 1
|
|
||||||
if i < len(pattern) && pattern[i] == '!' {
|
|
||||||
append(&sb.buf, '^')
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
if i < len(pattern) && pattern[i] == ']' {
|
|
||||||
append(&sb.buf, ']')
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
for i < len(pattern) && pattern[i] != ']' {
|
|
||||||
append(&sb.buf, pattern[i])
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
if i < len(pattern) {
|
|
||||||
append(&sb.buf, ']')
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
} else if c == '\\' {
|
|
||||||
i += 1
|
|
||||||
if i < len(pattern) {
|
|
||||||
if is_regex_meta(pattern[i]) {
|
|
||||||
append(&sb.buf, '\\')
|
|
||||||
}
|
|
||||||
append(&sb.buf, pattern[i])
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
} else if is_regex_meta(c) {
|
|
||||||
append(&sb.buf, '\\')
|
|
||||||
append(&sb.buf, c)
|
|
||||||
i += 1
|
|
||||||
} else {
|
|
||||||
append(&sb.buf, c)
|
|
||||||
i += 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.sbprintf(&sb, "$")
|
|
||||||
|
|
||||||
s := strings.to_string(sb)
|
|
||||||
result, _ := strings.clone(s)
|
|
||||||
return result
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Rule :: struct {
|
Rule :: struct {
|
||||||
regex: regex.Regular_Expression,
|
pattern: GlobPattern,
|
||||||
negated: bool,
|
negated: bool,
|
||||||
dir_only: bool,
|
dir_only: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
Gitignore :: struct {
|
Match :: enum {
|
||||||
rules: [dynamic]Rule,
|
None,
|
||||||
|
Ignored,
|
||||||
|
Unignored,
|
||||||
|
}
|
||||||
|
|
||||||
|
is_ignored :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> bool {
|
||||||
|
return check_match(gi, path, is_dir) == .Ignored
|
||||||
|
}
|
||||||
|
|
||||||
|
check_match :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> Match {
|
||||||
|
result := Match.None
|
||||||
|
for &rule in gi.rules {
|
||||||
|
if rule.dir_only && !is_dir do continue
|
||||||
|
if glob_match_compiled(&rule.pattern, path) {
|
||||||
|
result = rule.negated ? .Unignored : .Ignored
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
parse :: proc(content: string) -> Gitignore {
|
parse :: proc(content: string) -> Gitignore {
|
||||||
@@ -148,43 +72,16 @@ parse :: proc(content: string) -> Gitignore {
|
|||||||
|
|
||||||
if len(s) == 0 do continue
|
if len(s) == 0 do continue
|
||||||
|
|
||||||
regex_str := glob_to_regex(s, anchored)
|
gp := glob_compile(s, anchored)
|
||||||
re, err := regex.create(regex_str, {regex.Flag.No_Capture})
|
append(&gi.rules, Rule{pattern = gp, negated = negated, dir_only = dir_only})
|
||||||
delete(regex_str)
|
|
||||||
if err != nil do continue
|
|
||||||
|
|
||||||
append(&gi.rules, Rule{regex = re, negated = negated, dir_only = dir_only})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return gi
|
return gi
|
||||||
}
|
}
|
||||||
|
|
||||||
Match :: enum {
|
|
||||||
None,
|
|
||||||
Ignored,
|
|
||||||
Unignored,
|
|
||||||
}
|
|
||||||
|
|
||||||
check_match :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> Match {
|
|
||||||
result := Match.None
|
|
||||||
for rule in gi.rules {
|
|
||||||
if rule.dir_only && !is_dir do continue
|
|
||||||
cap, ok := regex.match(rule.regex, path)
|
|
||||||
regex.destroy(cap)
|
|
||||||
if ok {
|
|
||||||
result = rule.negated ? .Unignored : .Ignored
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
is_ignored :: proc(gi: ^Gitignore, path: string, is_dir: bool) -> bool {
|
|
||||||
return check_match(gi, path, is_dir) == .Ignored
|
|
||||||
}
|
|
||||||
|
|
||||||
destroy :: proc(gi: ^Gitignore) {
|
destroy :: proc(gi: ^Gitignore) {
|
||||||
for rule in gi.rules {
|
for &rule in gi.rules {
|
||||||
regex.destroy(rule.regex)
|
glob_destroy(&rule.pattern)
|
||||||
}
|
}
|
||||||
delete(gi.rules)
|
delete(gi.rules)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,100 +4,103 @@ import "core:testing"
|
|||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_simple :: proc(t: ^testing.T) {
|
test_glob_simple :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("foo", false)
|
testing.expect(t, glob_match("foo", "foo", false))
|
||||||
defer delete(result)
|
testing.expect(t, glob_match("foo", "bar/foo", false))
|
||||||
testing.expect_value(t, result, "(^|/)foo$")
|
testing.expect(t, !glob_match("foo", "foobar", false))
|
||||||
|
testing.expect(t, !glob_match("foo", "foo/bar", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_anchored :: proc(t: ^testing.T) {
|
test_glob_anchored :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("foo", true)
|
testing.expect(t, glob_match("foo", "foo", true))
|
||||||
defer delete(result)
|
testing.expect(t, !glob_match("foo", "bar/foo", true))
|
||||||
testing.expect_value(t, result, "^foo$")
|
testing.expect(t, !glob_match("foo", "foobar", true))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_star :: proc(t: ^testing.T) {
|
test_glob_star :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("*.log", false)
|
testing.expect(t, glob_match("*.log", "test.log", false))
|
||||||
defer delete(result)
|
testing.expect(t, glob_match("*.log", ".log", false))
|
||||||
testing.expect_value(t, result, "(^|/)[^/]*\\.log$")
|
testing.expect(t, !glob_match("*.log", "test.txt", false))
|
||||||
|
testing.expect(t, !glob_match("*.log", "dir/test", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_question :: proc(t: ^testing.T) {
|
test_glob_question :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("?.log", false)
|
testing.expect(t, glob_match("?.log", "a.log", false))
|
||||||
defer delete(result)
|
testing.expect(t, !glob_match("?.log", "ab.log", false))
|
||||||
testing.expect_value(t, result, "(^|/)[^/]\\.log$")
|
testing.expect(t, !glob_match("?.log", ".log", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_char_class :: proc(t: ^testing.T) {
|
test_glob_char_class :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("[abc].log", false)
|
testing.expect(t, glob_match("[abc].log", "a.log", false))
|
||||||
defer delete(result)
|
testing.expect(t, glob_match("[abc].log", "b.log", false))
|
||||||
testing.expect_value(t, result, "(^|/)[abc]\\.log$")
|
testing.expect(t, !glob_match("[abc].log", "d.log", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_negated_class :: proc(t: ^testing.T) {
|
test_glob_negated_class :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("[!abc].log", false)
|
testing.expect(t, glob_match("[!abc].log", "d.log", false))
|
||||||
defer delete(result)
|
testing.expect(t, !glob_match("[!abc].log", "a.log", false))
|
||||||
testing.expect_value(t, result, "(^|/)[^abc]\\.log$")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_dot_escaped :: proc(t: ^testing.T) {
|
test_glob_dot_literal :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex(".env", false)
|
testing.expect(t, glob_match(".env", ".env", false))
|
||||||
defer delete(result)
|
testing.expect(t, glob_match(".env", "dir/.env", false))
|
||||||
testing.expect_value(t, result, "(^|/)\\.env$")
|
testing.expect(t, !glob_match(".env", "env", false))
|
||||||
|
testing.expect(t, !glob_match(".env", "x.env", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_globstar_prefix :: proc(t: ^testing.T) {
|
test_glob_globstar_prefix :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("**/foo", false)
|
testing.expect(t, glob_match("**/foo", "foo", false))
|
||||||
defer delete(result)
|
testing.expect(t, glob_match("**/foo", "a/b/foo", false))
|
||||||
testing.expect_value(t, result, "(^|/)(.*/)?foo$")
|
testing.expect(t, !glob_match("**/foo", "foobar", false))
|
||||||
|
testing.expect(t, !glob_match("**/foo", "a/foobar", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_globstar_suffix :: proc(t: ^testing.T) {
|
test_glob_globstar_suffix :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("abc/**", false)
|
testing.expect(t, glob_match("abc/**", "abc/x", false))
|
||||||
defer delete(result)
|
testing.expect(t, glob_match("abc/**", "abc/x/y", false))
|
||||||
testing.expect_value(t, result, "(^|/)abc/.*$")
|
testing.expect(t, !glob_match("abc/**", "abc", false))
|
||||||
|
testing.expect(t, !glob_match("abc/**", "abcd/x", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_globstar_middle :: proc(t: ^testing.T) {
|
test_glob_globstar_middle :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("foo/**/bar", false)
|
testing.expect(t, glob_match("foo/**/bar", "foo/bar", false))
|
||||||
defer delete(result)
|
testing.expect(t, glob_match("foo/**/bar", "foo/x/bar", false))
|
||||||
testing.expect_value(t, result, "(^|/)foo/(.*/)?bar$")
|
testing.expect(t, !glob_match("foo/**/bar", "foo/barx", false))
|
||||||
|
testing.expect(t, !glob_match("foo/**/bar", "foo/x/y/baz", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_backslash_escape :: proc(t: ^testing.T) {
|
test_glob_backslash_escape :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("\\!foo", false)
|
testing.expect(t, glob_match("\\!foo", "!foo", false))
|
||||||
defer delete(result)
|
testing.expect(t, !glob_match("\\!foo", "foo", false))
|
||||||
testing.expect_value(t, result, "(^|/)!foo$")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_hash_escaped :: proc(t: ^testing.T) {
|
test_glob_hash_literal :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("#foo", false)
|
testing.expect(t, glob_match("#foo", "#foo", false))
|
||||||
defer delete(result)
|
testing.expect(t, !glob_match("#foo", "foo", false))
|
||||||
testing.expect_value(t, result, "(^|/)\\#foo$")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_hash_in_pattern :: proc(t: ^testing.T) {
|
test_glob_hash_pattern :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("#*#", false)
|
testing.expect(t, glob_match("#*#", "#test#", false))
|
||||||
defer delete(result)
|
testing.expect(t, glob_match("#*#", "##", false))
|
||||||
testing.expect_value(t, result, "(^|/)\\#[^/]*\\#$")
|
testing.expect(t, !glob_match("#*#", "test", false))
|
||||||
|
testing.expect(t, !glob_match("#*#", "#test", false))
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
test_glob_empty :: proc(t: ^testing.T) {
|
test_glob_empty :: proc(t: ^testing.T) {
|
||||||
result := glob_to_regex("", false)
|
testing.expect(t, glob_match("", "", false))
|
||||||
defer delete(result)
|
testing.expect(t, !glob_match("", "foo", false))
|
||||||
testing.expect_value(t, result, "(^|/)$")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@(test)
|
@(test)
|
||||||
|
|||||||
210
glob.odin
Normal file
210
glob.odin
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
package findr
|
||||||
|
|
||||||
|
// Range is an inclusive byte interval used inside a bracket expression.
// A single literal member is stored with lo == hi.
Range :: struct {
|
||||||
|
// Lowest byte accepted (inclusive).
lo: u8,
|
||||||
|
// Highest byte accepted (inclusive).
hi: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Class_Data is one compiled bracket expression, e.g. `[a-c]` or `[!x]`.
Class_Data :: struct {
|
||||||
|
// Set when the expression started with a negation marker; a byte then
// matches only if it falls outside every range.
negated: bool,
|
||||||
|
// Accepted intervals, tested in order. Freed by glob_destroy.
ranges: [dynamic]Range,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Token_Kind tags each element of a compiled pattern:
//   Char     - one literal byte (Token.byte)
//   Star     - `*`: any run of bytes that does not cross a '/'
//   Globstar - `**` standing as a whole path segment: may span several segments
//   Question - `?`: exactly one byte other than '/'
//   Class    - a bracket expression, looked up through Token.class_idx
Token_Kind :: enum u8 { Char, Star, Globstar, Question, Class }
|
||||||
|
|
||||||
|
// Token is one element of a compiled glob. Which payload field is meaningful
// depends on `kind`; the other is left at its zero value.
Token :: struct {
|
||||||
|
kind: Token_Kind,
|
||||||
|
// Literal byte to compare against; used by .Char tokens.
byte: u8,
|
||||||
|
// Index into GlobPattern.classes; used by .Class tokens.
class_idx: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
// GlobPattern is the compiled form of a glob produced by glob_compile.
// It owns both dynamic arrays; release them with glob_destroy.
GlobPattern :: struct {
|
||||||
|
// Pattern elements in source order.
tokens: [dynamic]Token,
|
||||||
|
// Bracket expressions referenced by .Class tokens via Token.class_idx.
classes: [dynamic]Class_Data,
|
||||||
|
// When true the match must start at offset 0 of the path; otherwise it may
// also start right after any '/' (see glob_match_compiled).
anchored: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
// glob_compile tokenizes a gitignore-style glob into a GlobPattern that
// glob_match_compiled can evaluate without re-parsing the text.
//
// Syntax handled:
//   *      any run of bytes inside one path segment
//   **     spans path segments when it forms a whole segment (`**/x`, `x/**`,
//          `a/**/b`); anywhere else it behaves like a single `*`
//   ?      one byte other than '/'
//   [...]  bracket expression with `a-z` ranges; a leading `!` or `^` negates
//          it, and a `]` in first position is a literal member
//   \c     the byte c taken literally
//
// Inputs:
//   pattern  - the glob text
//   anchored - stored on the result; see GlobPattern.anchored
//
// Returns a GlobPattern that owns heap memory; release it with glob_destroy.
//
// Edge cases:
//   - An unterminated `[` compiles to a class that accepts nothing, so the
//     whole pattern can never match (git's wildmatch aborts the match here
//     too) instead of silently treating the rest of the pattern as members.
//   - A lone trailing backslash has nothing to escape and is dropped.
glob_compile :: proc(pattern: string, anchored: bool) -> GlobPattern {
	gp: GlobPattern
	gp.tokens = make([dynamic]Token)
	gp.classes = make([dynamic]Class_Data)
	gp.anchored = anchored

	i := 0
	for i < len(pattern) {
		c := pattern[i]

		if c == '*' {
			if i + 1 < len(pattern) && pattern[i + 1] == '*' {
				// `**` only crosses directories when it is a complete segment:
				// preceded by '/' or pattern start, followed by '/' or pattern end.
				prev_slash := i == 0 || pattern[i - 1] == '/'
				at_end := i + 2 >= len(pattern)
				next_slash := !at_end && pattern[i + 2] == '/'

				if prev_slash && (next_slash || at_end) {
					append(&gp.tokens, Token{kind = .Globstar})
					// The '/' that follows `**` is folded into the token so that
					// `**/x` can also match a bare `x`.
					i += next_slash ? 3 : 2
				} else {
					// e.g. `a**b`: no special meaning, same as one `*`.
					append(&gp.tokens, Token{kind = .Star})
					i += 2
				}
			} else {
				append(&gp.tokens, Token{kind = .Star})
				i += 1
			}
		} else if c == '?' {
			append(&gp.tokens, Token{kind = .Question})
			i += 1
		} else if c == '[' {
			i += 1

			// `!` is the POSIX negation marker. `^` is accepted as well: git
			// treats both alike, and the regex translation this matcher replaced
			// passed `[^...]` through as a negated class.
			negated := false
			if i < len(pattern) && (pattern[i] == '!' || pattern[i] == '^') {
				negated = true
				i += 1
			}

			ranges := make([dynamic]Range)

			// A ']' in first position is a member, not the terminator.
			if i < len(pattern) && pattern[i] == ']' {
				append(&ranges, Range{lo = ']', hi = ']'})
				i += 1
			}

			for i < len(pattern) && pattern[i] != ']' {
				// `x-y` is a range unless the '-' is the last member (`[a-]`).
				if i + 2 < len(pattern) && pattern[i + 1] == '-' && pattern[i + 2] != ']' {
					append(&ranges, Range{lo = pattern[i], hi = pattern[i + 2]})
					i += 3
				} else {
					append(&ranges, Range{lo = pattern[i], hi = pattern[i]})
					i += 1
				}
			}

			if i < len(pattern) {
				i += 1 // step over the closing ']'
			} else {
				// Ran off the end without a ']': make the class reject every
				// byte so the malformed pattern matches nothing.
				clear(&ranges)
				negated = false
			}

			class_idx := u16(len(gp.classes))
			append(&gp.classes, Class_Data{negated = negated, ranges = ranges})
			append(&gp.tokens, Token{kind = .Class, class_idx = class_idx})
		} else if c == '\\' {
			// Backslash makes the next byte literal, whatever it is.
			i += 1
			if i < len(pattern) {
				append(&gp.tokens, Token{kind = .Char, byte = pattern[i]})
				i += 1
			}
		} else {
			append(&gp.tokens, Token{kind = .Char, byte = c})
			i += 1
		}
	}

	return gp
}
|
||||||
|
|
||||||
|
// match_tokens reports whether tokens[ti:] match path[pi:] exactly, i.e. the
// remaining tokens must consume the rest of the path.
//
// Inputs:
//   tokens, classes - the compiled pattern (GlobPattern.tokens / .classes)
//   ti              - index of the first token still to be matched
//   path            - the candidate path; compared byte by byte
//   pi              - offset in path where matching resumes
//
// Single-byte tokens (Char, Question, Class) are consumed in a loop; only the
// wildcards recurse, because they have to try several split points.
//
// A bracket class never matches '/', negated or not: like `?` and `*`, it
// stays inside one path segment, which is how git applies gitignore patterns.
//
// NOTE: wildcard handling is plain backtracking, so a pattern with many `*`
// can take time polynomial in the segment length.
match_tokens :: proc(tokens: []Token, classes: []Class_Data, ti: int, path: string, pi: int) -> bool {
	ti := ti
	pi := pi

	for ti < len(tokens) {
		tok := tokens[ti]

		switch tok.kind {
		case .Char:
			if pi >= len(path) || path[pi] != tok.byte do return false

		case .Question:
			if pi >= len(path) || path[pi] == '/' do return false

		case .Class:
			if pi >= len(path) do return false
			ch := path[pi]
			// Without this guard `a[!x]b` would match `a/b`.
			if ch == '/' do return false

			cd := classes[tok.class_idx]
			in_range := false
			for r in cd.ranges {
				if ch >= r.lo && ch <= r.hi {
					in_range = true
					break
				}
			}
			// Positive class: need a hit. Negated class: need a miss.
			if in_range == cd.negated do return false

		case .Star:
			// Find how far `*` may stretch (up to the next '/'), then try the
			// longest candidate first and back off one byte at a time.
			max_end := pi
			for max_end < len(path) && path[max_end] != '/' {
				max_end += 1
			}
			for end := max_end; end >= pi; end -= 1 {
				if match_tokens(tokens, classes, ti + 1, path, end) {
					return true
				}
			}
			return false

		case .Globstar:
			// Trailing `**` swallows whatever is left.
			if ti + 1 >= len(tokens) {
				return true
			}
			// Zero segments consumed.
			if match_tokens(tokens, classes, ti + 1, path, pi) {
				return true
			}
			// One or more whole segments consumed: resume right after each '/'.
			for end := pi + 1; end <= len(path); end += 1 {
				if path[end - 1] == '/' {
					if match_tokens(tokens, classes, ti + 1, path, end) {
						return true
					}
				}
			}
			return false
		}

		// A single-byte token matched: advance both cursors.
		ti += 1
		pi += 1
	}

	// Out of tokens: success only if the path is used up as well.
	return pi == len(path)
}
|
||||||
|
|
||||||
|
// glob_match_compiled tests `path` against an already compiled pattern.
//
// An anchored pattern must match starting at offset 0. An unanchored pattern
// may instead start right after any '/' in the path, which is what lets a bare
// `foo` match `a/b/foo`. In both modes the match has to run to the end of the
// path.
glob_match_compiled :: proc(gp: ^GlobPattern, path: string) -> bool {
	toks := gp.tokens[:]
	cls := gp.classes[:]

	// Offset 0 is a candidate start in both modes.
	if match_tokens(toks, cls, 0, path, 0) do return true
	if gp.anchored do return false

	// Unanchored: retry at the beginning of every later path segment.
	for start in 1 ..< len(path) {
		if path[start - 1] != '/' do continue
		if match_tokens(toks, cls, 0, path, start) do return true
	}
	return false
}
|
||||||
|
|
||||||
|
// glob_destroy frees everything glob_compile allocated for `gp`: the range
// list of every bracket class, then the class and token arrays themselves.
// The pattern must not be used for matching afterwards.
glob_destroy :: proc(gp: ^GlobPattern) {
	for idx in 0 ..< len(gp.classes) {
		delete(gp.classes[idx].ranges)
	}
	delete(gp.tokens)
	delete(gp.classes)
}
|
||||||
|
|
||||||
|
// glob_match is the one-shot convenience form: compile `pattern`, test it
// against `path`, and free the compiled pattern before returning. Callers that
// match one pattern against many paths should compile once with glob_compile
// and use glob_match_compiled instead.
glob_match :: proc(pattern: string, path: string, anchored: bool) -> bool {
	compiled := glob_compile(pattern, anchored)
	defer glob_destroy(&compiled)
	return glob_match_compiled(&compiled, path)
}
|
||||||
14
walker.odin
14
walker.odin
@@ -256,7 +256,15 @@ process_dir :: proc(pool: ^WalkerPool, item: WorkItem, local_results: ^[dynamic]
|
|||||||
if !ignored {
|
if !ignored {
|
||||||
child_rel, _ := strings.clone(entry_rel)
|
child_rel, _ := strings.clone(entry_rel)
|
||||||
child_path := join_path(dir_path, entry.name)
|
child_path := join_path(dir_path, entry.name)
|
||||||
push_work(pool, WorkItem{path = child_path, rel = child_rel, gi_ctx = gi_ctx, in_repo = child_in_repo})
|
push_work(
|
||||||
|
pool,
|
||||||
|
WorkItem {
|
||||||
|
path = child_path,
|
||||||
|
rel = child_rel,
|
||||||
|
gi_ctx = gi_ctx,
|
||||||
|
in_repo = child_in_repo,
|
||||||
|
},
|
||||||
|
)
|
||||||
}
|
}
|
||||||
} else if is_nondir {
|
} else if is_nondir {
|
||||||
if should_emit && matches_pattern(pool, entry.name) {
|
if should_emit && matches_pattern(pool, entry.name) {
|
||||||
@@ -285,7 +293,8 @@ check_chain :: proc(ctx: ^GIContext, entry_rel: string, is_dir: bool) -> bool {
|
|||||||
relative_to :: proc(entry_rel, base_rel: string) -> string {
|
relative_to :: proc(entry_rel, base_rel: string) -> string {
|
||||||
if len(base_rel) == 0 do return entry_rel
|
if len(base_rel) == 0 do return entry_rel
|
||||||
prefix_len := len(base_rel)
|
prefix_len := len(base_rel)
|
||||||
if len(entry_rel) > prefix_len && entry_rel[prefix_len] == '/' &&
|
if len(entry_rel) > prefix_len &&
|
||||||
|
entry_rel[prefix_len] == '/' &&
|
||||||
strings.has_prefix(entry_rel, base_rel) {
|
strings.has_prefix(entry_rel, base_rel) {
|
||||||
return entry_rel[prefix_len + 1:]
|
return entry_rel[prefix_len + 1:]
|
||||||
}
|
}
|
||||||
@@ -422,3 +431,4 @@ join_path_dir :: proc(parent, child: string) -> string {
|
|||||||
buf[pos] = '/'
|
buf[pos] = '/'
|
||||||
return string(buf)
|
return string(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user