diff --git a/PERFORMANCE_IDEAS.md b/PERFORMANCE_IDEAS.md index f943484..863f048 100644 --- a/PERFORMANCE_IDEAS.md +++ b/PERFORMANCE_IDEAS.md @@ -3,3 +3,16 @@ findr is 4.5x slower than fd (case 1: 658ms vs 146ms). Opportunities: - Arena allocator for path strings - Larger getdents buffer (8KB → 64KB+) - Buffered stdout output + +- Write while walking rather than waiting until the end? + +1. Per-thread result buffers (biggest win) +Every result append currently takes results_mutex. With millions of files, that's millions of lock/unlock cycles. Fix: each thread accumulates results locally, then merges once when done. +2. Path allocation waste (join_path/join_path_dir) +Every path construction spins up a strings.Builder, does fmt.sbprintf, to_string, clone, then builder_destroy — 2 heap allocs + 2 frees per path. Could be a simple memcpy into a stack buffer with a single alloc. +3. Larger getdents buffer +Currently 8KB. Increasing to 64KB+ means fewer syscalls per directory with many entries. +4. Eliminate entry name cloning +strings.clone(name) in read_dir_entries heap-allocates per dirent. Names are valid in the getdents buffer during process_dir, so the clone may be unnecessary. +5. Arena allocator per thread +Replace the default allocator for transient strings with a bump allocator — allocate in bulk, free all at once. diff --git a/findr.odin b/findr.odin index ce89474..cd5db19 100644 --- a/findr.odin +++ b/findr.odin @@ -1,5 +1,6 @@ package findr +import "core:bufio" import "core:fmt" import "core:os" import "core:strings" @@ -75,7 +76,13 @@ main :: proc() { thread_count := os.get_processor_core_count() walk(paths[:], &results, opts, thread_count) + w: bufio.Writer + bufio.writer_init(&w, os.to_stream(os.stdout), 1 << 13) + defer bufio.writer_destroy(&w) + for r in results { - fmt.println(r) + bufio.writer_write_string(&w, r) + bufio.writer_write_byte(&w, '\n') } + bufio.writer_flush(&w) }