NOTE(review): reconstructed from a whitespace-collapsed, HTML-mangled copy of this patch.
Indentation and the `Vec<Vec<(f32, f32)>>` element types in context lines are inferred,
not recovered -- confirm against the tree (or apply with --ignore-whitespace).

diff --git a/src/bin/paint_bench.rs b/src/bin/paint_bench.rs
new file mode 100644
index 00000000..3eac42b7
--- /dev/null
+++ b/src/bin/paint_bench.rs
@@ -0,0 +1,40 @@
+//! Hot-path bench: build the optimizer corpus once, then loop calling
+//! `evaluate(corpus, &default_params)` for a fixed wall-clock duration.
+//! Prints iter count + ms/iter so you have a baseline number, and
+//! holds the process up long enough that an external profiler
+//! (samply, sample, Instruments) can capture a representative trace.
+//!
+//! Usage: paint_bench [seconds] (default 60)
+
+use std::time::{Duration, Instant};
+use trac3r_lib::brush_paint::PaintParams;
+use trac3r_lib::brush_paint_opt::{build_corpus, evaluate};
+
+fn main() {
+    let secs: u64 = std::env::args().nth(1)
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(60);
+    eprintln!("[bench] building corpus...");
+    let corpus = build_corpus();
+    eprintln!("[bench] corpus: {} hulls", corpus.len());
+    let params = PaintParams::default();
+    eprintln!("[bench] pid={} running for {}s", std::process::id(), secs);
+
+    // Warm up the hull cache + jit any lazy code paths.
+    let _ = evaluate(&corpus, &params);
+
+    let deadline = Instant::now() + Duration::from_secs(secs);
+    let mut iters = 0u32;
+    let start = Instant::now();
+    while Instant::now() < deadline {
+        let _ = evaluate(&corpus, &params);
+        iters += 1;
+        if iters.is_multiple_of(10) {
+            let elapsed = start.elapsed().as_secs_f64();
+            eprintln!("[bench] {iters} iters, {:.0} ms/iter", 1000.0 * elapsed / iters as f64);
+        }
+    }
+    let elapsed = start.elapsed().as_secs_f64();
+    eprintln!("[bench] DONE: {iters} iters in {:.1}s = {:.0} ms/iter",
+        elapsed, 1000.0 * elapsed / iters as f64);
+}
diff --git a/src/brush_paint.rs b/src/brush_paint.rs
index 08ccdac5..d7566a90 100644
--- a/src/brush_paint.rs
+++ b/src/brush_paint.rs
@@ -478,11 +478,28 @@ struct HullData {
     width: i32, height: i32,
     was_ink: BitMask,
     sdf: Vec<f32>,
+    /// Sorted chamfer-distance values for the ink pixels (the same set
+    /// `chamfer_distance` returns). Lets `sdf_percentile_q(q)` answer
+    /// in O(1) instead of recomputing chamfer + sort. Critical for
+    /// the optimizer hot path: `paint_fill_with` needs an SDF
+    /// percentile to derive `brush_radius` and was redundantly
+    /// recomputing chamfer per call.
+    sdf_values_sorted: Vec<f32>,
     skel_endpoints: Vec<(i32, i32)>,
     skeleton_length: u32,
     ink_total: i32,
 }
 
+impl HullData {
+    fn sdf_percentile_q(&self, q: f32) -> f32 {
+        let v = &self.sdf_values_sorted;
+        if v.is_empty() { return 0.0; }
+        let q = q.clamp(0.0, 1.0);
+        let idx = ((v.len() as f32 - 1.0) * q).round() as usize;
+        v[idx.min(v.len() - 1)]
+    }
+}
+
 /// Cache key. `hull.id` alone isn't enough — extract_hulls assigns
 /// IDs from a per-call counter, so distinct hulls from different
 /// rasterizations collide on id. Mirror-image letters (p/q at the
@@ -554,6 +571,8 @@ fn compute_hull_data(hull: &Hull) -> HullData {
         sdf[(ly * width + lx) as usize] = d;
     }
     let sdf_max = dist.values().copied().fold(0.0_f32, f32::max).max(0.5);
+    let mut sdf_values_sorted: Vec<f32> = dist.values().copied().collect();
+    sdf_values_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
     let mut skel = zhang_suen_thin(&hull.pixels);
     let spur_len = (sdf_max * 1.5).round() as usize;
     prune_skeleton_spurs(&mut skel, spur_len.max(2));
@@ -562,8 +581,8 @@ fn compute_hull_data(hull: &Hull) -> HullData {
         .map(|&(x, y)| (x as i32, y as i32))
         .collect();
     let skeleton_length = skel.len() as u32;
-    HullData { bx, by, width, height, was_ink, sdf, skel_endpoints,
-               skeleton_length, ink_total: count }
+    HullData { bx, by, width, height, was_ink, sdf, sdf_values_sorted,
+               skel_endpoints, skeleton_length, ink_total: count }
 }
 
 // ── Coverage grid: per-call mutable state, sized to the hull's bbox ─────
@@ -607,7 +626,13 @@ struct Grid {
 
 impl Grid {
     fn from_hull(hull: &Hull) -> Self {
-        let h = get_or_compute_hull_data(hull);
+        Self::from_hull_data(get_or_compute_hull_data(hull))
+    }
+
+    /// Construct a Grid from an already-fetched HullData. Lets the
+    /// caller use the same Arc<HullData> for cheap SDF-percentile
+    /// lookup AND for the Grid, avoiding two cache lookups per call.
+    fn from_hull_data(h: Arc<HullData>) -> Self {
         let unpainted = h.was_ink.clone();
         let ink_total = h.ink_total;
         let bx = h.bx; let by = h.by;
@@ -1160,12 +1185,11 @@ pub fn paint_fill_with(hull: &Hull, params: &PaintParams) -> FillResult {
     if hull.pixels.is_empty() {
         return FillResult { hull_id: hull.id, strokes: vec![] };
     }
-    let pixel_set: HashSet<(u32, u32)> = hull.pixels.iter().copied().collect();
-    let dist = chamfer_distance(hull, &pixel_set);
-    let effective_sdf = sdf_percentile(&dist, params.brush_radius_percentile).max(0.5);
+    let h = get_or_compute_hull_data(hull);
+    let effective_sdf = h.sdf_percentile_q(params.brush_radius_percentile).max(0.5);
     let brush_radius = params.brush_radius_factor * effective_sdf
         + params.brush_radius_offset_px;
-    let mut grid = Grid::from_hull(hull);
+    let mut grid = Grid::from_hull_data(h);
     grid.set_brush(brush_radius);
 
     let mut strokes: Vec<Vec<(f32, f32)>> = Vec::new();
@@ -1240,12 +1264,11 @@ pub struct PaintMetrics {
     pub brush_radius: f32,
 }
 
-/// Compute metrics by running paint_fill_debug. This gives an
-/// authoritative `ink_unpainted` (paint_fill_with stamps single disks
-/// for sub-threshold components, which don't appear in the returned
-/// stroke geometry — replaying strokes alone overcounts unpainted ink).
+/// Compute metrics by running the painter. Skips walk-trace
+/// recording and PNG rendering — both are debug-viewer-only and
+/// add ~25% overhead to the optimizer's hot loop.
 pub fn metrics_for(hull: &Hull, params: &PaintParams) -> (FillResult, PaintMetrics) {
-    let dbg = paint_fill_debug(hull, params);
+    let dbg = paint_fill_debug_inner(hull, params, false, false);
     let strokes = dbg.strokes.iter().filter(|s| s.len() >= 2).cloned().collect::<Vec<_>>();
     let total_length: f32 = strokes.iter().map(|s| {
         s.windows(2).map(|w| {
@@ -1507,18 +1530,25 @@ pub fn score_weighted(m: &PaintMetrics, w: ScoreWeights) -> f32 {
         - w.brush_size * m.brush_radius
 }
 
-pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
+/// Internal: do the painting and produce a fully-populated PaintDebug.
+/// `record_walks` enables the WalkTrace step recording (heavy — also
+/// triggers per-candidate breakdown work in walk_brush). `render_pngs`
+/// enables base64 PNG encoding for the frontend overlays. Both
+/// default-off paths are taken by `metrics_for`, the optimizer's
+/// per-call entry, where neither output is read — that path runs
+/// noticeably faster as a result.
+fn paint_fill_debug_inner(hull: &Hull, params: &PaintParams,
+                          record_walks: bool, render_pngs: bool) -> PaintDebug {
     let bounds = [
         hull.bounds.x_min as f32, hull.bounds.y_min as f32,
         hull.bounds.x_max as f32, hull.bounds.y_max as f32,
     ];
-    let pixel_set: HashSet<(u32, u32)> = hull.pixels.iter().copied().collect();
-    let dist = chamfer_distance(hull, &pixel_set);
-    let sdf_max = dist.values().cloned().fold(0.0_f32, f32::max).max(0.5);
-    let effective_sdf = sdf_percentile(&dist, params.brush_radius_percentile).max(0.5);
+    let h = get_or_compute_hull_data(hull);
+    let sdf_max = h.sdf_values_sorted.last().copied().unwrap_or(0.0).max(0.5);
+    let effective_sdf = h.sdf_percentile_q(params.brush_radius_percentile).max(0.5);
     let brush_radius = params.brush_radius_factor * effective_sdf
         + params.brush_radius_offset_px;
-    let mut grid = Grid::from_hull(hull);
+    let mut grid = Grid::from_hull_data(h);
     grid.set_brush(brush_radius);
     let mut trajectories: Vec<Vec<(f32, f32)>> = Vec::new();
     let mut starts: Vec<(f32, f32)> = Vec::new();
@@ -1532,8 +1562,9 @@ pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
         let start = match grid.pick_next_component(min_component_pixels) {
             Some(s) => s, None => break,
         };
+        let walk_log = if record_walks { Some(&mut walks) } else { None };
         let path = trace_stroke(start, &mut grid, params, brush_radius,
-                                Some(&mut walks), stroke_idx);
+                                walk_log, stroke_idx);
         if path.len() >= 2 {
             // Record path[0] as the "start" — that's where the gcode
             // pen actually comes down.
@@ -1551,18 +1582,22 @@ pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
         .filter(|s| s.len() >= 2)
         .collect();
 
-    let (sdf_b64, _) = encode_sdf_b64(hull);
     let ink_unpainted = grid.ink_remaining.max(0) as u32;
     let (bg_painted, total_swept, repaint) = measure_sweep_full(&strokes, &grid);
     let skeleton_length = grid.skeleton_length;
     let unpainted_clusters = grid.unpainted_cluster_sizes();
+    let (source_b64, sdf_b64, coverage_b64) = if render_pngs {
+        (encode_hull_pixels_b64(hull), encode_sdf_b64(hull).0, encode_coverage_b64(&grid))
+    } else {
+        (String::new(), String::new(), String::new())
+    };
     PaintDebug {
         bounds,
-        source_b64: encode_hull_pixels_b64(hull),
+        source_b64,
         sdf_b64,
         sdf_max,
         brush_radius,
-        coverage_b64: encode_coverage_b64(&grid),
+        coverage_b64,
         ink_total: grid.ink_total.max(0) as u32,
         ink_unpainted,
         bg_painted,
@@ -1577,6 +1612,10 @@ pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
     }
 }
 
+pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
+    paint_fill_debug_inner(hull, params, true, true)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;