brush-paint: fix optimizer hot-path leaks discovered via samply profile
Profiled `evaluate(corpus, &default)` (the per-call optimizer entry) under samply for 60s. Found two big leaks, both around `paint_fill_debug`, which `metrics_for` was calling unconditionally:

1. The SDF was being recomputed in `paint_fill_with` and `paint_fill_debug` outside the hull cache, just to derive `brush_radius` from a percentile lookup. Fix: cache the sorted chamfer values in `HullData` so `sdf_percentile_q(q)` is O(1). Both functions now go through `Grid::from_hull_data(h)`, using the same `Arc<HullData>` they used for the percentile.

2. `paint_fill_debug` always rendered three base64-encoded PNGs (~23% of CPU was in flate2's ZlibEncoder) and recorded full per-step WalkTrace lists, even when called by `metrics_for`, which discards both. Fix: split into a private inner function with `record_walks` / `render_pngs` flags; the public `paint_fill_debug` keeps both on, `metrics_for` turns both off.

Bench: `evaluate(corpus, &default)` × 1700 iters
  before: 52 ms/iter
  after:  35 ms/iter → 1.49× faster

The output is bit-exact w.r.t. the alphabet report.

Also adds `src/bin/paint_bench.rs` — a small fixed-duration loop over `evaluate` for profiling under samply / Instruments.
This commit is contained in:
40
src/bin/paint_bench.rs
Normal file
40
src/bin/paint_bench.rs
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
//! Hot-path bench: build the optimizer corpus once, then loop calling
|
||||||
|
//! `evaluate(corpus, &default_params)` for a fixed wall-clock duration.
|
||||||
|
//! Prints iter count + ms/iter so you have a baseline number, and
|
||||||
|
//! holds the process up long enough that an external profiler
|
||||||
|
//! (samply, sample, Instruments) can capture a representative trace.
|
||||||
|
//!
|
||||||
|
//! Usage: paint_bench [seconds] (default 60)
|
||||||
|
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use trac3r_lib::brush_paint::PaintParams;
|
||||||
|
use trac3r_lib::brush_paint_opt::{build_corpus, evaluate};
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let secs: u64 = std::env::args().nth(1)
|
||||||
|
.and_then(|s| s.parse().ok())
|
||||||
|
.unwrap_or(60);
|
||||||
|
eprintln!("[bench] building corpus...");
|
||||||
|
let corpus = build_corpus();
|
||||||
|
eprintln!("[bench] corpus: {} hulls", corpus.len());
|
||||||
|
let params = PaintParams::default();
|
||||||
|
eprintln!("[bench] pid={} running for {}s", std::process::id(), secs);
|
||||||
|
|
||||||
|
// Warm up the hull cache + jit any lazy code paths.
|
||||||
|
let _ = evaluate(&corpus, ¶ms);
|
||||||
|
|
||||||
|
let deadline = Instant::now() + Duration::from_secs(secs);
|
||||||
|
let mut iters = 0u32;
|
||||||
|
let start = Instant::now();
|
||||||
|
while Instant::now() < deadline {
|
||||||
|
let _ = evaluate(&corpus, ¶ms);
|
||||||
|
iters += 1;
|
||||||
|
if iters.is_multiple_of(10) {
|
||||||
|
let elapsed = start.elapsed().as_secs_f64();
|
||||||
|
eprintln!("[bench] {iters} iters, {:.0} ms/iter", 1000.0 * elapsed / iters as f64);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let elapsed = start.elapsed().as_secs_f64();
|
||||||
|
eprintln!("[bench] DONE: {iters} iters in {:.1}s = {:.0} ms/iter",
|
||||||
|
elapsed, 1000.0 * elapsed / iters as f64);
|
||||||
|
}
|
||||||
@@ -478,11 +478,28 @@ struct HullData {
|
|||||||
width: i32, height: i32,
|
width: i32, height: i32,
|
||||||
was_ink: BitMask,
|
was_ink: BitMask,
|
||||||
sdf: Vec<f32>,
|
sdf: Vec<f32>,
|
||||||
|
/// Sorted chamfer-distance values for the ink pixels (the same set
|
||||||
|
/// `chamfer_distance` returns). Lets `sdf_percentile_q(q)` answer
|
||||||
|
/// in O(1) instead of recomputing chamfer + sort. Critical for
|
||||||
|
/// the optimizer hot path: `paint_fill_with` needs an SDF
|
||||||
|
/// percentile to derive `brush_radius` and was redundantly
|
||||||
|
/// recomputing chamfer per call.
|
||||||
|
sdf_values_sorted: Vec<f32>,
|
||||||
skel_endpoints: Vec<(i32, i32)>,
|
skel_endpoints: Vec<(i32, i32)>,
|
||||||
skeleton_length: u32,
|
skeleton_length: u32,
|
||||||
ink_total: i32,
|
ink_total: i32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl HullData {
|
||||||
|
fn sdf_percentile_q(&self, q: f32) -> f32 {
|
||||||
|
let v = &self.sdf_values_sorted;
|
||||||
|
if v.is_empty() { return 0.0; }
|
||||||
|
let q = q.clamp(0.0, 1.0);
|
||||||
|
let idx = ((v.len() as f32 - 1.0) * q).round() as usize;
|
||||||
|
v[idx.min(v.len() - 1)]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Cache key. `hull.id` alone isn't enough — extract_hulls assigns
|
/// Cache key. `hull.id` alone isn't enough — extract_hulls assigns
|
||||||
/// IDs from a per-call counter, so distinct hulls from different
|
/// IDs from a per-call counter, so distinct hulls from different
|
||||||
/// rasterizations collide on id. Mirror-image letters (p/q at the
|
/// rasterizations collide on id. Mirror-image letters (p/q at the
|
||||||
@@ -554,6 +571,8 @@ fn compute_hull_data(hull: &Hull) -> HullData {
|
|||||||
sdf[(ly * width + lx) as usize] = d;
|
sdf[(ly * width + lx) as usize] = d;
|
||||||
}
|
}
|
||||||
let sdf_max = dist.values().copied().fold(0.0_f32, f32::max).max(0.5);
|
let sdf_max = dist.values().copied().fold(0.0_f32, f32::max).max(0.5);
|
||||||
|
let mut sdf_values_sorted: Vec<f32> = dist.values().copied().collect();
|
||||||
|
sdf_values_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
|
||||||
let mut skel = zhang_suen_thin(&hull.pixels);
|
let mut skel = zhang_suen_thin(&hull.pixels);
|
||||||
let spur_len = (sdf_max * 1.5).round() as usize;
|
let spur_len = (sdf_max * 1.5).round() as usize;
|
||||||
prune_skeleton_spurs(&mut skel, spur_len.max(2));
|
prune_skeleton_spurs(&mut skel, spur_len.max(2));
|
||||||
@@ -562,8 +581,8 @@ fn compute_hull_data(hull: &Hull) -> HullData {
|
|||||||
.map(|&(x, y)| (x as i32, y as i32))
|
.map(|&(x, y)| (x as i32, y as i32))
|
||||||
.collect();
|
.collect();
|
||||||
let skeleton_length = skel.len() as u32;
|
let skeleton_length = skel.len() as u32;
|
||||||
HullData { bx, by, width, height, was_ink, sdf, skel_endpoints,
|
HullData { bx, by, width, height, was_ink, sdf, sdf_values_sorted,
|
||||||
skeleton_length, ink_total: count }
|
skel_endpoints, skeleton_length, ink_total: count }
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Coverage grid: per-call mutable state, sized to the hull's bbox ─────
|
// ── Coverage grid: per-call mutable state, sized to the hull's bbox ─────
|
||||||
@@ -607,7 +626,13 @@ struct Grid {
|
|||||||
|
|
||||||
impl Grid {
|
impl Grid {
|
||||||
fn from_hull(hull: &Hull) -> Self {
|
fn from_hull(hull: &Hull) -> Self {
|
||||||
let h = get_or_compute_hull_data(hull);
|
Self::from_hull_data(get_or_compute_hull_data(hull))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Construct a Grid from an already-fetched HullData. Lets the
|
||||||
|
/// caller use the same Arc<HullData> for cheap SDF-percentile
|
||||||
|
/// lookup AND for the Grid, avoiding two cache lookups per call.
|
||||||
|
fn from_hull_data(h: Arc<HullData>) -> Self {
|
||||||
let unpainted = h.was_ink.clone();
|
let unpainted = h.was_ink.clone();
|
||||||
let ink_total = h.ink_total;
|
let ink_total = h.ink_total;
|
||||||
let bx = h.bx; let by = h.by;
|
let bx = h.bx; let by = h.by;
|
||||||
@@ -1160,12 +1185,11 @@ pub fn paint_fill_with(hull: &Hull, params: &PaintParams) -> FillResult {
|
|||||||
if hull.pixels.is_empty() {
|
if hull.pixels.is_empty() {
|
||||||
return FillResult { hull_id: hull.id, strokes: vec![] };
|
return FillResult { hull_id: hull.id, strokes: vec![] };
|
||||||
}
|
}
|
||||||
let pixel_set: HashSet<(u32, u32)> = hull.pixels.iter().copied().collect();
|
let h = get_or_compute_hull_data(hull);
|
||||||
let dist = chamfer_distance(hull, &pixel_set);
|
let effective_sdf = h.sdf_percentile_q(params.brush_radius_percentile).max(0.5);
|
||||||
let effective_sdf = sdf_percentile(&dist, params.brush_radius_percentile).max(0.5);
|
|
||||||
let brush_radius = params.brush_radius_factor * effective_sdf + params.brush_radius_offset_px;
|
let brush_radius = params.brush_radius_factor * effective_sdf + params.brush_radius_offset_px;
|
||||||
|
|
||||||
let mut grid = Grid::from_hull(hull);
|
let mut grid = Grid::from_hull_data(h);
|
||||||
grid.set_brush(brush_radius);
|
grid.set_brush(brush_radius);
|
||||||
let mut strokes: Vec<Vec<(f32, f32)>> = Vec::new();
|
let mut strokes: Vec<Vec<(f32, f32)>> = Vec::new();
|
||||||
|
|
||||||
@@ -1240,12 +1264,11 @@ pub struct PaintMetrics {
|
|||||||
pub brush_radius: f32,
|
pub brush_radius: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Compute metrics by running paint_fill_debug. This gives an
|
/// Compute metrics by running the painter. Skips walk-trace
|
||||||
/// authoritative `ink_unpainted` (paint_fill_with stamps single disks
|
/// recording and PNG rendering — both are debug-viewer-only and
|
||||||
/// for sub-threshold components, which don't appear in the returned
|
/// add ~25% overhead to the optimizer's hot loop.
|
||||||
/// stroke geometry — replaying strokes alone overcounts unpainted ink).
|
|
||||||
pub fn metrics_for(hull: &Hull, params: &PaintParams) -> (FillResult, PaintMetrics) {
|
pub fn metrics_for(hull: &Hull, params: &PaintParams) -> (FillResult, PaintMetrics) {
|
||||||
let dbg = paint_fill_debug(hull, params);
|
let dbg = paint_fill_debug_inner(hull, params, false, false);
|
||||||
let strokes = dbg.strokes.iter().filter(|s| s.len() >= 2).cloned().collect::<Vec<_>>();
|
let strokes = dbg.strokes.iter().filter(|s| s.len() >= 2).cloned().collect::<Vec<_>>();
|
||||||
let total_length: f32 = strokes.iter().map(|s| {
|
let total_length: f32 = strokes.iter().map(|s| {
|
||||||
s.windows(2).map(|w| {
|
s.windows(2).map(|w| {
|
||||||
@@ -1507,18 +1530,25 @@ pub fn score_weighted(m: &PaintMetrics, w: ScoreWeights) -> f32 {
|
|||||||
- w.brush_size * m.brush_radius
|
- w.brush_size * m.brush_radius
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
|
/// Internal: do the painting and produce a fully-populated PaintDebug.
|
||||||
|
/// `record_walks` enables the WalkTrace step recording (heavy — also
|
||||||
|
/// triggers per-candidate breakdown work in walk_brush). `render_pngs`
|
||||||
|
/// enables base64 PNG encoding for the frontend overlays. Both
|
||||||
|
/// default-off paths are taken by `metrics_for`, the optimizer's
|
||||||
|
/// per-call entry, where neither output is read — that path runs
|
||||||
|
/// noticeably faster as a result.
|
||||||
|
fn paint_fill_debug_inner(hull: &Hull, params: &PaintParams,
|
||||||
|
record_walks: bool, render_pngs: bool) -> PaintDebug {
|
||||||
let bounds = [
|
let bounds = [
|
||||||
hull.bounds.x_min as f32, hull.bounds.y_min as f32,
|
hull.bounds.x_min as f32, hull.bounds.y_min as f32,
|
||||||
hull.bounds.x_max as f32, hull.bounds.y_max as f32,
|
hull.bounds.x_max as f32, hull.bounds.y_max as f32,
|
||||||
];
|
];
|
||||||
let pixel_set: HashSet<(u32, u32)> = hull.pixels.iter().copied().collect();
|
let h = get_or_compute_hull_data(hull);
|
||||||
let dist = chamfer_distance(hull, &pixel_set);
|
let sdf_max = h.sdf_values_sorted.last().copied().unwrap_or(0.0).max(0.5);
|
||||||
let sdf_max = dist.values().cloned().fold(0.0_f32, f32::max).max(0.5);
|
let effective_sdf = h.sdf_percentile_q(params.brush_radius_percentile).max(0.5);
|
||||||
let effective_sdf = sdf_percentile(&dist, params.brush_radius_percentile).max(0.5);
|
|
||||||
let brush_radius = params.brush_radius_factor * effective_sdf + params.brush_radius_offset_px;
|
let brush_radius = params.brush_radius_factor * effective_sdf + params.brush_radius_offset_px;
|
||||||
|
|
||||||
let mut grid = Grid::from_hull(hull);
|
let mut grid = Grid::from_hull_data(h);
|
||||||
grid.set_brush(brush_radius);
|
grid.set_brush(brush_radius);
|
||||||
let mut trajectories: Vec<Vec<(f32, f32)>> = Vec::new();
|
let mut trajectories: Vec<Vec<(f32, f32)>> = Vec::new();
|
||||||
let mut starts: Vec<(f32, f32)> = Vec::new();
|
let mut starts: Vec<(f32, f32)> = Vec::new();
|
||||||
@@ -1532,8 +1562,9 @@ pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
|
|||||||
let start = match grid.pick_next_component(min_component_pixels) {
|
let start = match grid.pick_next_component(min_component_pixels) {
|
||||||
Some(s) => s, None => break,
|
Some(s) => s, None => break,
|
||||||
};
|
};
|
||||||
|
let walk_log = if record_walks { Some(&mut walks) } else { None };
|
||||||
let path = trace_stroke(start, &mut grid, params, brush_radius,
|
let path = trace_stroke(start, &mut grid, params, brush_radius,
|
||||||
Some(&mut walks), stroke_idx);
|
walk_log, stroke_idx);
|
||||||
if path.len() >= 2 {
|
if path.len() >= 2 {
|
||||||
// Record path[0] as the "start" — that's where the gcode
|
// Record path[0] as the "start" — that's where the gcode
|
||||||
// pen actually comes down.
|
// pen actually comes down.
|
||||||
@@ -1551,18 +1582,22 @@ pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
|
|||||||
.filter(|s| s.len() >= 2)
|
.filter(|s| s.len() >= 2)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let (sdf_b64, _) = encode_sdf_b64(hull);
|
|
||||||
let ink_unpainted = grid.ink_remaining.max(0) as u32;
|
let ink_unpainted = grid.ink_remaining.max(0) as u32;
|
||||||
let (bg_painted, total_swept, repaint) = measure_sweep_full(&strokes, &grid);
|
let (bg_painted, total_swept, repaint) = measure_sweep_full(&strokes, &grid);
|
||||||
let skeleton_length = grid.skeleton_length;
|
let skeleton_length = grid.skeleton_length;
|
||||||
let unpainted_clusters = grid.unpainted_cluster_sizes();
|
let unpainted_clusters = grid.unpainted_cluster_sizes();
|
||||||
|
let (source_b64, sdf_b64, coverage_b64) = if render_pngs {
|
||||||
|
(encode_hull_pixels_b64(hull), encode_sdf_b64(hull).0, encode_coverage_b64(&grid))
|
||||||
|
} else {
|
||||||
|
(String::new(), String::new(), String::new())
|
||||||
|
};
|
||||||
PaintDebug {
|
PaintDebug {
|
||||||
bounds,
|
bounds,
|
||||||
source_b64: encode_hull_pixels_b64(hull),
|
source_b64,
|
||||||
sdf_b64,
|
sdf_b64,
|
||||||
sdf_max,
|
sdf_max,
|
||||||
brush_radius,
|
brush_radius,
|
||||||
coverage_b64: encode_coverage_b64(&grid),
|
coverage_b64,
|
||||||
ink_total: grid.ink_total.max(0) as u32,
|
ink_total: grid.ink_total.max(0) as u32,
|
||||||
ink_unpainted,
|
ink_unpainted,
|
||||||
bg_painted,
|
bg_painted,
|
||||||
@@ -1577,6 +1612,10 @@ pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn paint_fill_debug(hull: &Hull, params: &PaintParams) -> PaintDebug {
|
||||||
|
paint_fill_debug_inner(hull, params, true, true)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
Reference in New Issue
Block a user