brush-paint: cache hull-derived data (SDF + skeleton + was_ink) per hull
The dominant cost of one paint_fill_with call (~50% wall time at small radii) is computing the chamfer-3-4 SDF and Zhang-Suen skeleton, both pure functions of the hull. The optimizer calls paint_fill_with thousands of times per hull while only varying brush/walker params, so this work is fully redundant across calls. New: a process-global Mutex<HashMap<HullKey, Arc<HullData>>> cache keyed by FNV-1a fingerprint of the hull's pixel coordinates. First call computes; subsequent calls hand back an Arc<HullData> in O(N) fingerprint hash + O(1) lookup. Grid now holds Arc<HullData> for the immutable hull-derived state (was_ink, sdf, skel_endpoints) and clones only the mutable `unpainted` mask per call. Bbox + skeleton_length are duplicated into Grid so the disk-iteration hot path doesn't pay an Arc deref. Bit-exact w.r.t. the alphabet report. Expected speedup is largest in the optimizer's tight loop (many calls per hull); the alphabet report only paints each hull once so most of its wins were from the prior precompute/bitset commits.
This commit is contained in:
@@ -12,7 +12,8 @@
|
|||||||
// over direction has unpainted ink ahead, while alternate directions
|
// over direction has unpainted ink ahead, while alternate directions
|
||||||
// don't.
|
// don't.
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::sync::{Arc, Mutex, OnceLock};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use crate::fill::{FillResult, rdp_simplify_f32, chamfer_distance,
|
use crate::fill::{FillResult, rdp_simplify_f32, chamfer_distance,
|
||||||
zhang_suen_thin, prune_skeleton_spurs, zs_neighbors};
|
zhang_suen_thin, prune_skeleton_spurs, zs_neighbors};
|
||||||
@@ -290,7 +291,7 @@ fn measure_sweep_full(strokes: &[Vec<(f32, f32)>], grid: &Grid)
|
|||||||
-> (u32, u32, u32)
|
-> (u32, u32, u32)
|
||||||
{
|
{
|
||||||
if strokes.is_empty() { return (0, 0, 0); }
|
if strokes.is_empty() { return (0, 0, 0); }
|
||||||
let mut count = vec![0u32; grid.was_ink.len()];
|
let mut count = vec![0u32; grid.hull.was_ink.len()];
|
||||||
let r2 = grid.brush_radius_sq;
|
let r2 = grid.brush_radius_sq;
|
||||||
for stroke in strokes {
|
for stroke in strokes {
|
||||||
for win in stroke.windows(2) {
|
for win in stroke.windows(2) {
|
||||||
@@ -322,7 +323,7 @@ fn measure_sweep_full(strokes: &[Vec<(f32, f32)>], grid: &Grid)
|
|||||||
for (i, &c) in count.iter().enumerate() {
|
for (i, &c) in count.iter().enumerate() {
|
||||||
if c == 0 { continue; }
|
if c == 0 { continue; }
|
||||||
total += 1;
|
total += 1;
|
||||||
if !grid.was_ink.get(i) { bg += 1; }
|
if !grid.hull.was_ink.get(i) { bg += 1; }
|
||||||
else { repaint += c - 1; }
|
else { repaint += c - 1; }
|
||||||
}
|
}
|
||||||
(bg, total, repaint)
|
(bg, total, repaint)
|
||||||
@@ -434,6 +435,7 @@ fn encode_coverage_b64(grid: &Grid) -> String {
|
|||||||
/// fits L1 nicely, and word-at-a-time popcount is available when
|
/// fits L1 nicely, and word-at-a-time popcount is available when
|
||||||
/// scanning whole grids. All ops are `#[inline]` since they're called
|
/// scanning whole grids. All ops are `#[inline]` since they're called
|
||||||
/// from the disk-iteration hot path.
|
/// from the disk-iteration hot path.
|
||||||
|
#[derive(Clone)]
|
||||||
struct BitMask {
|
struct BitMask {
|
||||||
bits: Vec<u64>,
|
bits: Vec<u64>,
|
||||||
len: usize,
|
len: usize,
|
||||||
@@ -461,26 +463,124 @@ impl BitMask {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Coverage grid: bool per pixel, sized to the hull's bbox ─────────────
|
// ── Hull-derived data: cached per hull fingerprint (HullKey) ────────────
|
||||||
|
|
||||||
struct Grid {
|
/// Pure-function-of-the-hull state: the bbox/grid dimensions, ink mask,
|
||||||
|
/// chamfer SDF, and skeleton-endpoint set. Computing chamfer +
|
||||||
|
/// Zhang-Suen thin + spur-prune is the dominant cost of one
|
||||||
|
/// `paint_fill_with` call (~50% wall time at small radii). The
|
||||||
|
/// optimizer calls `paint_fill_with` thousands of times per hull while
|
||||||
|
/// only varying brush/walker params, so the result is identical every
|
||||||
|
/// time. A small `HullKey → Arc<HullData>` cache eliminates the
|
||||||
|
/// recomputation across calls.
|
||||||
|
struct HullData {
|
||||||
bx: i32, by: i32,
|
bx: i32, by: i32,
|
||||||
width: i32, height: i32,
|
width: i32, height: i32,
|
||||||
/// `true` = ink pixel that hasn't been painted yet.
|
|
||||||
unpainted: BitMask,
|
|
||||||
/// `true` = pixel was ink in the original glyph (immutable; never
|
|
||||||
/// changes after construction). Lets relaxation tell "ink" apart from
|
|
||||||
/// "background" without conflating it with painted state.
|
|
||||||
was_ink: BitMask,
|
was_ink: BitMask,
|
||||||
/// Chamfer 3-4 distance / 3 (≈ Euclidean px from boundary). Used to
|
|
||||||
/// snap raw start points up the gradient onto the medial-axis ridge,
|
|
||||||
/// so strokes begin at stroke-centerline rather than polygon-edge.
|
|
||||||
sdf: Vec<f32>,
|
sdf: Vec<f32>,
|
||||||
/// Skeleton-endpoint pixel positions (degree-1 nodes of the thinned
|
|
||||||
/// glyph after spur pruning). These are the "legs" — the natural
|
|
||||||
/// pen-down anchors for a human writing the letter. A closed shape
|
|
||||||
/// (O, 0, etc.) has zero endpoints.
|
|
||||||
skel_endpoints: Vec<(i32, i32)>,
|
skel_endpoints: Vec<(i32, i32)>,
|
||||||
|
skeleton_length: u32,
|
||||||
|
ink_total: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cache key. `hull.id` alone isn't enough — extract_hulls assigns
|
||||||
|
/// IDs from a per-call counter, so distinct hulls from different
|
||||||
|
/// rasterizations collide on id. Mirror-image letters (p/q at the
|
||||||
|
/// same scale) can also share area + bounds. We use an FNV-1a-style
|
||||||
|
/// hash over the pixel coordinate stream as the key — O(N) once
|
||||||
|
/// per call (hit or miss); a 64-bit collision is unlikely but possible.
|
||||||
|
type HullKey = u64;
|
||||||
|
|
||||||
|
fn hull_key(hull: &Hull) -> HullKey {
|
||||||
|
let mut h = 0xcbf29ce484222325u64;
|
||||||
|
for &(x, y) in &hull.pixels {
|
||||||
|
h ^= x as u64;
|
||||||
|
h = h.wrapping_mul(0x100000001b3);
|
||||||
|
h ^= y as u64;
|
||||||
|
h = h.wrapping_mul(0x100000001b3);
|
||||||
|
}
|
||||||
|
h
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hull_cache() -> &'static Mutex<HashMap<HullKey, Arc<HullData>>> {
|
||||||
|
static CACHE: OnceLock<Mutex<HashMap<HullKey, Arc<HullData>>>> = OnceLock::new();
|
||||||
|
CACHE.get_or_init(|| Mutex::new(HashMap::new()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_or_compute_hull_data(hull: &Hull) -> Arc<HullData> {
|
||||||
|
let key = hull_key(hull);
|
||||||
|
{
|
||||||
|
let cache = hull_cache().lock().unwrap();
|
||||||
|
if let Some(c) = cache.get(&key) {
|
||||||
|
return c.clone();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Compute outside the lock so concurrent callers for different
|
||||||
|
// hulls don't serialize. A small race is possible (two threads
|
||||||
|
// miss for the same hull and both compute) — both produce
|
||||||
|
// identical data, so the loser's copy is just dropped.
|
||||||
|
let computed = Arc::new(compute_hull_data(hull));
|
||||||
|
let mut cache = hull_cache().lock().unwrap();
|
||||||
|
cache.entry(key).or_insert_with(|| computed.clone()).clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compute_hull_data(hull: &Hull) -> HullData {
|
||||||
|
// Pad the grid past the hull's AABB so that bg pixels swept by a
|
||||||
|
// brush that overhangs the polygon (e.g. at the top of an `I`,
|
||||||
|
// or the corners of a square letter) are counted instead of
|
||||||
|
// silently dropped by the bounds check. PAD must exceed any
|
||||||
|
// brush_radius the optimizer might try.
|
||||||
|
const PAD: i32 = 32;
|
||||||
|
let bx = hull.bounds.x_min as i32 - PAD;
|
||||||
|
let by = hull.bounds.y_min as i32 - PAD;
|
||||||
|
let width = (hull.bounds.x_max as i32 - hull.bounds.x_min as i32 + 1 + 2 * PAD).max(1);
|
||||||
|
let height = (hull.bounds.y_max as i32 - hull.bounds.y_min as i32 + 1 + 2 * PAD).max(1);
|
||||||
|
let cells = (width * height) as usize;
|
||||||
|
let mut was_ink = BitMask::new(cells);
|
||||||
|
let mut sdf = vec![0.0_f32; cells];
|
||||||
|
let mut count = 0;
|
||||||
|
for &(x, y) in &hull.pixels {
|
||||||
|
let lx = x as i32 - bx; let ly = y as i32 - by;
|
||||||
|
if lx < 0 || ly < 0 || lx >= width || ly >= height { continue; }
|
||||||
|
let idx = (ly * width + lx) as usize;
|
||||||
|
was_ink.set(idx);
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
let pixel_set: HashSet<(u32, u32)> = hull.pixels.iter().copied().collect();
|
||||||
|
let dist = chamfer_distance(hull, &pixel_set);
|
||||||
|
for (&(x, y), &d) in dist.iter() {
|
||||||
|
let lx = x as i32 - bx; let ly = y as i32 - by;
|
||||||
|
if lx < 0 || ly < 0 || lx >= width || ly >= height { continue; }
|
||||||
|
sdf[(ly * width + lx) as usize] = d;
|
||||||
|
}
|
||||||
|
let sdf_max = dist.values().copied().fold(0.0_f32, f32::max).max(0.5);
|
||||||
|
let mut skel = zhang_suen_thin(&hull.pixels);
|
||||||
|
let spur_len = (sdf_max * 1.5).round() as usize;
|
||||||
|
prune_skeleton_spurs(&mut skel, spur_len.max(2));
|
||||||
|
let skel_endpoints: Vec<(i32, i32)> = skel.iter()
|
||||||
|
.filter(|&&p| zs_neighbors(p.0, p.1).iter().filter(|n| skel.contains(n)).count() == 1)
|
||||||
|
.map(|&(x, y)| (x as i32, y as i32))
|
||||||
|
.collect();
|
||||||
|
let skeleton_length = skel.len() as u32;
|
||||||
|
HullData { bx, by, width, height, was_ink, sdf, skel_endpoints,
|
||||||
|
skeleton_length, ink_total: count }
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Coverage grid: per-call mutable state, sized to the hull's bbox ─────
|
||||||
|
|
||||||
|
struct Grid {
|
||||||
|
// Bbox is duplicated from `hull` so the disk-iteration hot path
|
||||||
|
// doesn't pay an Arc deref on every step.
|
||||||
|
bx: i32, by: i32,
|
||||||
|
width: i32, height: i32,
|
||||||
|
/// Cached hull-derived state: was_ink mask, SDF, skeleton
|
||||||
|
/// endpoints. Shared across all `paint_fill_with` calls on the
|
||||||
|
/// same hull via Arc — avoids recomputing chamfer + skeleton
|
||||||
|
/// per call. Read-only from this struct's perspective.
|
||||||
|
hull: Arc<HullData>,
|
||||||
|
/// `true` = ink pixel that hasn't been painted yet. Owned, mutable.
|
||||||
|
/// Initialized as a clone of `hull.was_ink`.
|
||||||
|
unpainted: BitMask,
|
||||||
/// Approximate medial-axis length, in pixels. Counted as skeleton
|
/// Approximate medial-axis length, in pixels. Counted as skeleton
|
||||||
/// pixel count (each connected skeleton pixel contributes ~1 px to
|
/// pixel count (each connected skeleton pixel contributes ~1 px to
|
||||||
/// the centerline length). Used as the "ideal" path length — a
|
/// the centerline length). Used as the "ideal" path length — a
|
||||||
@@ -507,58 +607,23 @@ struct Grid {
|
|||||||
|
|
||||||
impl Grid {
|
impl Grid {
|
||||||
fn from_hull(hull: &Hull) -> Self {
|
fn from_hull(hull: &Hull) -> Self {
|
||||||
// Pad the grid past the hull's AABB so that bg pixels swept by a
|
let h = get_or_compute_hull_data(hull);
|
||||||
// brush that overhangs the polygon (e.g. at the top of an `I`,
|
let unpainted = h.was_ink.clone();
|
||||||
// or the corners of a square letter) are counted instead of
|
let ink_total = h.ink_total;
|
||||||
// silently dropped by the bounds check. PAD must exceed any
|
let bx = h.bx; let by = h.by;
|
||||||
// brush_radius the optimizer might try.
|
let width = h.width; let height = h.height;
|
||||||
const PAD: i32 = 32;
|
let skeleton_length = h.skeleton_length;
|
||||||
let bx = hull.bounds.x_min as i32 - PAD;
|
Self {
|
||||||
let by = hull.bounds.y_min as i32 - PAD;
|
bx, by, width, height,
|
||||||
let width = (hull.bounds.x_max as i32 - hull.bounds.x_min as i32 + 1 + 2 * PAD).max(1);
|
hull: h,
|
||||||
let height = (hull.bounds.y_max as i32 - hull.bounds.y_min as i32 + 1 + 2 * PAD).max(1);
|
unpainted,
|
||||||
let cells = (width * height) as usize;
|
skeleton_length,
|
||||||
let mut unpainted = BitMask::new(cells);
|
ink_total,
|
||||||
let mut was_ink = BitMask::new(cells);
|
ink_remaining: ink_total,
|
||||||
let mut sdf = vec![0.0_f32; cells];
|
brush_radius: 0.0,
|
||||||
let mut count = 0;
|
brush_radius_sq: 0.0,
|
||||||
for &(x, y) in &hull.pixels {
|
disk_offsets: Vec::new(),
|
||||||
let lx = x as i32 - bx; let ly = y as i32 - by;
|
|
||||||
if lx < 0 || ly < 0 || lx >= width || ly >= height { continue; }
|
|
||||||
let idx = (ly * width + lx) as usize;
|
|
||||||
unpainted.set(idx);
|
|
||||||
was_ink.set(idx);
|
|
||||||
count += 1;
|
|
||||||
}
|
}
|
||||||
// Chamfer distance (per-pixel, in approximate Euclidean units)
|
|
||||||
// for medial-axis snapping in pick_start.
|
|
||||||
let pixel_set: HashSet<(u32, u32)> = hull.pixels.iter().copied().collect();
|
|
||||||
let dist = chamfer_distance(hull, &pixel_set);
|
|
||||||
for (&(x, y), &d) in dist.iter() {
|
|
||||||
let lx = x as i32 - bx; let ly = y as i32 - by;
|
|
||||||
if lx < 0 || ly < 0 || lx >= width || ly >= height { continue; }
|
|
||||||
sdf[(ly * width + lx) as usize] = d;
|
|
||||||
}
|
|
||||||
// Skeleton + endpoint detection. Spur prune length is keyed to the
|
|
||||||
// glyph's max stroke half-width so tiny notches at thick strokes
|
|
||||||
// don't masquerade as legs. (zhang_suen + spur_prune is the same
|
|
||||||
// recipe the topo-stroke and skeleton-fill paths use.)
|
|
||||||
let sdf_max = dist.values().copied().fold(0.0_f32, f32::max).max(0.5);
|
|
||||||
let mut skel = zhang_suen_thin(&hull.pixels);
|
|
||||||
let spur_len = (sdf_max * 1.5).round() as usize;
|
|
||||||
prune_skeleton_spurs(&mut skel, spur_len.max(2));
|
|
||||||
let skel_endpoints: Vec<(i32, i32)> = skel.iter()
|
|
||||||
.filter(|&&p| zs_neighbors(p.0, p.1).iter().filter(|n| skel.contains(n)).count() == 1)
|
|
||||||
.map(|&(x, y)| (x as i32, y as i32))
|
|
||||||
.collect();
|
|
||||||
// Skeleton length ≈ skeleton pixel count. For an 8-connected
|
|
||||||
// skeleton this slightly under-counts diagonal segments (sqrt(2)
|
|
||||||
// each), but it's close enough for a path-budget heuristic.
|
|
||||||
let skeleton_length = skel.len() as u32;
|
|
||||||
|
|
||||||
Self { bx, by, width, height, unpainted, was_ink, sdf, skel_endpoints,
|
|
||||||
skeleton_length, ink_total: count, ink_remaining: count,
|
|
||||||
brush_radius: 0.0, brush_radius_sq: 0.0, disk_offsets: Vec::new() }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Configure the disk shape used for evaluate_disk / paint_disk /
|
/// Configure the disk shape used for evaluate_disk / paint_disk /
|
||||||
@@ -591,7 +656,7 @@ impl Grid {
|
|||||||
fn sdf_at(&self, x: i32, y: i32) -> f32 {
|
fn sdf_at(&self, x: i32, y: i32) -> f32 {
|
||||||
let lx = x - self.bx; let ly = y - self.by;
|
let lx = x - self.bx; let ly = y - self.by;
|
||||||
if lx < 0 || ly < 0 || lx >= self.width || ly >= self.height { return 0.0; }
|
if lx < 0 || ly < 0 || lx >= self.width || ly >= self.height { return 0.0; }
|
||||||
self.sdf[(ly * self.width + lx) as usize]
|
self.hull.sdf[(ly * self.width + lx) as usize]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Snap a raw pixel position onto the medial-axis ridge by greedy
|
/// Snap a raw pixel position onto the medial-axis ridge by greedy
|
||||||
@@ -665,7 +730,7 @@ impl Grid {
|
|||||||
fn is_ink(&self, x: i32, y: i32) -> bool {
|
fn is_ink(&self, x: i32, y: i32) -> bool {
|
||||||
let lx = x - self.bx; let ly = y - self.by;
|
let lx = x - self.bx; let ly = y - self.by;
|
||||||
if lx < 0 || ly < 0 || lx >= self.width || ly >= self.height { return false; }
|
if lx < 0 || ly < 0 || lx >= self.width || ly >= self.height { return false; }
|
||||||
self.was_ink.get((ly * self.width + lx) as usize)
|
self.hull.was_ink.get((ly * self.width + lx) as usize)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns (new_ink, repaint_ink, bg) — pixel counts under disk(p, r):
|
/// Returns (new_ink, repaint_ink, bg) — pixel counts under disk(p, r):
|
||||||
@@ -695,7 +760,7 @@ impl Grid {
|
|||||||
let idx = (ly * self.width + lx) as usize;
|
let idx = (ly * self.width + lx) as usize;
|
||||||
if self.unpainted.get(idx) {
|
if self.unpainted.get(idx) {
|
||||||
new_ink += 1;
|
new_ink += 1;
|
||||||
} else if self.was_ink.get(idx) {
|
} else if self.hull.was_ink.get(idx) {
|
||||||
repaint_ink += 1;
|
repaint_ink += 1;
|
||||||
} else {
|
} else {
|
||||||
bg += 1;
|
bg += 1;
|
||||||
@@ -816,7 +881,7 @@ impl Grid {
|
|||||||
let (pixels, _) = &components[chosen];
|
let (pixels, _) = &components[chosen];
|
||||||
let comp_set: HashSet<usize> = pixels.iter().copied().collect();
|
let comp_set: HashSet<usize> = pixels.iter().copied().collect();
|
||||||
let mut best_endpoint: Option<(i32, i32)> = None;
|
let mut best_endpoint: Option<(i32, i32)> = None;
|
||||||
for &(ex, ey) in &self.skel_endpoints {
|
for &(ex, ey) in self.hull.skel_endpoints.iter() {
|
||||||
let lx = ex - self.bx; let ly = ey - self.by;
|
let lx = ex - self.bx; let ly = ey - self.by;
|
||||||
if lx < 0 || ly < 0 || lx >= self.width || ly >= self.height { continue; }
|
if lx < 0 || ly < 0 || lx >= self.width || ly >= self.height { continue; }
|
||||||
let idx = (ly * self.width + lx) as usize;
|
let idx = (ly * self.width + lx) as usize;
|
||||||
|
|||||||
Reference in New Issue
Block a user