Using a workgroup size of 8x8 and reducing the number of work groups dispatched improves performance 20x
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
#version 450
|
#version 450
|
||||||
|
|
||||||
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
|
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 0) buffer wData {
|
layout(set = 0, binding = 0) buffer wData {
|
||||||
int buf[];
|
int buf[];
|
||||||
@@ -35,43 +35,38 @@ void main() {
|
|||||||
|
|
||||||
uint idx = get_idx(0,0);
|
uint idx = get_idx(0,0);
|
||||||
|
|
||||||
// ivec4 p = separate(read_buffer.buf[get_idx(0 , 0)]);
|
ivec4 p = separate(read_buffer.buf[get_idx(0 , 0)]);
|
||||||
// ivec4 p0 = separate(read_buffer.buf[get_idx(0 , 1)]);
|
ivec4 p0 = separate(read_buffer.buf[get_idx(0 , 1)]);
|
||||||
// ivec4 p1 = separate(read_buffer.buf[get_idx(0 ,-1)]);
|
ivec4 p1 = separate(read_buffer.buf[get_idx(0 ,-1)]);
|
||||||
// ivec4 p2 = separate(read_buffer.buf[get_idx(1 , 1)]);
|
ivec4 p2 = separate(read_buffer.buf[get_idx(1 , 1)]);
|
||||||
// ivec4 p3 = separate(read_buffer.buf[get_idx(-1,-1)]);
|
ivec4 p3 = separate(read_buffer.buf[get_idx(-1,-1)]);
|
||||||
// ivec4 p4 = separate(read_buffer.buf[get_idx(1 , 0)]);
|
ivec4 p4 = separate(read_buffer.buf[get_idx(1 , 0)]);
|
||||||
// ivec4 p5 = separate(read_buffer.buf[get_idx(-1, 0)]);
|
ivec4 p5 = separate(read_buffer.buf[get_idx(-1, 0)]);
|
||||||
// ivec4 p6 = separate(read_buffer.buf[get_idx(1 ,-1)]);
|
ivec4 p6 = separate(read_buffer.buf[get_idx(1 ,-1)]);
|
||||||
// ivec4 p7 = separate(read_buffer.buf[get_idx(-1, 1)]);
|
ivec4 p7 = separate(read_buffer.buf[get_idx(-1, 1)]);
|
||||||
//
|
|
||||||
// ivec3 d0 = abs(p0.xyz - p1.xyz);
|
|
||||||
// ivec3 d1 = abs(p2.xyz - p3.xyz);
|
|
||||||
// ivec3 d2 = abs(p4.xyz - p5.xyz);
|
|
||||||
// ivec3 d3 = abs(p6.xyz - p7.xyz);
|
|
||||||
//
|
|
||||||
// ivec3 m = max(max(max(d0, d1), d2), d3);
|
|
||||||
//
|
|
||||||
// if ((m.x + m.y + m.z) > 200){
|
|
||||||
// p.x = 0;
|
|
||||||
// p.y = 0;
|
|
||||||
// p.z = 255;
|
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
//
|
|
||||||
// //p.w = 125;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
|
ivec3 d0 = abs(p0.xyz - p1.xyz);
|
||||||
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8);
|
ivec3 d1 = abs(p2.xyz - p3.xyz);
|
||||||
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x00FF0000) ) | (p.z << 16);
|
ivec3 d2 = abs(p4.xyz - p5.xyz);
|
||||||
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0xFF000000) ) | (p.w << 24);
|
ivec3 d3 = abs(p6.xyz - p7.xyz);
|
||||||
|
|
||||||
|
ivec3 m = max(max(max(d0, d1), d2), d3);
|
||||||
|
|
||||||
|
if ((m.x + m.y + m.z) > 200){
|
||||||
|
p.x = 0;
|
||||||
|
p.y = 0;
|
||||||
|
p.z = 255;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//p.w = 125;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
|
||||||
|
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8);
|
||||||
|
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x00FF0000) ) | (p.z << 16);
|
||||||
|
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0xFF000000) ) | (p.w << 24);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Just gonna keep this around
|
// Just gonna keep this around
|
||||||
// read_buffer.buf[idx] = (read_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
|
// read_buffer.buf[idx] = (read_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
|
||||||
|
|||||||
@@ -329,31 +329,10 @@ impl CanvasState {
|
|||||||
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
//TODO should probably use cpu accessible buffer instead of recreating immutes each frame
|
|
||||||
/*
|
|
||||||
CpuAccessibleBuffer::from_iter(
|
|
||||||
|
|
||||||
device.clone(),
|
|
||||||
BufferUsage::vertex_buffer(),
|
|
||||||
self.colored_drawables.iter().cloned(),
|
|
||||||
).unwrap().0;
|
|
||||||
*/
|
|
||||||
|
|
||||||
//if self.colored_vertex_buffer.len() == 0 {
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
self.colored_vertex_buffer.clear();
|
self.colored_vertex_buffer.clear();
|
||||||
{
|
{
|
||||||
let g = hprof::enter("Colored Vertex Buffer : From Data");
|
let g = hprof::enter("Colored Vertex Buffer");
|
||||||
self.colored_vertex_buffer.push(
|
|
||||||
ImmutableBuffer::from_data(self.colored_drawables.clone(),
|
|
||||||
BufferUsage::vertex_buffer(),
|
|
||||||
self.queue.clone()).unwrap().0
|
|
||||||
);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
let g = hprof::enter("Colored Vertex Buffer : From Iter");
|
|
||||||
self.colored_vertex_buffer.push(
|
self.colored_vertex_buffer.push(
|
||||||
ImmutableBuffer::from_iter(
|
ImmutableBuffer::from_iter(
|
||||||
self.colored_drawables.iter().cloned(),
|
self.colored_drawables.iter().cloned(),
|
||||||
@@ -363,18 +342,10 @@ impl CanvasState {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
self.colored_vertex_buffer.clear();
|
|
||||||
//println!("{:?}", self.colored_vertex_buffer.last().unwrap().size());
|
|
||||||
//self.colored_vertex_buffer.pop();
|
|
||||||
// }
|
|
||||||
// println!("{} {}", self.textured_vertex_buffer.len() , self.textured_drawables.len());
|
|
||||||
|
|
||||||
// if self.textured_vertex_buffer.len() != self.textured_drawables.len() {
|
|
||||||
self.textured_vertex_buffer.clear();
|
self.textured_vertex_buffer.clear();
|
||||||
for (k, v) in self.textured_drawables.drain() {
|
|
||||||
{
|
{
|
||||||
let g = hprof::enter("Textured Vertex Buffer : From Iter");
|
let g = hprof::enter("Textured Vertex Buffer");
|
||||||
|
for (k, v) in self.textured_drawables.drain() {
|
||||||
self.textured_vertex_buffer.insert(
|
self.textured_vertex_buffer.insert(
|
||||||
k.clone(),
|
k.clone(),
|
||||||
ImmutableBuffer::from_iter(
|
ImmutableBuffer::from_iter(
|
||||||
@@ -384,19 +355,7 @@ impl CanvasState {
|
|||||||
).unwrap().0,
|
).unwrap().0,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
// {
|
|
||||||
// let g = hprof::enter("Textured Vertex Buffer : From Data");
|
|
||||||
// self.textured_vertex_buffer.insert(
|
|
||||||
// k.clone(),
|
|
||||||
// ImmutableBuffer::from_data(v.first().unwrap().clone(),
|
|
||||||
// BufferUsage::vertex_buffer(),
|
|
||||||
// self.queue.clone()
|
|
||||||
// ).unwrap().0
|
|
||||||
// );
|
|
||||||
// }
|
|
||||||
//self.textured_vertex_buffer.pop();
|
|
||||||
}
|
}
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_solid_color_descriptor_set(&self, kernel: Arc<CanvasShader>) -> Box<dyn DescriptorSet + Send + Sync> {
|
fn get_solid_color_descriptor_set(&self, kernel: Arc<CanvasShader>) -> Box<dyn DescriptorSet + Send + Sync> {
|
||||||
|
|||||||
@@ -75,6 +75,7 @@ impl CompuState {
|
|||||||
handle
|
handle
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO : THIS IS BROKEN
|
||||||
pub fn get_kernel_handle(&self, kernel_name: String) -> Option<Arc<CompuKernelHandle>> {
|
pub fn get_kernel_handle(&self, kernel_name: String) -> Option<Arc<CompuKernelHandle>> {
|
||||||
for i in self.kernels.clone() {
|
for i in self.kernels.clone() {
|
||||||
if i.get_name() == kernel_name {
|
if i.get_name() == kernel_name {
|
||||||
@@ -104,7 +105,7 @@ impl CompuState {
|
|||||||
let size = buffer.get_size();
|
let size = buffer.get_size();
|
||||||
|
|
||||||
command_buffer = command_buffer
|
command_buffer = command_buffer
|
||||||
.dispatch([size.0,size.1,1], p, d, ()).unwrap()
|
.dispatch([size.0/8,size.1/8,1], p, d, ()).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
// i = (Buffer, Image, Kernel)
|
// i = (Buffer, Image, Kernel)
|
||||||
|
|||||||
@@ -165,11 +165,11 @@ fn main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut compu_frame = CompuFrame::new();
|
let mut compu_frame = CompuFrame::new();
|
||||||
// compu_frame.add(compute_buffer.clone(), compute_kernel.clone());
|
compu_frame.add(compute_buffer.clone(), compute_kernel.clone());
|
||||||
// compu_frame.add_with_image_swap(compute_buffer.clone(), compute_kernel.clone(), &compu_sprite1);
|
// compu_frame.add_with_image_swap(compute_buffer.clone(), compute_kernel.clone(), &compu_sprite1);
|
||||||
|
|
||||||
let mut canvas = CanvasFrame::new();
|
let mut canvas = CanvasFrame::new();
|
||||||
canvas.draw(&sprite);
|
// canvas.draw(&sprite);
|
||||||
// canvas.draw(&sprite2);
|
// canvas.draw(&sprite2);
|
||||||
// canvas.draw(&sprite3);
|
// canvas.draw(&sprite3);
|
||||||
//canvas.draw(&compu_sprite1);
|
//canvas.draw(&compu_sprite1);
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ impl<'a> VkProcessor<'a> {
|
|||||||
|
|
||||||
Swapchain::new(self.device.clone(),
|
Swapchain::new(self.device.clone(),
|
||||||
surface.clone(),
|
surface.clone(),
|
||||||
capabilities.min_image_count + 10, // number of attachment images
|
capabilities.min_image_count, // number of attachment images
|
||||||
format,
|
format,
|
||||||
initial_dimensions,
|
initial_dimensions,
|
||||||
1, // Layers
|
1, // Layers
|
||||||
@@ -103,7 +103,7 @@ impl<'a> VkProcessor<'a> {
|
|||||||
&self.queue,
|
&self.queue,
|
||||||
SurfaceTransform::Identity,
|
SurfaceTransform::Identity,
|
||||||
alpha,
|
alpha,
|
||||||
PresentMode::Mailbox, true, None).unwrap()
|
PresentMode::Immediate, true, None).unwrap()
|
||||||
};
|
};
|
||||||
|
|
||||||
self.swapchain = Some(swapchain);
|
self.swapchain = Some(swapchain);
|
||||||
@@ -174,12 +174,10 @@ impl<'a> VkProcessor<'a> {
|
|||||||
|
|
||||||
pub fn run(&mut self,
|
pub fn run(&mut self,
|
||||||
surface: &'a Arc<Surface<Window>>,
|
surface: &'a Arc<Surface<Window>>,
|
||||||
// mut frame_future: Box<dyn GpuFuture>,
|
|
||||||
canvas_frame: CanvasFrame,
|
canvas_frame: CanvasFrame,
|
||||||
compute_frame: CompuFrame,
|
compute_frame: CompuFrame,
|
||||||
)
|
) {
|
||||||
// -> Box<dyn GpuFuture> {
|
|
||||||
{
|
|
||||||
{
|
{
|
||||||
let g = hprof::enter("Waiting at queue");
|
let g = hprof::enter("Waiting at queue");
|
||||||
self.queue.wait();
|
self.queue.wait();
|
||||||
@@ -189,9 +187,6 @@ impl<'a> VkProcessor<'a> {
|
|||||||
let mut framebuffers =
|
let mut framebuffers =
|
||||||
self.canvas.window_size_dependent_setup(&self.swapchain_images.clone().unwrap().clone());
|
self.canvas.window_size_dependent_setup(&self.swapchain_images.clone().unwrap().clone());
|
||||||
|
|
||||||
// The docs said to call this on each loop.
|
|
||||||
// frame_future.cleanup_finished();
|
|
||||||
|
|
||||||
// Whenever the window resizes we need to recreate everything dependent on the window size.
|
// Whenever the window resizes we need to recreate everything dependent on the window size.
|
||||||
// In this example that includes the swapchain, the framebuffers and the dynamic state viewport.
|
// In this example that includes the swapchain, the framebuffers and the dynamic state viewport.
|
||||||
if self.swapchain_recreate_needed {
|
if self.swapchain_recreate_needed {
|
||||||
@@ -201,33 +196,27 @@ impl<'a> VkProcessor<'a> {
|
|||||||
self.swapchain_recreate_needed = false;
|
self.swapchain_recreate_needed = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// This function can block if no image is available. The parameter is an optional timeout
|
// This function can block if no image is available. The parameter is an optional timeout
|
||||||
// after which the function call will return an error.
|
// after which the function call will return an error.
|
||||||
let (image_num, acquire_future) =
|
let (image_num, acquire_future) =
|
||||||
match vulkano::swapchain::acquire_next_image(
|
match vulkano::swapchain::acquire_next_image(
|
||||||
self.swapchain.clone().unwrap().clone(),
|
self.swapchain.clone().unwrap().clone(),
|
||||||
//Some(Duration::from_millis(3)),
|
|
||||||
None,
|
None,
|
||||||
) {
|
) {
|
||||||
Ok(r) => r,
|
Ok(r) => r,
|
||||||
Err(AcquireError::OutOfDate) => {
|
Err(AcquireError::OutOfDate) => {
|
||||||
self.swapchain_recreate_needed = true;
|
self.swapchain_recreate_needed = true;
|
||||||
//return Box::new(sync::now(self.device.clone())) as Box<_>;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Err(err) => panic!("{:?}", err)
|
Err(err) => panic!("{:?}", err)
|
||||||
};
|
};
|
||||||
drop(g);
|
|
||||||
|
|
||||||
let g = hprof::enter("Joining the future");
|
|
||||||
// let future = frame_future.join(acquire_future);
|
|
||||||
drop(g);
|
drop(g);
|
||||||
|
|
||||||
{
|
{
|
||||||
let g = hprof::enter("Canvas creates GPU buffers");
|
let g = hprof::enter("Canvas creates GPU buffers");
|
||||||
// take the canvas frame and create the vertex buffers
|
// take the canvas frame and create the vertex buffers
|
||||||
// TODO: This performs gpu buffer creation. Shouldn't be in hotpath
|
// TODO: This performs gpu buffer creation. Shouldn't be in hotpath??
|
||||||
self.canvas.draw(canvas_frame);
|
self.canvas.draw(canvas_frame);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -236,41 +225,43 @@ impl<'a> VkProcessor<'a> {
|
|||||||
|
|
||||||
let g = hprof::enter("Push compute commands to command buffer");
|
let g = hprof::enter("Push compute commands to command buffer");
|
||||||
// Add the compute commands
|
// Add the compute commands
|
||||||
// let mut command_buffer = self.compute_state.compute_commands(compute_frame, command_buffer, &self.canvas);
|
let mut command_buffer = self.compute_state.compute_commands(compute_frame, command_buffer, &self.canvas);
|
||||||
drop(g);
|
drop(g);
|
||||||
|
|
||||||
let g = hprof::enter("Push draw commands to command buffer");
|
let g = hprof::enter("Push draw commands to command buffer");
|
||||||
// Add the draw commands
|
// Add the draw commands
|
||||||
// let mut command_buffer = self.canvas.draw_commands(command_buffer, framebuffers, image_num);
|
let mut command_buffer = self.canvas.draw_commands(command_buffer, framebuffers, image_num);
|
||||||
drop(g);
|
|
||||||
|
|
||||||
// And build
|
// And build
|
||||||
let command_buffer = command_buffer.build().unwrap();
|
let command_buffer = command_buffer.build().unwrap();
|
||||||
|
drop(g);
|
||||||
|
|
||||||
// Wait on the previous frame, then execute the command buffer and present the image
|
// Wait on the previous frame, then execute the command buffer and present the image
|
||||||
{
|
{
|
||||||
let g = hprof::enter("Mussing with the frame future");
|
|
||||||
|
|
||||||
//let future = future //frame_future.join(acquire_future)
|
let g = hprof::enter("Joining on the framebuffer");
|
||||||
let future = sync::now(self.device.clone())
|
let mut future = sync::now(self.device.clone())
|
||||||
|
.join(acquire_future);
|
||||||
|
drop(g);
|
||||||
|
|
||||||
|
let g = hprof::enter("Running the kernel and waiting on the future");
|
||||||
|
|
||||||
|
let future = future
|
||||||
.then_execute(self.queue.clone(), command_buffer).unwrap()
|
.then_execute(self.queue.clone(), command_buffer).unwrap()
|
||||||
.then_swapchain_present(self.queue.clone(), self.swapchain.clone().unwrap().clone(), image_num)
|
.then_swapchain_present(self.queue.clone(), self.swapchain.clone().unwrap().clone(), image_num)
|
||||||
.then_signal_fence_and_flush();
|
.then_signal_fence_and_flush();
|
||||||
future.unwrap().wait(None).unwrap();
|
|
||||||
|
|
||||||
// match future {
|
match future {
|
||||||
// Ok(future) => {
|
Ok(future) => {
|
||||||
// (Box::new(future) as Box<_>)
|
future.wait(None).unwrap();
|
||||||
// }
|
}
|
||||||
// Err(FlushError::OutOfDate) => {
|
Err(FlushError::OutOfDate) => {
|
||||||
// self.swapchain_recreate_needed = true;
|
self.swapchain_recreate_needed = true;
|
||||||
// (Box::new(sync::now(self.device.clone())) as Box<_>)
|
}
|
||||||
// }
|
Err(e) => {
|
||||||
// Err(e) => {
|
println!("{:?}", e);
|
||||||
// println!("{:?}", e);
|
}
|
||||||
// (Box::new(sync::now(self.device.clone())) as Box<_>)
|
}
|
||||||
// }
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user