Using a workgroup size of 8x8, along with reducing the number of workgroups dispatched, improves performance 20x

This commit is contained in:
2019-09-06 00:21:50 -07:00
parent 711e678969
commit 314fa3e4af
5 changed files with 67 additions and 121 deletions

View File

@@ -1,6 +1,6 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) buffer wData {
int buf[];
@@ -35,44 +35,39 @@ void main() {
uint idx = get_idx(0,0);
// ivec4 p = separate(read_buffer.buf[get_idx(0 , 0)]);
// ivec4 p0 = separate(read_buffer.buf[get_idx(0 , 1)]);
// ivec4 p1 = separate(read_buffer.buf[get_idx(0 ,-1)]);
// ivec4 p2 = separate(read_buffer.buf[get_idx(1 , 1)]);
// ivec4 p3 = separate(read_buffer.buf[get_idx(-1,-1)]);
// ivec4 p4 = separate(read_buffer.buf[get_idx(1 , 0)]);
// ivec4 p5 = separate(read_buffer.buf[get_idx(-1, 0)]);
// ivec4 p6 = separate(read_buffer.buf[get_idx(1 ,-1)]);
// ivec4 p7 = separate(read_buffer.buf[get_idx(-1, 1)]);
//
// ivec3 d0 = abs(p0.xyz - p1.xyz);
// ivec3 d1 = abs(p2.xyz - p3.xyz);
// ivec3 d2 = abs(p4.xyz - p5.xyz);
// ivec3 d3 = abs(p6.xyz - p7.xyz);
//
// ivec3 m = max(max(max(d0, d1), d2), d3);
//
// if ((m.x + m.y + m.z) > 200){
// p.x = 0;
// p.y = 0;
// p.z = 255;
// }
// else {
//
// //p.w = 125;
// }
ivec4 p = separate(read_buffer.buf[get_idx(0 , 0)]);
ivec4 p0 = separate(read_buffer.buf[get_idx(0 , 1)]);
ivec4 p1 = separate(read_buffer.buf[get_idx(0 ,-1)]);
ivec4 p2 = separate(read_buffer.buf[get_idx(1 , 1)]);
ivec4 p3 = separate(read_buffer.buf[get_idx(-1,-1)]);
ivec4 p4 = separate(read_buffer.buf[get_idx(1 , 0)]);
ivec4 p5 = separate(read_buffer.buf[get_idx(-1, 0)]);
ivec4 p6 = separate(read_buffer.buf[get_idx(1 ,-1)]);
ivec4 p7 = separate(read_buffer.buf[get_idx(-1, 1)]);
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8);
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x00FF0000) ) | (p.z << 16);
// write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0xFF000000) ) | (p.w << 24);
ivec3 d0 = abs(p0.xyz - p1.xyz);
ivec3 d1 = abs(p2.xyz - p3.xyz);
ivec3 d2 = abs(p4.xyz - p5.xyz);
ivec3 d3 = abs(p6.xyz - p7.xyz);
ivec3 m = max(max(max(d0, d1), d2), d3);
if ((m.x + m.y + m.z) > 200){
p.x = 0;
p.y = 0;
p.z = 255;
}
else {
//p.w = 125;
}
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8);
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0x00FF0000) ) | (p.z << 16);
write_buffer.buf[idx] = (write_buffer.buf[idx] & (~0xFF000000) ) | (p.w << 24);
}
// Just gonna keep this around
// read_buffer.buf[idx] = (read_buffer.buf[idx] & (~0x000000FF) ) | (p.x);
// read_buffer.buf[idx] = (read_buffer.buf[idx] & (~0x0000FF00) ) | (p.y << 8);