Trying to find a way to make this as SIMD-friendly as possible. Perhaps one kernel for the calculation and then a second kernel to "clean" the back buffer.
This commit is contained in:
@@ -28,7 +28,7 @@ float elap_time() {
|
|||||||
return (float)((counter - start) / double(frequency));
|
return (float)((counter - start) / double(frequency));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* convert the kernel file into a string */
|
// convert the kernel file into a string
|
||||||
int convertToString(const char *filename, std::string& s)
|
int convertToString(const char *filename, std::string& s)
|
||||||
{
|
{
|
||||||
size_t size;
|
size_t size;
|
||||||
@@ -63,8 +63,8 @@ int main(int argc, char* argv[])
|
|||||||
{
|
{
|
||||||
int WINDOW_X = 1000;
|
int WINDOW_X = 1000;
|
||||||
int WINDOW_Y = 1000;
|
int WINDOW_Y = 1000;
|
||||||
int GRID_WIDTH = 1000;
|
int GRID_WIDTH = WINDOW_X;
|
||||||
int GRID_HEIGHT = 1000;
|
int GRID_HEIGHT = WINDOW_Y;
|
||||||
int WORKER_SIZE = 2000;
|
int WORKER_SIZE = 2000;
|
||||||
|
|
||||||
// ============================== OpenCL Setup ==================================================================
|
// ============================== OpenCL Setup ==================================================================
|
||||||
@@ -139,18 +139,19 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Now create the kernel
|
// Now create the kernel
|
||||||
cl_kernel kernel = clCreateKernel(program, "conway", NULL);
|
cl_kernel front_kernel = clCreateKernel(program, "conway", NULL);
|
||||||
|
cl_kernel back_kernel = clCreateKernel(program, "conway", NULL);
|
||||||
|
|
||||||
// ======================================= Setup grid =========================================================
|
// ======================================= Setup grid =========================================================
|
||||||
|
|
||||||
// Setup the rng
|
// Setup the rng
|
||||||
std::mt19937 rng(time(NULL));
|
std::mt19937 rng(time(NULL));
|
||||||
std::uniform_int_distribution<int> rgen(0, 12); // 25% chance
|
std::uniform_int_distribution<int> rgen(0, 4); // 20% chance (inclusive range 0..4 gives 5 values, 1 of which seeds a live cell)
|
||||||
|
|
||||||
// Init the grids
|
// Init the grids
|
||||||
unsigned char* front_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT* 2];
|
unsigned char* front_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT];
|
||||||
|
|
||||||
for (int i = 0; i < 1000 * 1000; i += 2) {
|
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT; i++) {
|
||||||
if (rgen(rng) == 1) {
|
if (rgen(rng) == 1) {
|
||||||
front_grid[i] = 1;
|
front_grid[i] = 1;
|
||||||
}
|
}
|
||||||
@@ -159,31 +160,14 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned char* rear_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT * 2];
|
unsigned char* back_grid = new unsigned char[GRID_WIDTH * GRID_HEIGHT];
|
||||||
|
|
||||||
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT; i++) {
|
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT; i++) {
|
||||||
rear_grid[i] = front_grid[i];
|
back_grid[i] = front_grid[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// ====================================== Setup SFML ==========================================================
|
// ====================================== Setup SFML ==========================================================
|
||||||
|
|
||||||
sf::Uint8* asdf = rear_grid;
|
|
||||||
|
|
||||||
sf::Uint8* pixel_array = new sf::Uint8[WINDOW_X * WINDOW_Y * 4];
|
|
||||||
|
|
||||||
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT * 2; i += 2) {
|
|
||||||
|
|
||||||
int p = i / 2;
|
|
||||||
|
|
||||||
pixel_array[p * 4] = 49; // R?
|
|
||||||
pixel_array[p * 4 + 1] = 68; // G?
|
|
||||||
pixel_array[p * 4 + 2] = 72; // B?
|
|
||||||
pixel_array[p * 4 + 3] = 255; // A?
|
|
||||||
}
|
|
||||||
|
|
||||||
char* arr = new char[1000 * 1000];
|
|
||||||
|
|
||||||
|
|
||||||
// Init window, and loop data
|
// Init window, and loop data
|
||||||
sf::RenderWindow window(sf::VideoMode(GRID_WIDTH, GRID_HEIGHT), "Classic Games");
|
sf::RenderWindow window(sf::VideoMode(GRID_WIDTH, GRID_HEIGHT), "Classic Games");
|
||||||
|
|
||||||
@@ -191,26 +175,51 @@ int main(int argc, char* argv[])
|
|||||||
double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0;
|
double frame_time = 0.0, elapsed_time = 0.0, delta_time = 0.0, accumulator_time = 0.0, current_time = 0.0;
|
||||||
int frame_count = 0;
|
int frame_count = 0;
|
||||||
|
|
||||||
|
sf::Uint8* pixel_array = new sf::Uint8[WINDOW_X * WINDOW_Y * 4];
|
||||||
|
|
||||||
int err = 0;
|
for (int i = 0; i < GRID_WIDTH * GRID_HEIGHT; i++) {
|
||||||
cl_mem frontBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)front_grid, &err);
|
|
||||||
cl_mem rearBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)rear_grid, &err);
|
|
||||||
|
|
||||||
cl_mem workerCountBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &WORKER_SIZE, &err);
|
|
||||||
cl_mem gridWidthBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_WIDTH, &err);
|
|
||||||
cl_mem gridHeightBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_HEIGHT, &err);
|
|
||||||
|
|
||||||
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&frontBuffer);
|
|
||||||
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&workerCountBuffer);
|
|
||||||
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&gridWidthBuffer);
|
|
||||||
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&gridHeightBuffer);
|
|
||||||
|
|
||||||
|
pixel_array[i * 4] = 49; // R?
|
||||||
|
pixel_array[i * 4 + 1] = 68; // G?
|
||||||
|
pixel_array[i * 4 + 2] = 72; // B?
|
||||||
|
pixel_array[i * 4 + 3] = 255; // A?
|
||||||
|
}
|
||||||
|
|
||||||
sf::Texture texture;
|
sf::Texture texture;
|
||||||
texture.create(WINDOW_X, WINDOW_Y);
|
texture.create(WINDOW_X, WINDOW_Y);
|
||||||
sf::Sprite sprite(texture);
|
sf::Sprite sprite(texture);
|
||||||
|
|
||||||
|
// ========================================= Setup the buffers ==================================================
|
||||||
|
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
cl_mem frontBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)front_grid, &err);
|
||||||
|
cl_mem backBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)back_grid, &err);
|
||||||
|
cl_mem pixelBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)pixel_array, &err);
|
||||||
|
|
||||||
|
cl_mem workerCountBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &WORKER_SIZE, &err);
|
||||||
|
cl_mem gridWidthBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_WIDTH, &err);
|
||||||
|
cl_mem gridHeightBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &GRID_HEIGHT, &err);
|
||||||
|
|
||||||
|
// Kernel args for front kernel
|
||||||
|
status = clSetKernelArg(front_kernel, 0, sizeof(cl_mem), (void *)&frontBuffer);
|
||||||
|
status = clSetKernelArg(front_kernel, 1, sizeof(cl_mem), (void *)&backBuffer);
|
||||||
|
status = clSetKernelArg(front_kernel, 2, sizeof(cl_mem), (void *)&pixelBuffer);
|
||||||
|
|
||||||
|
status = clSetKernelArg(front_kernel, 3, sizeof(cl_mem), (void *)&workerCountBuffer);
|
||||||
|
status = clSetKernelArg(front_kernel, 4, sizeof(cl_mem), (void *)&gridWidthBuffer);
|
||||||
|
status = clSetKernelArg(front_kernel, 5, sizeof(cl_mem), (void *)&gridHeightBuffer);
|
||||||
|
|
||||||
|
// Flipped kernel args for the back kernel
|
||||||
|
status = clSetKernelArg(back_kernel, 0, sizeof(cl_mem), (void *)&backBuffer); // Flipped
|
||||||
|
status = clSetKernelArg(back_kernel, 1, sizeof(cl_mem), (void *)&frontBuffer); // Flipped
|
||||||
|
status = clSetKernelArg(back_kernel, 2, sizeof(cl_mem), (void *)&pixelBuffer);
|
||||||
|
|
||||||
|
status = clSetKernelArg(back_kernel, 3, sizeof(cl_mem), (void *)&workerCountBuffer);
|
||||||
|
status = clSetKernelArg(back_kernel, 4, sizeof(cl_mem), (void *)&gridWidthBuffer);
|
||||||
|
status = clSetKernelArg(back_kernel, 5, sizeof(cl_mem), (void *)&gridHeightBuffer);
|
||||||
|
|
||||||
|
bool flipped = false;
|
||||||
// ===================================== Loop ==================================================================
|
// ===================================== Loop ==================================================================
|
||||||
while (window.isOpen()) {
|
while (window.isOpen()) {
|
||||||
|
|
||||||
@@ -241,12 +250,16 @@ int main(int argc, char* argv[])
|
|||||||
// Work size, for each y line
|
// Work size, for each y line
|
||||||
size_t global_work_size[1] = { WORKER_SIZE };
|
size_t global_work_size[1] = { WORKER_SIZE };
|
||||||
|
|
||||||
// Run the kernel
|
if (flipped) {
|
||||||
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
|
status = clEnqueueNDRangeKernel(commandQueue, back_kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
|
||||||
|
status = clEnqueueReadBuffer(commandQueue, pixelBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 4 * sizeof(unsigned char), (void*)pixel_array, 0, NULL, NULL);
|
||||||
// Get output, put back into grid
|
}
|
||||||
status = clEnqueueReadBuffer(commandQueue, frontBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * sizeof(char), (void*)rear_grid, 0, NULL, NULL);
|
else {
|
||||||
|
status = clEnqueueNDRangeKernel(commandQueue, front_kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
|
||||||
|
status = clEnqueueReadBuffer(commandQueue, pixelBuffer, CL_TRUE, 0, GRID_WIDTH * GRID_HEIGHT * 4 * sizeof(unsigned char), (void*)pixel_array, 0, NULL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
flipped = !flipped;
|
||||||
|
|
||||||
texture.update(pixel_array);
|
texture.update(pixel_array);
|
||||||
window.draw(sprite);
|
window.draw(sprite);
|
||||||
@@ -257,21 +270,24 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Temporary
|
|
||||||
|
// Release the buffers
|
||||||
status = clReleaseMemObject(frontBuffer);
|
status = clReleaseMemObject(frontBuffer);
|
||||||
|
status = clReleaseMemObject(backBuffer);
|
||||||
|
status = clReleaseMemObject(pixelBuffer);
|
||||||
status = clReleaseMemObject(workerCountBuffer);
|
status = clReleaseMemObject(workerCountBuffer);
|
||||||
status = clReleaseMemObject(gridWidthBuffer);
|
status = clReleaseMemObject(gridWidthBuffer);
|
||||||
status = clReleaseMemObject(gridHeightBuffer);
|
status = clReleaseMemObject(gridHeightBuffer);
|
||||||
|
|
||||||
/*Step 12: Clean the resources.*/
|
// And the program stuff
|
||||||
status = clReleaseKernel(kernel); //Release kernel.
|
status = clReleaseKernel(front_kernel); //Release kernel.
|
||||||
status = clReleaseProgram(program); //Release the program object.
|
status = clReleaseProgram(program); //Release the program object.
|
||||||
status = clReleaseCommandQueue(commandQueue); //Release Command queue.
|
status = clReleaseCommandQueue(commandQueue); //Release Command queue.
|
||||||
status = clReleaseContext(context); //Release context.
|
status = clReleaseContext(context); //Release context.
|
||||||
|
|
||||||
if (devices != NULL)
|
if (devices != NULL)
|
||||||
{
|
{
|
||||||
free(devices);
|
delete devices;
|
||||||
devices = NULL;
|
devices = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -227,7 +227,8 @@
|
|||||||
<ClCompile Include="Conway.cpp" />
|
<ClCompile Include="Conway.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="conway_kernel.cl" />
|
<None Include="conway_compute.cl" />
|
||||||
|
<None Include="conway_align.cl" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
|||||||
7
Conway_OpenCL/conway_align.cl
Normal file
7
Conway_OpenCL/conway_align.cl
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
__kernel void conway(__global unsigned char* front_grid, __global unsigned char* rear_grid, __global unsigned char* pixel_out, __global int* num_workers, __global int* grid_width, __global int* grid_height){
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
61
Conway_OpenCL/conway_compute.cl
Normal file
61
Conway_OpenCL/conway_compute.cl
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
__kernel void conway(__global unsigned char* front_grid, __global unsigned char* rear_grid, __global unsigned char* pixel_out, __global int* num_workers, __global int* grid_width, __global int* grid_height)
|
||||||
|
{
|
||||||
|
// Calculate the start and end range that this worker will be calculating
|
||||||
|
|
||||||
|
int data_length = *grid_width * *grid_height;
|
||||||
|
|
||||||
|
int start_range = (data_length / *num_workers) * get_global_id(0);
|
||||||
|
int end_range = (data_length / *num_workers) * (get_global_id(0) + 1);
|
||||||
|
|
||||||
|
// x, y + 1
|
||||||
|
|
||||||
|
int neighbors = 0;
|
||||||
|
|
||||||
|
for (int i = start_range; i < end_range; i++){
|
||||||
|
|
||||||
|
// add all 8 blocks to neighbors
|
||||||
|
neighbors = 0;
|
||||||
|
|
||||||
|
// Top
|
||||||
|
neighbors += front_grid[i - *grid_width];
|
||||||
|
|
||||||
|
// Top right
|
||||||
|
neighbors += front_grid[i - *grid_width + 1];
|
||||||
|
|
||||||
|
// Right
|
||||||
|
neighbors += front_grid[i + 1];
|
||||||
|
|
||||||
|
// Bottom Right
|
||||||
|
neighbors += front_grid[i + *grid_width + 1];
|
||||||
|
|
||||||
|
// Bottom
|
||||||
|
neighbors += front_grid[i + *grid_width];
|
||||||
|
|
||||||
|
// Bottom Left
|
||||||
|
neighbors += front_grid[i + *grid_width - 1];
|
||||||
|
|
||||||
|
// Left
|
||||||
|
neighbors += front_grid[i - 1];
|
||||||
|
|
||||||
|
// Top left
|
||||||
|
neighbors += front_grid[i - *grid_width - 1];
|
||||||
|
|
||||||
|
|
||||||
|
if (neighbors == 3 || (neighbors == 2 && front_grid[i])) {
|
||||||
|
rear_grid[i] = 1;
|
||||||
|
pixel_out[i * 4] = 255; // R
|
||||||
|
pixel_out[i * 4 + 1] = 255; // G
|
||||||
|
pixel_out[i * 4 + 2] = 255; // B
|
||||||
|
pixel_out[i * 4 + 3] = 255; // A
|
||||||
|
}
|
||||||
|
|
||||||
|
else {
|
||||||
|
rear_grid[i] = 0;
|
||||||
|
pixel_out[i * 4] = 49; // R
|
||||||
|
pixel_out[i * 4 + 1] = 68; // G
|
||||||
|
pixel_out[i * 4 + 2] = 72; // B
|
||||||
|
pixel_out[i * 4 + 3] = 255; // A
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
__kernel void conway(__global unsigned char* front_grid, __global unsigned char* rear_grid, __global int* num_workers, __global int* grid_width, __global int* grid_height)
|
|
||||||
{
|
|
||||||
// Caclulate the start and end range that this worker will be calculating
|
|
||||||
|
|
||||||
int data_length = *grid_width * *grid_height;
|
|
||||||
|
|
||||||
int start_range = (data_length / *num_workers) * get_global_id(0) * 2; // * 2 = padding
|
|
||||||
int end_range = (data_length / *num_workers) * (get_global_id(0) + 1) * 2;
|
|
||||||
|
|
||||||
// x, y + 1
|
|
||||||
|
|
||||||
int neighbors = 0;
|
|
||||||
|
|
||||||
for (int i = start_range; i < end_range; i += 2){
|
|
||||||
|
|
||||||
// add all 8 blocks to neghbors
|
|
||||||
neighbors = 0;
|
|
||||||
// Top
|
|
||||||
neighbors += in[i - *grid_width * 2];
|
|
||||||
|
|
||||||
// Top right
|
|
||||||
neighbors += in[i - *grid_width * 2 + 2];
|
|
||||||
|
|
||||||
// Right
|
|
||||||
neighbors += in[i + 2];
|
|
||||||
|
|
||||||
// Bottom Right
|
|
||||||
neighbors += in[i + *grid_width * 2 + 2];
|
|
||||||
|
|
||||||
// Bottom
|
|
||||||
neighbors += in[i + *grid_width * 2];
|
|
||||||
|
|
||||||
// Bottom Left
|
|
||||||
neighbors += in[i + *grid_width * 2 - 2];
|
|
||||||
|
|
||||||
// Left
|
|
||||||
neighbors += in[i - 2];
|
|
||||||
|
|
||||||
// Top left
|
|
||||||
neighbors += in[i - *grid_width * 2 - 2];
|
|
||||||
|
|
||||||
// push living status to the padded second char
|
|
||||||
|
|
||||||
if (neighbors == 3 || (neighbors == 2 && in[i])){
|
|
||||||
in[i + 1] = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
else
|
|
||||||
in[i + 1] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -227,7 +227,8 @@
|
|||||||
<ClCompile Include="Conway.cpp" />
|
<ClCompile Include="Conway.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="conway_kernel.cl" />
|
<None Include="conway_compute.cl" />
|
||||||
|
<None Include="conway_align.cl" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
|||||||
Reference in New Issue
Block a user