that was a pain. Got it working on windows again. MSVC was being really

picky about a few errors. Good thing though, I'm not really sure why clang + osx let me be that lax with memory
2016-09-03 19:23:50 -07:00
parent 9c99f9edd0
commit f487895f9f
6 changed files with 173 additions and 142 deletions
--- a/include/CL_Wrapper.h
+++ b/include/CL_Wrapper.h
@@ -25,6 +25,7 @@ struct device {
    cl_uint clock_frequency;
    char version[128];
    cl_platform_id platform;
+	cl_uint comp_units;
 };

 class CL_Wrapper {
--- a/kernels/kernel.cl
+++ b/kernels/kernel.cl
@@ -1,125 +1,125 @@
-// global  :  local  :  constant  :  private
-
-// Function arguments of type image2d_t, image3d_t, image2d_array_t, image1d_t, image1d_buffer_t,
-// and image1d_array_t refer to image memory objects allocated in the **global** address space.
-
-// http://downloads.ti.com/mctools/esd/docs/opencl/memory/buffers.html
-
-// Open CL C
-// https://www.fixstars.com/en/opencl/book/OpenCLProgrammingBook/opencl-c/
-
-__kernel void hello(
-    global int2* resolution,
-    global char* map,
-    global float3* projection_matrix,
-    global float3* cam_dir,
-    global float3* cam_pos,
-    global image2d_t* canvas) {
-
-    printf("%s\n", "this is a test string\n");
-
-
-
-    const int MAX_RAY_STEPS = 64;
-
-    // The pixel coord we are at
-    int2 screenPos = (int2)(get_global_id(0) % resolution->x, get_global_id(0) / resolution->x);
-
-    // The X and Y planes
-    //float3 cameraPlaneU = vec3(1.0, 0.0, 0.0)
-
-    // Y being multiplied by the aspect ratio, usually around .5-6ish;
-    //cl_float3 cameraPlaneV = vec3(0.0, 1.0, 0.0) * iResolution.y / iResolution.x;
-
-    // So this is how they do that ray aiming! hah this is so tiny
-    // (camera direction) + (pixel.x * the X plane) + (product of pixel.y * Y plane)
-    // Oh all it's doing is adding the x and y coords of the pixel to the camera direction vector, interesting
-
-    //cl_float3 rayDir = cameraDir + screenPos.x * cameraPlaneU + screenPos.y * cameraPlaneV;
-
-    // the origin of the ray
-    // So the sign thing is for the up and down motion
-
-    //cl_float3 rayPos = vec3(0.0, 2.0 * sin(iGlobalTime * 2.7), -12.0);
-
-    // Ah, and here is where it spins around the center axis
-    // So it looks like its applying a function to rotate the x and z axis
-    //rayPos.xz = rotate2d(rayPos.xz, iGlobalTime);
-    //rayDir.xz = rotate2d(rayDir.xz, iGlobalTime);
-
-    // Just an intvec of out coords
-    //ivec3 mapPos = ivec3(floor(rayPos));
-
-    // I think this is the delta t value
-    // the magnitude of the vector divided by the rays direction. Not sure what the aim of that is
-    // The ray direction might always be normalized, so that would be the dame as my delta_T
-    //vec3 deltaDist = abs(vec3(length(rayDir)) / rayDir);
-
-    // The steps are the signs of the ray direction
-    //ivec3 rayStep = ivec3(sign(rayDir));
-
-    //  ithe sign of the rays direction
-    //  *
-    //	Convert map position to a floating point vector and take away the ray position
-    //  +
-    //  the sign of the rays direction by 0.5
-    //  +
-    //  0.5
-    //  Now multyply everything by 0.5
-    //vec3 sideDist = (sign(rayDir) * (vec3(mapPos) - rayPos) + (sign(rayDir) * 0.5) + 0.5) * deltaDist;
-
-    // A byte mask
-    //bvec3 mask;
-
-    // repeat until the max steps
-    //for (int i = 0; i < MAX_RAY_STEPS; i++) {
-
-            // If there is a voxel at the map position, continue?
-            //if (getVoxel(mapPos))
-            //        break;
-
-            //
-            // find which is smaller
-            // y ? z --> x`
-            // z ? x --> y`
-            // x ? y --> z`
-            //
-            // find which os is less or equal
-            // x` ? x --> x
-            // y` ? y --> y
-            // z` ? z --> z
-
-            // Now find which ons is
-            //mask = lessThanEqual(sideDist.xyz, min(sideDist.yzx, sideDist.zxy));
-
-
-            // Originally he used a component wise
-            /*bvec3 b1 = lessThan(sideDist.xyz, sideDist.yzx);
-            bvec3 b2 = lessThanEqual(sideDist.xyz, sideDist.zxy);
-            mask.x = b1.x && b2.x;
-            mask.y = b1.y && b2.y;
-            mask.z = b1.z && b2.z;*/
-            //Would've done mask = b1 && b2 but the compiler is making me do it component wise.
-
-            //All components of mask are false except for the corresponding largest component
-            //of sideDist, which is the axis along which the ray should be incremented.
-
-            //sideDist += vec3(mask) * deltaDist;
-            //mapPos += ivec3(mask) * rayStep;
-    //}
-
-    // Ah this is for coloring obviously, seems to be odd though, no indexing
-    //vec4 color;
-    //if (mask.x) {
-    //       color = vec4(0.5);
-    //}
-    //if (mask.y) {
-    //       color = vec4(1.0);
-    //}
-    //if (mask.z) {
-    //        color = vec4(0.75);
-    //}
-    //write_imagef(image, pixel, color);
-
-
+// global  :  local  :  constant  :  private
+
+// Function arguments of type image2d_t, image3d_t, image2d_array_t, image1d_t, image1d_buffer_t,
+// and image1d_array_t refer to image memory objects allocated in the **global** address space.
+
+// http://downloads.ti.com/mctools/esd/docs/opencl/memory/buffers.html
+
+// Open CL C
+// https://www.fixstars.com/en/opencl/book/OpenCLProgrammingBook/opencl-c/
+
+__kernel void hello(
+    global int2* resolution,
+    global char* map,
+    global float3* projection_matrix,
+    global float3* cam_dir,
+    global float3* cam_pos,
+    global image2d_t* canvas) {
+
+    printf("%s\n", "this is a test string\n");
+
+
+
+    const int MAX_RAY_STEPS = 64;
+
+    // The pixel coord we are at
+    int2 screenPos = (int2)(get_global_id(0) % resolution->x, get_global_id(0) / resolution->x);
+
+    // The X and Y planes
+    //float3 cameraPlaneU = vec3(1.0, 0.0, 0.0)
+
+    // Y being multiplied by the aspect ratio, usually around .5-6ish;
+    //cl_float3 cameraPlaneV = vec3(0.0, 1.0, 0.0) * iResolution.y / iResolution.x;
+
+    // So this is how they do that ray aiming! hah this is so tiny
+    // (camera direction) + (pixel.x * the X plane) + (product of pixel.y * Y plane)
+    // Oh all it's doing is adding the x and y coords of the pixel to the camera direction vector, interesting
+
+    //cl_float3 rayDir = cameraDir + screenPos.x * cameraPlaneU + screenPos.y * cameraPlaneV;
+
+    // the origin of the ray
+    // So the sign thing is for the up and down motion
+
+    //cl_float3 rayPos = vec3(0.0, 2.0 * sin(iGlobalTime * 2.7), -12.0);
+
+    // Ah, and here is where it spins around the center axis
+    // So it looks like its applying a function to rotate the x and z axis
+    //rayPos.xz = rotate2d(rayPos.xz, iGlobalTime);
+    //rayDir.xz = rotate2d(rayDir.xz, iGlobalTime);
+
+    // Just an intvec of out coords
+    //ivec3 mapPos = ivec3(floor(rayPos));
+
+    // I think this is the delta t value
+    // the magnitude of the vector divided by the rays direction. Not sure what the aim of that is
+    // The ray direction might always be normalized, so that would be the dame as my delta_T
+    //vec3 deltaDist = abs(vec3(length(rayDir)) / rayDir);
+
+    // The steps are the signs of the ray direction
+    //ivec3 rayStep = ivec3(sign(rayDir));
+
+    //  ithe sign of the rays direction
+    //  *
+    //	Convert map position to a floating point vector and take away the ray position
+    //  +
+    //  the sign of the rays direction by 0.5
+    //  +
+    //  0.5
+    //  Now multyply everything by 0.5
+    //vec3 sideDist = (sign(rayDir) * (vec3(mapPos) - rayPos) + (sign(rayDir) * 0.5) + 0.5) * deltaDist;
+
+    // A byte mask
+    //bvec3 mask;
+
+    // repeat until the max steps
+    //for (int i = 0; i < MAX_RAY_STEPS; i++) {
+
+            // If there is a voxel at the map position, continue?
+            //if (getVoxel(mapPos))
+            //        break;
+
+            //
+            // find which is smaller
+            // y ? z --> x`
+            // z ? x --> y`
+            // x ? y --> z`
+            //
+            // find which os is less or equal
+            // x` ? x --> x
+            // y` ? y --> y
+            // z` ? z --> z
+
+            // Now find which ons is
+            //mask = lessThanEqual(sideDist.xyz, min(sideDist.yzx, sideDist.zxy));
+
+
+            // Originally he used a component wise
+            /*bvec3 b1 = lessThan(sideDist.xyz, sideDist.yzx);
+            bvec3 b2 = lessThanEqual(sideDist.xyz, sideDist.zxy);
+            mask.x = b1.x && b2.x;
+            mask.y = b1.y && b2.y;
+            mask.z = b1.z && b2.z;*/
+            //Would've done mask = b1 && b2 but the compiler is making me do it component wise.
+
+            //All components of mask are false except for the corresponding largest component
+            //of sideDist, which is the axis along which the ray should be incremented.
+
+            //sideDist += vec3(mask) * deltaDist;
+            //mapPos += ivec3(mask) * rayStep;
+    //}
+
+    // Ah this is for coloring obviously, seems to be odd though, no indexing
+    //vec4 color;
+    //if (mask.x) {
+    //       color = vec4(0.5);
+    //}
+    //if (mask.y) {
+    //       color = vec4(1.0);
+    //}
+    //if (mask.z) {
+    //        color = vec4(0.75);
+    //}
+    //write_imagef(image, pixel, color);
+
+
 }
--- a/kernels/minimal_kernel.cl
+++ b/kernels/minimal_kernel.cl
@@ -48,17 +48,17 @@ __kernel void min_kern(

    // Setup the voxel coords from the camera origin
    int3 voxel = {
-            floorf(cam_pos->x),
-            floorf(cam_pos->y),
-            floorf(cam_pos->z)
+            floor(cam_pos->x),
+            floor(cam_pos->y),
+            floor(cam_pos->z)
    };

    // Delta T is the units a ray must travel along an axis in order to
    // traverse an integer split
    float3 delta_t = {
-            fabsf(1.0f / ray_dir.x),
-            fabsf(1.0f / ray_dir.y),
-            fabsf(1.0f / ray_dir.z)
+            fabs(1.0f / ray_dir.x),
+            fabs(1.0f / ray_dir.y),
+            fabs(1.0f / ray_dir.z)
    };

    // Intersection T is the collection of the next intersection points
--- a/src/CL_Wrapper.cpp
+++ b/src/CL_Wrapper.cpp
@@ -44,10 +44,11 @@ int CL_Wrapper::acquire_platform_and_device(){

            d.id = deviceIds[q];

-            clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, 128, &d.platform, NULL);
-            clGetDeviceInfo(d.id, CL_DEVICE_VERSION, 128, &d.version, NULL);
-            clGetDeviceInfo(d.id, CL_DEVICE_TYPE, 128, &d.type, NULL);
-            clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, 128, &d.clock_frequency, NULL);
+            clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &d.platform, NULL);
+            clGetDeviceInfo(d.id, CL_DEVICE_VERSION, sizeof(char) * 128, &d.version, NULL);
+            clGetDeviceInfo(d.id, CL_DEVICE_TYPE, sizeof(cl_device_type), &d.type, NULL);
+            clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &d.clock_frequency, NULL);
+			clGetDeviceInfo(d.id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &d.comp_units, NULL);

            plt_ids.at(d.platform).push_back(d);
        }
@@ -58,7 +59,8 @@ int CL_Wrapper::acquire_platform_and_device(){
    // falling back to the cpu with the fastest clock if we weren't able to find one

    device current_best_device;
-    current_best_device.clock_frequency = 0; // Set this to 0 so the first run always selects a new device
+	current_best_device.type = -1; // Set this to -1 so the first run always selects a new device
+

    for (auto kvp: plt_ids){

@@ -72,7 +74,10 @@ int CL_Wrapper::acquire_platform_and_device(){
            if (device.type == CL_DEVICE_TYPE_GPU && current_best_device.type != CL_DEVICE_TYPE_GPU){
                current_best_device = device;
            }
-            else if (device.clock_frequency > current_best_device.clock_frequency){
+			else if (device.comp_units > current_best_device.comp_units) {
+				current_best_device = device;
+			}
+            else if (current_best_device.type != CL_DEVICE_TYPE_GPU && device.clock_frequency > current_best_device.clock_frequency){
                current_best_device = device;
            }
        }
@@ -108,7 +113,7 @@ int CL_Wrapper::create_shared_context() {
 	//};
 	HGLRC hGLRC = wglGetCurrentContext();
 	HDC hDC = wglGetCurrentDC();
-	cl_context_properties context_properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformIds[1], CL_GL_CONTEXT_KHR, (cl_context_properties)hGLRC, CL_WGL_HDC_KHR, (cl_context_properties)hDC, 0 };
+	cl_context_properties context_properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, CL_GL_CONTEXT_KHR, (cl_context_properties)hGLRC, CL_WGL_HDC_KHR, (cl_context_properties)hDC, 0 };


 #elif defined TARGET_OS_MAC
@@ -206,6 +211,8 @@ int CL_Wrapper::compile_kernel(std::string kernel_source, bool is_path, std::str
        return -1;

    kernel_map.emplace(std::make_pair(kernel_name, kernel));
+
+	return 1;
 }

 int CL_Wrapper::set_kernel_arg(
@@ -228,6 +235,7 @@ int CL_Wrapper::set_kernel_arg(

 int CL_Wrapper::store_buffer(cl_mem buffer, std::string buffer_name){
    buffer_map.emplace(std::make_pair(buffer_name, buffer));
+	return 1;
 }

 int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){
@@ -246,7 +254,7 @@ int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){
    if (assert(error, "clEnqueueNDRangeKernel"))
        return -1;

-
+	return 1;
 }


--- a/src/TestPlatform.cpp
+++ b/src/TestPlatform.cpp
@@ -1,13 +1,16 @@
 #pragma once
 #include <cstdio>
 #include <cstring>
-#include <OpenCL/cl_ext.h>
 #include <iostream>
 #include <vector>

 #ifdef linux

 #elif defined _WIN32
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+#include <CL/cl_gl.h>
+#include <CL/cl.h>
+#include <CL/opencl.h>

 #elif defined TARGET_OS_MAC
 # include <OpenGL/OpenGL.h>
@@ -111,6 +114,21 @@ inline int query_platform_devices() {

 			clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 128, buf, NULL);
 			fprintf(stdout, "%s\n", buf);
+
+
+			//cl_device_type a;
+			//clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, 128, &a, NULL);
+			//std::cout << a << std::endl;
+
+			//cl_uint b;
+			//clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, 128, &b, NULL);
+			//std::cout << b << std::endl;
+
+			//cl_uint c;
+			//clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 128, &c, NULL);
+			//std::cout << c << std::endl;
+			
+			std::cout << devices[i] << std::endl;
 		}

 		free(devices);
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -9,9 +9,12 @@
 #include <CL/opencl.h>

 #elif defined _WIN32
+#include <windows.h>
 #include <CL/cl_gl.h>
 #include <CL/cl.h>
 #include <CL/opencl.h>
+#include <GL/GL.h>
+
 #include <windows.h>

 #elif defined TARGET_OS_MAC
@@ -66,12 +69,13 @@ int main() {
    sf::Texture t;

    CL_Wrapper c;
+	query_platform_devices();
    c.acquire_platform_and_device();
    c.create_shared_context();
    c.create_command_queue();

-    c.compile_kernel("../kernels/kernel.c", true, "hello");
-    c.compile_kernel("../kernels/minimal_kernel.c", true, "min_kern");
+    //c.compile_kernel("../kernels/kernel.cl", true, "hello");
+	c.compile_kernel("../kernels/minimal_kernel.cl", true, "min_kern");

    sf::Vector3i map_dim(MAP_X, MAP_Y, MAP_Z);
    Map* map = new Map(map_dim);
@@ -101,7 +105,7 @@ int main() {

    // SFML 2.4 has Vector4 datatypes.......

-    float view_matrix[view_res.x * view_res.y * 4];
+    float* view_matrix = new float[WINDOW_X * WINDOW_Y * 4];
    for (int y = -view_res.y / 2; y < view_res.y / 2; y++) {
        for (int x = -view_res.x / 2; x < view_res.x / 2; x++) {