that was a pain. Got it working on windows again. MSVC was being really
picky about a few errors. Good thing though, I'm not really sure why clang + osx let me be that lax with memory
This commit is contained in:
@@ -25,6 +25,7 @@ struct device {
|
|||||||
cl_uint clock_frequency;
|
cl_uint clock_frequency;
|
||||||
char version[128];
|
char version[128];
|
||||||
cl_platform_id platform;
|
cl_platform_id platform;
|
||||||
|
cl_uint comp_units;
|
||||||
};
|
};
|
||||||
|
|
||||||
class CL_Wrapper {
|
class CL_Wrapper {
|
||||||
|
|||||||
@@ -1,125 +1,125 @@
|
|||||||
// global : local : constant : private
|
// global : local : constant : private
|
||||||
|
|
||||||
// Function arguments of type image2d_t, image3d_t, image2d_array_t, image1d_t, image1d_buffer_t,
|
// Function arguments of type image2d_t, image3d_t, image2d_array_t, image1d_t, image1d_buffer_t,
|
||||||
// and image1d_array_t refer to image memory objects allocated in the **global** address space.
|
// and image1d_array_t refer to image memory objects allocated in the **global** address space.
|
||||||
|
|
||||||
// http://downloads.ti.com/mctools/esd/docs/opencl/memory/buffers.html
|
// http://downloads.ti.com/mctools/esd/docs/opencl/memory/buffers.html
|
||||||
|
|
||||||
// Open CL C
|
// Open CL C
|
||||||
// https://www.fixstars.com/en/opencl/book/OpenCLProgrammingBook/opencl-c/
|
// https://www.fixstars.com/en/opencl/book/OpenCLProgrammingBook/opencl-c/
|
||||||
|
|
||||||
__kernel void hello(
|
__kernel void hello(
|
||||||
global int2* resolution,
|
global int2* resolution,
|
||||||
global char* map,
|
global char* map,
|
||||||
global float3* projection_matrix,
|
global float3* projection_matrix,
|
||||||
global float3* cam_dir,
|
global float3* cam_dir,
|
||||||
global float3* cam_pos,
|
global float3* cam_pos,
|
||||||
global image2d_t* canvas) {
|
global image2d_t* canvas) {
|
||||||
|
|
||||||
printf("%s\n", "this is a test string\n");
|
printf("%s\n", "this is a test string\n");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
const int MAX_RAY_STEPS = 64;
|
const int MAX_RAY_STEPS = 64;
|
||||||
|
|
||||||
// The pixel coord we are at
|
// The pixel coord we are at
|
||||||
int2 screenPos = (int2)(get_global_id(0) % resolution->x, get_global_id(0) / resolution->x);
|
int2 screenPos = (int2)(get_global_id(0) % resolution->x, get_global_id(0) / resolution->x);
|
||||||
|
|
||||||
// The X and Y planes
|
// The X and Y planes
|
||||||
//float3 cameraPlaneU = vec3(1.0, 0.0, 0.0)
|
//float3 cameraPlaneU = vec3(1.0, 0.0, 0.0)
|
||||||
|
|
||||||
// Y being multiplied by the aspect ratio, usually around .5-6ish;
|
// Y being multiplied by the aspect ratio, usually around .5-6ish;
|
||||||
//cl_float3 cameraPlaneV = vec3(0.0, 1.0, 0.0) * iResolution.y / iResolution.x;
|
//cl_float3 cameraPlaneV = vec3(0.0, 1.0, 0.0) * iResolution.y / iResolution.x;
|
||||||
|
|
||||||
// So this is how they do that ray aiming! hah this is so tiny
|
// So this is how they do that ray aiming! hah this is so tiny
|
||||||
// (camera direction) + (pixel.x * the X plane) + (product of pixel.y * Y plane)
|
// (camera direction) + (pixel.x * the X plane) + (product of pixel.y * Y plane)
|
||||||
// Oh all it's doing is adding the x and y coords of the pixel to the camera direction vector, interesting
|
// Oh all it's doing is adding the x and y coords of the pixel to the camera direction vector, interesting
|
||||||
|
|
||||||
//cl_float3 rayDir = cameraDir + screenPos.x * cameraPlaneU + screenPos.y * cameraPlaneV;
|
//cl_float3 rayDir = cameraDir + screenPos.x * cameraPlaneU + screenPos.y * cameraPlaneV;
|
||||||
|
|
||||||
// the origin of the ray
|
// the origin of the ray
|
||||||
// So the sign thing is for the up and down motion
|
// So the sign thing is for the up and down motion
|
||||||
|
|
||||||
//cl_float3 rayPos = vec3(0.0, 2.0 * sin(iGlobalTime * 2.7), -12.0);
|
//cl_float3 rayPos = vec3(0.0, 2.0 * sin(iGlobalTime * 2.7), -12.0);
|
||||||
|
|
||||||
// Ah, and here is where it spins around the center axis
|
// Ah, and here is where it spins around the center axis
|
||||||
// So it looks like its applying a function to rotate the x and z axis
|
// So it looks like its applying a function to rotate the x and z axis
|
||||||
//rayPos.xz = rotate2d(rayPos.xz, iGlobalTime);
|
//rayPos.xz = rotate2d(rayPos.xz, iGlobalTime);
|
||||||
//rayDir.xz = rotate2d(rayDir.xz, iGlobalTime);
|
//rayDir.xz = rotate2d(rayDir.xz, iGlobalTime);
|
||||||
|
|
||||||
// Just an intvec of out coords
|
// Just an intvec of out coords
|
||||||
//ivec3 mapPos = ivec3(floor(rayPos));
|
//ivec3 mapPos = ivec3(floor(rayPos));
|
||||||
|
|
||||||
// I think this is the delta t value
|
// I think this is the delta t value
|
||||||
// the magnitude of the vector divided by the rays direction. Not sure what the aim of that is
|
// the magnitude of the vector divided by the rays direction. Not sure what the aim of that is
|
||||||
// The ray direction might always be normalized, so that would be the dame as my delta_T
|
// The ray direction might always be normalized, so that would be the dame as my delta_T
|
||||||
//vec3 deltaDist = abs(vec3(length(rayDir)) / rayDir);
|
//vec3 deltaDist = abs(vec3(length(rayDir)) / rayDir);
|
||||||
|
|
||||||
// The steps are the signs of the ray direction
|
// The steps are the signs of the ray direction
|
||||||
//ivec3 rayStep = ivec3(sign(rayDir));
|
//ivec3 rayStep = ivec3(sign(rayDir));
|
||||||
|
|
||||||
// ithe sign of the rays direction
|
// ithe sign of the rays direction
|
||||||
// *
|
// *
|
||||||
// Convert map position to a floating point vector and take away the ray position
|
// Convert map position to a floating point vector and take away the ray position
|
||||||
// +
|
// +
|
||||||
// the sign of the rays direction by 0.5
|
// the sign of the rays direction by 0.5
|
||||||
// +
|
// +
|
||||||
// 0.5
|
// 0.5
|
||||||
// Now multyply everything by 0.5
|
// Now multyply everything by 0.5
|
||||||
//vec3 sideDist = (sign(rayDir) * (vec3(mapPos) - rayPos) + (sign(rayDir) * 0.5) + 0.5) * deltaDist;
|
//vec3 sideDist = (sign(rayDir) * (vec3(mapPos) - rayPos) + (sign(rayDir) * 0.5) + 0.5) * deltaDist;
|
||||||
|
|
||||||
// A byte mask
|
// A byte mask
|
||||||
//bvec3 mask;
|
//bvec3 mask;
|
||||||
|
|
||||||
// repeat until the max steps
|
// repeat until the max steps
|
||||||
//for (int i = 0; i < MAX_RAY_STEPS; i++) {
|
//for (int i = 0; i < MAX_RAY_STEPS; i++) {
|
||||||
|
|
||||||
// If there is a voxel at the map position, continue?
|
// If there is a voxel at the map position, continue?
|
||||||
//if (getVoxel(mapPos))
|
//if (getVoxel(mapPos))
|
||||||
// break;
|
// break;
|
||||||
|
|
||||||
//
|
//
|
||||||
// find which is smaller
|
// find which is smaller
|
||||||
// y ? z --> x`
|
// y ? z --> x`
|
||||||
// z ? x --> y`
|
// z ? x --> y`
|
||||||
// x ? y --> z`
|
// x ? y --> z`
|
||||||
//
|
//
|
||||||
// find which os is less or equal
|
// find which os is less or equal
|
||||||
// x` ? x --> x
|
// x` ? x --> x
|
||||||
// y` ? y --> y
|
// y` ? y --> y
|
||||||
// z` ? z --> z
|
// z` ? z --> z
|
||||||
|
|
||||||
// Now find which ons is
|
// Now find which ons is
|
||||||
//mask = lessThanEqual(sideDist.xyz, min(sideDist.yzx, sideDist.zxy));
|
//mask = lessThanEqual(sideDist.xyz, min(sideDist.yzx, sideDist.zxy));
|
||||||
|
|
||||||
|
|
||||||
// Originally he used a component wise
|
// Originally he used a component wise
|
||||||
/*bvec3 b1 = lessThan(sideDist.xyz, sideDist.yzx);
|
/*bvec3 b1 = lessThan(sideDist.xyz, sideDist.yzx);
|
||||||
bvec3 b2 = lessThanEqual(sideDist.xyz, sideDist.zxy);
|
bvec3 b2 = lessThanEqual(sideDist.xyz, sideDist.zxy);
|
||||||
mask.x = b1.x && b2.x;
|
mask.x = b1.x && b2.x;
|
||||||
mask.y = b1.y && b2.y;
|
mask.y = b1.y && b2.y;
|
||||||
mask.z = b1.z && b2.z;*/
|
mask.z = b1.z && b2.z;*/
|
||||||
//Would've done mask = b1 && b2 but the compiler is making me do it component wise.
|
//Would've done mask = b1 && b2 but the compiler is making me do it component wise.
|
||||||
|
|
||||||
//All components of mask are false except for the corresponding largest component
|
//All components of mask are false except for the corresponding largest component
|
||||||
//of sideDist, which is the axis along which the ray should be incremented.
|
//of sideDist, which is the axis along which the ray should be incremented.
|
||||||
|
|
||||||
//sideDist += vec3(mask) * deltaDist;
|
//sideDist += vec3(mask) * deltaDist;
|
||||||
//mapPos += ivec3(mask) * rayStep;
|
//mapPos += ivec3(mask) * rayStep;
|
||||||
//}
|
//}
|
||||||
|
|
||||||
// Ah this is for coloring obviously, seems to be odd though, no indexing
|
// Ah this is for coloring obviously, seems to be odd though, no indexing
|
||||||
//vec4 color;
|
//vec4 color;
|
||||||
//if (mask.x) {
|
//if (mask.x) {
|
||||||
// color = vec4(0.5);
|
// color = vec4(0.5);
|
||||||
//}
|
//}
|
||||||
//if (mask.y) {
|
//if (mask.y) {
|
||||||
// color = vec4(1.0);
|
// color = vec4(1.0);
|
||||||
//}
|
//}
|
||||||
//if (mask.z) {
|
//if (mask.z) {
|
||||||
// color = vec4(0.75);
|
// color = vec4(0.75);
|
||||||
//}
|
//}
|
||||||
//write_imagef(image, pixel, color);
|
//write_imagef(image, pixel, color);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -48,17 +48,17 @@ __kernel void min_kern(
|
|||||||
|
|
||||||
// Setup the voxel coords from the camera origin
|
// Setup the voxel coords from the camera origin
|
||||||
int3 voxel = {
|
int3 voxel = {
|
||||||
floorf(cam_pos->x),
|
floor(cam_pos->x),
|
||||||
floorf(cam_pos->y),
|
floor(cam_pos->y),
|
||||||
floorf(cam_pos->z)
|
floor(cam_pos->z)
|
||||||
};
|
};
|
||||||
|
|
||||||
// Delta T is the units a ray must travel along an axis in order to
|
// Delta T is the units a ray must travel along an axis in order to
|
||||||
// traverse an integer split
|
// traverse an integer split
|
||||||
float3 delta_t = {
|
float3 delta_t = {
|
||||||
fabsf(1.0f / ray_dir.x),
|
fabs(1.0f / ray_dir.x),
|
||||||
fabsf(1.0f / ray_dir.y),
|
fabs(1.0f / ray_dir.y),
|
||||||
fabsf(1.0f / ray_dir.z)
|
fabs(1.0f / ray_dir.z)
|
||||||
};
|
};
|
||||||
|
|
||||||
// Intersection T is the collection of the next intersection points
|
// Intersection T is the collection of the next intersection points
|
||||||
@@ -44,10 +44,11 @@ int CL_Wrapper::acquire_platform_and_device(){
|
|||||||
|
|
||||||
d.id = deviceIds[q];
|
d.id = deviceIds[q];
|
||||||
|
|
||||||
clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, 128, &d.platform, NULL);
|
clGetDeviceInfo(d.id, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &d.platform, NULL);
|
||||||
clGetDeviceInfo(d.id, CL_DEVICE_VERSION, 128, &d.version, NULL);
|
clGetDeviceInfo(d.id, CL_DEVICE_VERSION, sizeof(char) * 128, &d.version, NULL);
|
||||||
clGetDeviceInfo(d.id, CL_DEVICE_TYPE, 128, &d.type, NULL);
|
clGetDeviceInfo(d.id, CL_DEVICE_TYPE, sizeof(cl_device_type), &d.type, NULL);
|
||||||
clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, 128, &d.clock_frequency, NULL);
|
clGetDeviceInfo(d.id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &d.clock_frequency, NULL);
|
||||||
|
clGetDeviceInfo(d.id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &d.comp_units, NULL);
|
||||||
|
|
||||||
plt_ids.at(d.platform).push_back(d);
|
plt_ids.at(d.platform).push_back(d);
|
||||||
}
|
}
|
||||||
@@ -58,7 +59,8 @@ int CL_Wrapper::acquire_platform_and_device(){
|
|||||||
// falling back to the cpu with the fastest clock if we weren't able to find one
|
// falling back to the cpu with the fastest clock if we weren't able to find one
|
||||||
|
|
||||||
device current_best_device;
|
device current_best_device;
|
||||||
current_best_device.clock_frequency = 0; // Set this to 0 so the first run always selects a new device
|
current_best_device.type = -1; // Set this to -1 so the first run always selects a new device
|
||||||
|
|
||||||
|
|
||||||
for (auto kvp: plt_ids){
|
for (auto kvp: plt_ids){
|
||||||
|
|
||||||
@@ -72,7 +74,10 @@ int CL_Wrapper::acquire_platform_and_device(){
|
|||||||
if (device.type == CL_DEVICE_TYPE_GPU && current_best_device.type != CL_DEVICE_TYPE_GPU){
|
if (device.type == CL_DEVICE_TYPE_GPU && current_best_device.type != CL_DEVICE_TYPE_GPU){
|
||||||
current_best_device = device;
|
current_best_device = device;
|
||||||
}
|
}
|
||||||
else if (device.clock_frequency > current_best_device.clock_frequency){
|
else if (device.comp_units > current_best_device.comp_units) {
|
||||||
|
current_best_device = device;
|
||||||
|
}
|
||||||
|
else if (current_best_device.type != CL_DEVICE_TYPE_GPU && device.clock_frequency > current_best_device.clock_frequency){
|
||||||
current_best_device = device;
|
current_best_device = device;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -108,7 +113,7 @@ int CL_Wrapper::create_shared_context() {
|
|||||||
//};
|
//};
|
||||||
HGLRC hGLRC = wglGetCurrentContext();
|
HGLRC hGLRC = wglGetCurrentContext();
|
||||||
HDC hDC = wglGetCurrentDC();
|
HDC hDC = wglGetCurrentDC();
|
||||||
cl_context_properties context_properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformIds[1], CL_GL_CONTEXT_KHR, (cl_context_properties)hGLRC, CL_WGL_HDC_KHR, (cl_context_properties)hDC, 0 };
|
cl_context_properties context_properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, CL_GL_CONTEXT_KHR, (cl_context_properties)hGLRC, CL_WGL_HDC_KHR, (cl_context_properties)hDC, 0 };
|
||||||
|
|
||||||
|
|
||||||
#elif defined TARGET_OS_MAC
|
#elif defined TARGET_OS_MAC
|
||||||
@@ -206,6 +211,8 @@ int CL_Wrapper::compile_kernel(std::string kernel_source, bool is_path, std::str
|
|||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
kernel_map.emplace(std::make_pair(kernel_name, kernel));
|
kernel_map.emplace(std::make_pair(kernel_name, kernel));
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int CL_Wrapper::set_kernel_arg(
|
int CL_Wrapper::set_kernel_arg(
|
||||||
@@ -228,6 +235,7 @@ int CL_Wrapper::set_kernel_arg(
|
|||||||
|
|
||||||
int CL_Wrapper::store_buffer(cl_mem buffer, std::string buffer_name){
|
int CL_Wrapper::store_buffer(cl_mem buffer, std::string buffer_name){
|
||||||
buffer_map.emplace(std::make_pair(buffer_name, buffer));
|
buffer_map.emplace(std::make_pair(buffer_name, buffer));
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){
|
int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){
|
||||||
@@ -246,7 +254,7 @@ int CL_Wrapper::run_kernel(std::string kernel_name, const int work_size){
|
|||||||
if (assert(error, "clEnqueueNDRangeKernel"))
|
if (assert(error, "clEnqueueNDRangeKernel"))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,16 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <OpenCL/cl_ext.h>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#ifdef linux
|
#ifdef linux
|
||||||
|
|
||||||
#elif defined _WIN32
|
#elif defined _WIN32
|
||||||
|
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||||
|
#include <CL/cl_gl.h>
|
||||||
|
#include <CL/cl.h>
|
||||||
|
#include <CL/opencl.h>
|
||||||
|
|
||||||
#elif defined TARGET_OS_MAC
|
#elif defined TARGET_OS_MAC
|
||||||
# include <OpenGL/OpenGL.h>
|
# include <OpenGL/OpenGL.h>
|
||||||
@@ -111,6 +114,21 @@ inline int query_platform_devices() {
|
|||||||
|
|
||||||
clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 128, buf, NULL);
|
clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 128, buf, NULL);
|
||||||
fprintf(stdout, "%s\n", buf);
|
fprintf(stdout, "%s\n", buf);
|
||||||
|
|
||||||
|
|
||||||
|
//cl_device_type a;
|
||||||
|
//clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, 128, &a, NULL);
|
||||||
|
//std::cout << a << std::endl;
|
||||||
|
|
||||||
|
//cl_uint b;
|
||||||
|
//clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, 128, &b, NULL);
|
||||||
|
//std::cout << b << std::endl;
|
||||||
|
|
||||||
|
//cl_uint c;
|
||||||
|
//clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, 128, &c, NULL);
|
||||||
|
//std::cout << c << std::endl;
|
||||||
|
|
||||||
|
std::cout << devices[i] << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
free(devices);
|
free(devices);
|
||||||
|
|||||||
10
src/main.cpp
10
src/main.cpp
@@ -9,9 +9,12 @@
|
|||||||
#include <CL/opencl.h>
|
#include <CL/opencl.h>
|
||||||
|
|
||||||
#elif defined _WIN32
|
#elif defined _WIN32
|
||||||
|
#include <windows.h>
|
||||||
#include <CL/cl_gl.h>
|
#include <CL/cl_gl.h>
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#include <CL/opencl.h>
|
#include <CL/opencl.h>
|
||||||
|
#include <GL/GL.h>
|
||||||
|
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
|
||||||
#elif defined TARGET_OS_MAC
|
#elif defined TARGET_OS_MAC
|
||||||
@@ -66,12 +69,13 @@ int main() {
|
|||||||
sf::Texture t;
|
sf::Texture t;
|
||||||
|
|
||||||
CL_Wrapper c;
|
CL_Wrapper c;
|
||||||
|
query_platform_devices();
|
||||||
c.acquire_platform_and_device();
|
c.acquire_platform_and_device();
|
||||||
c.create_shared_context();
|
c.create_shared_context();
|
||||||
c.create_command_queue();
|
c.create_command_queue();
|
||||||
|
|
||||||
c.compile_kernel("../kernels/kernel.c", true, "hello");
|
//c.compile_kernel("../kernels/kernel.cl", true, "hello");
|
||||||
c.compile_kernel("../kernels/minimal_kernel.c", true, "min_kern");
|
c.compile_kernel("../kernels/minimal_kernel.cl", true, "min_kern");
|
||||||
|
|
||||||
sf::Vector3i map_dim(MAP_X, MAP_Y, MAP_Z);
|
sf::Vector3i map_dim(MAP_X, MAP_Y, MAP_Z);
|
||||||
Map* map = new Map(map_dim);
|
Map* map = new Map(map_dim);
|
||||||
@@ -101,7 +105,7 @@ int main() {
|
|||||||
|
|
||||||
// SFML 2.4 has Vector4 datatypes.......
|
// SFML 2.4 has Vector4 datatypes.......
|
||||||
|
|
||||||
float view_matrix[view_res.x * view_res.y * 4];
|
float* view_matrix = new float[WINDOW_X * WINDOW_Y * 4];
|
||||||
for (int y = -view_res.y / 2; y < view_res.y / 2; y++) {
|
for (int y = -view_res.y / 2; y < view_res.y / 2; y++) {
|
||||||
for (int x = -view_res.x / 2; x < view_res.x / 2; x++) {
|
for (int x = -view_res.x / 2; x < view_res.x / 2; x++) {
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user