~10 FPS from moving some oct stuff to const, ~0.5 fps from adding a few more consts to initializers in the kernel
This commit is contained in:
27
include/FrameWatcher.h
Normal file
27
include/FrameWatcher.h
Normal file
@@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
#include "Pub_Sub.h"
|
||||
|
||||
// Frame-timing helper that is also a VrEventPublisher.
//
// do_tick() samples the elapsed time, derives the delta since the previous
// tick, caps it, adds it to an accumulator and then drains that accumulator
// in step_size slices.
class FrameWatcher : public VrEventPublisher {
public:
    FrameWatcher();
    ~FrameWatcher();

    // Advance the timing state by one tick.
    void do_tick();

private:
    // Seconds elapsed since the first call to this function.
    float get_elapsed_time();

    // Slice subtracted from accumulator_time on each pass of the do_tick() loop.
    float step_size{0.0166f};

    // Not referenced by the visible implementation.
    double frame_time{0.0};

    // Latest value returned by get_elapsed_time().
    double elapsed_time{0.0};

    // Time between the two most recent ticks, after capping.
    double delta_time{0.0};

    // Time gathered by do_tick() that has not yet been drained.
    double accumulator_time{0.0};

    // Value of elapsed_time recorded by the previous tick.
    double current_time{0.0};
};
|
||||
@@ -4,8 +4,6 @@
|
||||
#include "util.hpp"
|
||||
#include <tuple>
|
||||
|
||||
#define OCT_DIM 128
|
||||
|
||||
struct OctState {
|
||||
|
||||
int parent_stack_position = 0;
|
||||
@@ -69,8 +67,11 @@ public:
|
||||
|
||||
bool Validate(char* data, sf::Vector3i dimensions);
|
||||
|
||||
unsigned int getDimensions();
|
||||
private:
|
||||
|
||||
unsigned int oct_dimensions = 1;
|
||||
|
||||
std::tuple<uint64_t, uint64_t> GenerationRecursion(
|
||||
char* data, // raw octree data
|
||||
sf::Vector3i dimensions, // dimensions of the raw data
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
|
||||
__constant float4 zeroed_float4 = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
__constant float3 zeroed_float3 = {0.0f, 0.0f, 0.0f};
|
||||
__constant float2 zeroed_float2 = {0.0f, 0.0f};
|
||||
__constant int4 zeroed_int4 = {0, 0, 0, 0};
|
||||
__constant int3 zeroed_int3 = {0, 0, 0};
|
||||
__constant int2 zeroed_int2 = {0, 0};
|
||||
|
||||
float DistanceBetweenPoints(float3 a, float3 b) {
|
||||
return fast_distance(a, b);
|
||||
//return sqrt(pow(a.x - b.x, 2) + pow(a.y - b.y, 2) + pow(a.z - b.z, 2));
|
||||
@@ -31,8 +38,8 @@ float4 white_light(float4 input, float3 light, int3 mask) {
|
||||
|
||||
float4 view_light(float4 in_color, float3 light, float4 light_color, float3 view, int3 mask) {
|
||||
|
||||
if (all(light == (0.0f,0.0f,0.0f)))
|
||||
return (0,0,0,0);
|
||||
if (all(light == zeroed_float3))
|
||||
return zeroed_float4;
|
||||
|
||||
float d = Distance(light) / 100.0f;
|
||||
d *= d;
|
||||
@@ -65,6 +72,30 @@ int rand(int* seed) // 1 <= *seed < m
|
||||
return(*seed);
|
||||
}
|
||||
|
||||
// (X, Y, Z) mask for the idx
|
||||
__constant const uchar idx_set_x_mask = 0x1;
|
||||
__constant const uchar idx_set_y_mask = 0x2;
|
||||
__constant const uchar idx_set_z_mask = 0x4;
|
||||
|
||||
__constant const uchar mask_8[8] = {
|
||||
0x1, 0x2, 0x4, 0x8,
|
||||
0x10, 0x20, 0x40, 0x80
|
||||
};
|
||||
|
||||
// Mask for counting the previous valid bits
|
||||
__constant const uchar count_mask_8[8] = {
|
||||
0x1, 0x3, 0x7, 0xF,
|
||||
0x1F, 0x3F, 0x7F, 0xFF
|
||||
};
|
||||
|
||||
// uint64_t manipulation masks
|
||||
__constant const ulong child_pointer_mask = 0x0000000000007fff;
|
||||
__constant const ulong far_bit_mask = 0x8000;
|
||||
__constant const ulong valid_mask = 0xFF0000;
|
||||
__constant const ulong leaf_mask = 0xFF000000;
|
||||
__constant const ulong contour_pointer_mask = 0xFFFFFF00000000;
|
||||
__constant const ulong contour_mask = 0xFF00000000000000;
|
||||
|
||||
bool get_oct_vox(
|
||||
int3 position,
|
||||
global ulong *octree_descriptor_buffer,
|
||||
@@ -73,31 +104,6 @@ bool get_oct_vox(
|
||||
global ulong *settings_buffer
|
||||
){
|
||||
|
||||
// (X, Y, Z) mask for the idx
|
||||
const uchar idx_set_x_mask = 0x1;
|
||||
const uchar idx_set_y_mask = 0x2;
|
||||
const uchar idx_set_z_mask = 0x4;
|
||||
|
||||
const uchar mask_8[8] = {
|
||||
0x1, 0x2, 0x4, 0x8,
|
||||
0x10, 0x20, 0x40, 0x80
|
||||
};
|
||||
|
||||
// Mask for counting the previous valid bits
|
||||
const uchar count_mask_8[8] = {
|
||||
0x1, 0x3, 0x7, 0xF,
|
||||
0x1F, 0x3F, 0x7F, 0xFF
|
||||
};
|
||||
|
||||
// uint64_t manipulation masks
|
||||
const ulong child_pointer_mask = 0x0000000000007fff;
|
||||
const ulong far_bit_mask = 0x8000;
|
||||
const ulong valid_mask = 0xFF0000;
|
||||
const ulong leaf_mask = 0xFF000000;
|
||||
const ulong contour_pointer_mask = 0xFFFFFF00000000;
|
||||
const ulong contour_mask = 0xFF00000000000000;
|
||||
|
||||
|
||||
// push the root node to the parent stack
|
||||
ulong current_index = *settings_buffer;
|
||||
ulong head = octree_descriptor_buffer[current_index];
|
||||
@@ -115,8 +121,8 @@ bool get_oct_vox(
|
||||
parent_stack[parent_stack_position] = head;
|
||||
|
||||
// Set our initial dimension and the position at the corner of the oct to keep track of our position
|
||||
int dimension = 128;
|
||||
int3 quad_position = (0, 0, 0);
|
||||
int dimension = 64;
|
||||
int3 quad_position = zeroed_int3;
|
||||
|
||||
// While we are not at the required resolution
|
||||
// Traverse down by setting the valid/leaf mask to the subvoxel
|
||||
@@ -156,7 +162,9 @@ bool get_oct_vox(
|
||||
mask_index += 2;
|
||||
|
||||
// TODO What is up with the binary operator on this one?
|
||||
idx_stack[scale] ^= idx_set_y_mask;
|
||||
// Alright, I switched it over and seems not to have done anything?
|
||||
// idx_stack[scale] ^= idx_set_y_mask;
|
||||
idx_stack[scale] |= idx_set_y_mask;
|
||||
|
||||
}
|
||||
if (position.z >= (dimension / 2) + quad_position.z) {
|
||||
@@ -237,7 +245,7 @@ bool cast_light_intersection_ray(
|
||||
int3 voxel_step = { 1, 1, 1 };
|
||||
voxel_step *= (ray_dir > 0) - (ray_dir < 0);
|
||||
|
||||
if (any(ray_dir == (0.0f,0.0f,0.0f)))
|
||||
if (any(ray_dir == zeroed_float3))
|
||||
return false;
|
||||
|
||||
// Setup the voxel coords from the camera origin
|
||||
@@ -252,7 +260,7 @@ bool cast_light_intersection_ray(
|
||||
// for negative values, wrap around the delta_t
|
||||
intersection_t += delta_t * -convert_float3(isless(intersection_t, 0));
|
||||
|
||||
int3 face_mask = { 0, 0, 0 };
|
||||
int3 face_mask =zeroed_int3;
|
||||
int length_cutoff = 0;
|
||||
|
||||
// Andrew Woo's raycasting algo
|
||||
@@ -344,7 +352,7 @@ __kernel void raycaster(
|
||||
|
||||
// Delta T is the units a ray must travel along an axis in order to
|
||||
// traverse an integer split
|
||||
if (any(ray_dir == (0.0f,0.0f,0.0f)))
|
||||
if (any(ray_dir == zeroed_float3))
|
||||
return;
|
||||
float3 delta_t = fabs(1.0f / ray_dir);
|
||||
|
||||
@@ -366,11 +374,11 @@ __kernel void raycaster(
|
||||
uint bounce_count = 0;
|
||||
int3 face_mask = { 0, 0, 0 };
|
||||
int voxel_data = 0;
|
||||
float3 face_position = (0,0,0);
|
||||
float4 voxel_color= (0,0,0,0);
|
||||
float2 tile_face_position = (0,0);
|
||||
float3 sign = (0,0,0);
|
||||
float4 first_strike = (0,0,0,0);
|
||||
float3 face_position = zeroed_float3;
|
||||
float4 voxel_color= zeroed_float4;
|
||||
float2 tile_face_position = zeroed_float2;
|
||||
float3 sign = zeroed_float3;
|
||||
float4 first_strike = zeroed_float4;
|
||||
|
||||
bool shadow_ray = false;
|
||||
|
||||
@@ -386,35 +394,35 @@ __kernel void raycaster(
|
||||
if (any(voxel >= *map_dim) || any(voxel < 0)){
|
||||
voxel_data = 5;
|
||||
voxel.xyz -= voxel_step.xyz * face_mask.xyz;
|
||||
first_strike = mix(fog_color, voxel_color, 1.0 - max(distance_traveled / 700.0f, (float)0));
|
||||
first_strike = mix(fog_color, voxel_color, 1.0f - max(distance_traveled / 700.0f, 0.0f));
|
||||
}
|
||||
|
||||
|
||||
|
||||
// If we hit a voxel
|
||||
// if (voxel.x < 128 && voxel.y < 128 && voxel.z < 128){
|
||||
// if (get_oct_vox(
|
||||
// voxel,
|
||||
// octree_descriptor_buffer,
|
||||
// octree_attachment_lookup_buffer,
|
||||
// octree_attachment_buffer,
|
||||
// settings_buffer
|
||||
// )){
|
||||
// voxel_data = 1;
|
||||
// } else {
|
||||
// voxel_data = 0;
|
||||
// }
|
||||
// } else {
|
||||
if (voxel.x < 64 && voxel.y < 64 && voxel.z < 64){
|
||||
if (get_oct_vox(
|
||||
voxel,
|
||||
octree_descriptor_buffer,
|
||||
octree_attachment_lookup_buffer,
|
||||
octree_attachment_buffer,
|
||||
settings_buffer
|
||||
)){
|
||||
voxel_data = 5;
|
||||
} else {
|
||||
voxel_data = 0;
|
||||
}
|
||||
} else {
|
||||
voxel_data = map[voxel.x + (*map_dim).x * (voxel.y + (*map_dim).z * (voxel.z))];
|
||||
//}
|
||||
}
|
||||
|
||||
|
||||
if (voxel_data != 0) {
|
||||
|
||||
|
||||
// Determine where on the 2d plane the ray intersected
|
||||
face_position = (float3)(0);
|
||||
tile_face_position = (float2)(0);
|
||||
face_position = zeroed_float3;
|
||||
tile_face_position = zeroed_float2;
|
||||
sign = (float3)(1.0f, 1.0f, 1.0f);
|
||||
|
||||
// First determine the percent of the way the ray is towards the next intersection_t
|
||||
@@ -517,7 +525,7 @@ __kernel void raycaster(
|
||||
|
||||
float3 hit_pos = convert_float3(voxel) + face_position;
|
||||
ray_dir = normalize((float3)(lights[4], lights[5], lights[6]) - hit_pos);
|
||||
if (any(ray_dir == (0.0f,0.0f,0.0f)))
|
||||
if (any(ray_dir == zeroed_float3))
|
||||
return;
|
||||
|
||||
voxel -= voxel_step * face_mask;
|
||||
@@ -539,13 +547,13 @@ __kernel void raycaster(
|
||||
convert_int2((float2)(3, 4) * convert_float2(*atlas_dim / *tile_dim))
|
||||
).xyz/2;
|
||||
|
||||
voxel_color.w += 0.3f;
|
||||
max_distance = 500;
|
||||
voxel_color.w -= 0.3f;
|
||||
max_distance = 700;
|
||||
distance_traveled = 0;
|
||||
|
||||
float3 hit_pos = convert_float3(voxel) + face_position;
|
||||
ray_dir *= sign;
|
||||
if (any(ray_dir == (0.0f,0.0f,0.0f)))
|
||||
if (any(ray_dir == zeroed_float3))
|
||||
return;
|
||||
|
||||
voxel -= voxel_step * face_mask;
|
||||
|
||||
@@ -23,9 +23,6 @@ Application::~Application() {
|
||||
|
||||
bool Application::init_clcaster() {
|
||||
|
||||
//Map _map(32);
|
||||
//return 0;
|
||||
|
||||
// Start up the raycaster
|
||||
raycaster = std::make_shared<CLCaster>();
|
||||
if (!raycaster->init())
|
||||
@@ -38,7 +35,9 @@ bool Application::init_clcaster() {
|
||||
// Send the data to the GPU
|
||||
raycaster->assign_map(map);
|
||||
|
||||
octree = std::make_shared<Map>(128, map.get());
|
||||
// Init the raycaster with a specified dimension and a pointer to the source
|
||||
// array style data
|
||||
octree = std::make_shared<Map>(64, map.get());
|
||||
raycaster->assign_octree(octree);
|
||||
|
||||
|
||||
|
||||
47
src/FrameWatcher.cpp
Normal file
47
src/FrameWatcher.cpp
Normal file
@@ -0,0 +1,47 @@
|
||||
#include "FrameWatcher.h"
|
||||
#include <chrono>
|
||||
|
||||
// Nothing to set up here: every data member carries an in-class initialiser.
FrameWatcher::FrameWatcher() = default;
|
||||
|
||||
// No resources are owned directly, so destruction needs no custom work.
FrameWatcher::~FrameWatcher() = default;
|
||||
|
||||
void FrameWatcher::do_tick() {
|
||||
|
||||
|
||||
elapsed_time = get_elapsed_time();
|
||||
delta_time = elapsed_time - current_time;
|
||||
current_time = elapsed_time;
|
||||
|
||||
if (delta_time > 0.2f)
|
||||
delta_time = 0.2f;
|
||||
|
||||
accumulator_time += delta_time;
|
||||
|
||||
while ((accumulator_time - step_size) >= step_size) {
|
||||
accumulator_time -= step_size;
|
||||
|
||||
// ==== DELTA TIME LOCKED ====
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Returns the number of seconds elapsed since the first call to this function.
//
// The reference point is a function-local static, so it is captured once and
// shared by every FrameWatcher instance.
float FrameWatcher::get_elapsed_time() {

    // steady_clock is monotonic. system_clock follows wall-clock adjustments,
    // which can make the elapsed time jump or run backwards.
    using clock = std::chrono::steady_clock;

    // Initialised on the first call; a static local replaces the separate
    // "started" flag and is initialised exactly once.
    static const clock::time_point start = clock::now();

    const std::chrono::duration<double> elapsed = clock::now() - start;

    // NOTE(review): narrowing to float makes the result coarser as uptime
    // grows; the return type is fixed by the class declaration.
    return static_cast<float>(elapsed.count());
}
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
* - Octree, Map interface with the GPU
|
||||
* - Octree, Map refactoring
|
||||
* - Separate Application stages into areas that make sense
|
||||
* -
|
||||
* - Saving, loading of RLE voxel data and raw oct data
|
||||
*/
|
||||
|
||||
#include "Application.h"
|
||||
|
||||
@@ -66,7 +66,7 @@ bool Map::test_oct_arr_traversal(sf::Vector3i dimensions) {
|
||||
}
|
||||
|
||||
// Writes val into the flat voxel_data array at the cell addressed by pos.
//
// The index is x + dim * (y + dim * z), with dim taken from the octree. The
// additional write that used the fixed OCT_DIM stride has been dropped: it
// addressed a different cell whenever the octree dimension was not OCT_DIM.
// NOTE(review): pos is not range-checked here — confirm callers guarantee it.
void Map::setVoxel(sf::Vector3i pos, int val) {
    const unsigned int dim = octree.getDimensions();
    voxel_data[pos.x + dim * (pos.y + dim * pos.z)] = val;
}
|
||||
|
||||
char Map::getVoxel(sf::Vector3i pos){
|
||||
|
||||
@@ -11,13 +11,15 @@ Octree::Octree() {
|
||||
|
||||
void Octree::Generate(char* data, sf::Vector3i dimensions) {
|
||||
|
||||
oct_dimensions = dimensions.x;
|
||||
|
||||
// Launch the recursive generator at (0,0,0) as the first point
|
||||
// and the octree dimension as the initial block size
|
||||
std::tuple<uint64_t, uint64_t> root_node = GenerationRecursion(data, dimensions, sf::Vector3i(0, 0, 0), OCT_DIM/2);
|
||||
std::tuple<uint64_t, uint64_t> root_node = GenerationRecursion(data, dimensions, sf::Vector3i(0, 0, 0), oct_dimensions/2);
|
||||
|
||||
// ========= DEBUG ==============
|
||||
PrettyPrintUINT64(std::get<0>(root_node), &output_stream);
|
||||
output_stream << " " << OCT_DIM << " " << counter++ << std::endl;
|
||||
output_stream << " " << oct_dimensions << " " << counter++ << std::endl;
|
||||
// ==============================
|
||||
|
||||
// set the root nodes relative pointer to 1 because the next element will be the top of the tree, and push to the stack
|
||||
@@ -51,7 +53,7 @@ OctState Octree::GetVoxel(sf::Vector3i position) {
|
||||
state.parent_stack[state.parent_stack_position] = head;
|
||||
|
||||
// Set our initial dimension and the position at the corner of the oct to keep track of our position
|
||||
int dimension = OCT_DIM;
|
||||
int dimension = oct_dimensions;
|
||||
sf::Vector3i quad_position(0, 0, 0);
|
||||
|
||||
// While we are not at the required resolution
|
||||
@@ -313,7 +315,7 @@ std::tuple<uint64_t, uint64_t> Octree::GenerationRecursion(char* data, sf::Vecto
|
||||
}
|
||||
|
||||
char Octree::get1DIndexedVoxel(char* data, sf::Vector3i dimensions, sf::Vector3i position) {
|
||||
return data[position.x + OCT_DIM * (position.y + OCT_DIM * position.z)];
|
||||
return data[position.x + oct_dimensions * (position.y + oct_dimensions * position.z)];
|
||||
}
|
||||
|
||||
bool Octree::Validate(char* data, sf::Vector3i dimensions){
|
||||
@@ -343,3 +345,7 @@ bool Octree::Validate(char* data, sf::Vector3i dimensions){
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned int Octree::getDimensions() {
|
||||
return oct_dimensions;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user