549 lines
16 KiB
C++
549 lines
16 KiB
C++
#ifndef VOXRAYTRACERCUDA_H
|
|
#define VOXRAYTRACERCUDA_H
|
|
|
|
#ifdef USE_CUDA
|
|
|
|
#include "Math/VoxImage.h"
|
|
#include "Math/VoxRaytracer.h"
|
|
#include <cuda_runtime.h>
|
|
|
|
namespace uLib {
|
|
|
|
#ifdef __CUDACC__
|
|
template <typename VoxelT>
|
|
__global__ void
|
|
RaytraceAccumulateKernel(const float *lines_data, int num_lines,
|
|
VoxelT *d_image, int dim0, int dim1, int dim2,
|
|
const float *inv_world_matrix_data, float scale0,
|
|
float scale1, float scale2) {
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (idx >= num_lines)
|
|
return;
|
|
|
|
const float *line_ptr = &lines_data[idx * 8];
|
|
|
|
float o_vec[4] = {line_ptr[0], line_ptr[1], line_ptr[2], line_ptr[3]};
|
|
float d_vec[4] = {line_ptr[4], line_ptr[5], line_ptr[6], line_ptr[7]};
|
|
|
|
float pt[4] = {0, 0, 0, 0};
|
|
float s[4] = {0, 0, 0, 0};
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
for (int j = 0; j < 4; ++j) {
|
|
float m_val = inv_world_matrix_data[i + j * 4];
|
|
pt[i] += m_val * o_vec[j];
|
|
s[i] += m_val * d_vec[j];
|
|
}
|
|
}
|
|
|
|
float l = sqrtf(s[0] * s[0] + s[1] * s[1] + s[2] * s[2]);
|
|
if (l == 0)
|
|
return;
|
|
|
|
float L[3];
|
|
L[0] = l / s[0];
|
|
L[1] = l / s[1];
|
|
L[2] = l / s[2];
|
|
|
|
float offset[3];
|
|
for (int i = 0; i < 3; ++i) {
|
|
float fpt_i = floorf(pt[i]);
|
|
offset[i] = (s[i] >= 0) ? (1.0f - (pt[i] - fpt_i)) : (pt[i] - fpt_i);
|
|
offset[i] = fabsf(offset[i] * L[i]);
|
|
L[i] = fabsf(L[i]);
|
|
}
|
|
|
|
int id;
|
|
float d;
|
|
int vid[3] = {(int)floorf(pt[0]), (int)floorf(pt[1]), (int)floorf(pt[2])};
|
|
|
|
float scale_arr[3] = {scale0, scale1, scale2};
|
|
|
|
while (vid[0] >= 0 && vid[0] < dim0 && vid[1] >= 0 && vid[1] < dim1 &&
|
|
vid[2] >= 0 && vid[2] < dim2) {
|
|
|
|
d = offset[0];
|
|
id = 0;
|
|
if (offset[1] < d) {
|
|
d = offset[1];
|
|
id = 1;
|
|
}
|
|
if (offset[2] < d) {
|
|
d = offset[2];
|
|
id = 2;
|
|
}
|
|
|
|
float L_intersect = d * scale_arr[id];
|
|
size_t vox_index = vid[0] * dim1 * dim2 + vid[1] * dim2 + vid[2];
|
|
|
|
atomicAdd(&(d_image[vox_index].Value), L_intersect);
|
|
|
|
float sign_s = (s[id] >= 0) ? 1.0f : -1.0f;
|
|
vid[id] += (int)sign_s;
|
|
|
|
offset[0] -= d;
|
|
offset[1] -= d;
|
|
offset[2] -= d;
|
|
offset[id] = L[id];
|
|
}
|
|
}
|
|
#endif
|
|
|
|
template <typename VoxelT>
|
|
void VoxRaytracer::AccumulateLinesCUDA(const HLine3f *lines, size_t num_lines,
|
|
VoxImage<VoxelT> &image) {
|
|
if (num_lines == 0)
|
|
return;
|
|
|
|
image.Data().MoveToVRAM();
|
|
|
|
float *d_lines = nullptr;
|
|
size_t lines_size = num_lines * sizeof(HLine3f);
|
|
cudaMalloc(&d_lines, lines_size);
|
|
cudaMemcpy(d_lines, lines, lines_size, cudaMemcpyHostToDevice);
|
|
int threadsPerBlock = 256;
|
|
int blocksPerGrid = (num_lines + threadsPerBlock - 1) / threadsPerBlock;
|
|
|
|
Vector3i dims = image.GetDims();
|
|
Matrix4f inv_world_matrix = image.GetWorldMatrix().inverse();
|
|
|
|
float *d_inv_world;
|
|
cudaMalloc(&d_inv_world, 16 * sizeof(float));
|
|
cudaMemcpy(d_inv_world, inv_world_matrix.data(), 16 * sizeof(float),
|
|
cudaMemcpyHostToDevice);
|
|
|
|
#ifdef __CUDACC__
|
|
RaytraceAccumulateKernel<<<blocksPerGrid, threadsPerBlock>>>(
|
|
d_lines, num_lines, image.Data().GetVRAMData(), dims(0), dims(1), dims(2),
|
|
d_inv_world, m_scale(0), m_scale(1), m_scale(2));
|
|
cudaDeviceSynchronize();
|
|
|
|
cudaError_t err = cudaGetLastError();
|
|
if (err != cudaSuccess) {
|
|
std::cerr << "CUDA Error in AccumulateLinesCUDA: "
|
|
<< cudaGetErrorString(err) << std::endl;
|
|
}
|
|
#else
|
|
std::cerr << "RaytraceAccumulateKernel requires NVCC!" << std::endl;
|
|
#endif
|
|
|
|
cudaFree(d_lines);
|
|
cudaFree(d_inv_world);
|
|
}
|
|
|
|
#ifdef __CUDACC__
|
|
|
|
__global__ void TraceBetweenPointsKernel(
|
|
const float *in_pts_data, const float *out_pts_data, int num_lines,
|
|
VoxRaytracer::RayData::Element **d_out_elements, size_t *d_out_counts,
|
|
float *d_out_lengths, int max_elements, int dim0, int dim1, int dim2,
|
|
const float *inv_world_matrix_data, float scale0, float scale1,
|
|
float scale2, int inc0, int inc1, int inc2) {
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (idx >= num_lines)
|
|
return;
|
|
|
|
VoxRaytracer::RayData::Element *ray_out = d_out_elements[idx];
|
|
size_t count = 0;
|
|
float tot_len = 0.0f;
|
|
|
|
const float *in_ptr = &in_pts_data[idx * 4];
|
|
const float *out_ptr = &out_pts_data[idx * 4];
|
|
|
|
float pt1[4] = {0, 0, 0, 0}, pt2[4] = {0, 0, 0, 0};
|
|
for (int i = 0; i < 4; ++i) {
|
|
for (int j = 0; j < 4; ++j) {
|
|
float m_val = inv_world_matrix_data[i + j * 4];
|
|
pt1[i] += m_val * in_ptr[j];
|
|
pt2[i] += m_val * out_ptr[j];
|
|
}
|
|
}
|
|
|
|
float s[4];
|
|
for (int i = 0; i < 4; ++i)
|
|
s[i] = pt2[i] - pt1[i];
|
|
|
|
float l = sqrtf(s[0] * s[0] + s[1] * s[1] + s[2] * s[2]);
|
|
if (l == 0) {
|
|
d_out_counts[idx] = count;
|
|
d_out_lengths[idx] = tot_len;
|
|
return;
|
|
}
|
|
|
|
float L[3];
|
|
L[0] = fabsf(l / s[0]);
|
|
L[1] = fabsf(l / s[1]);
|
|
L[2] = fabsf(l / s[2]);
|
|
|
|
float offset[3];
|
|
for (int i = 0; i < 3; ++i) {
|
|
float fpt_i = floorf(pt1[i]);
|
|
offset[i] = (s[i] >= 0) ? (1.0f - (pt1[i] - fpt_i)) : (pt1[i] - fpt_i);
|
|
offset[i] = fabsf(offset[i] * L[i]);
|
|
}
|
|
|
|
int vid[3] = {(int)floorf(pt1[0]), (int)floorf(pt1[1]), (int)floorf(pt1[2])};
|
|
int vid_out[3] = {(int)floorf(pt2[0]), (int)floorf(pt2[1]),
|
|
(int)floorf(pt2[2])};
|
|
|
|
float scale_arr[3] = {scale0, scale1, scale2};
|
|
|
|
if (vid[0] == vid_out[0] && vid[1] == vid_out[1] && vid[2] == vid_out[2]) {
|
|
if (vid[0] >= 0 && vid[0] < dim0 && vid[1] >= 0 && vid[1] < dim1 &&
|
|
vid[2] >= 0 && vid[2] < dim2) {
|
|
if (count < max_elements) {
|
|
int map_id = vid[0] * inc0 + vid[1] * inc1 + vid[2] * inc2;
|
|
ray_out[count].vox_id = map_id;
|
|
ray_out[count].L = l;
|
|
tot_len += l;
|
|
count++;
|
|
}
|
|
}
|
|
d_out_counts[idx] = count;
|
|
d_out_lengths[idx] = tot_len;
|
|
return;
|
|
}
|
|
|
|
int id;
|
|
float d;
|
|
while (l > 0) {
|
|
d = offset[0];
|
|
id = 0;
|
|
if (offset[1] < d) {
|
|
d = offset[1];
|
|
id = 1;
|
|
}
|
|
if (offset[2] < d) {
|
|
d = offset[2];
|
|
id = 2;
|
|
}
|
|
|
|
if (vid[0] >= 0 && vid[0] < dim0 && vid[1] >= 0 && vid[1] < dim1 &&
|
|
vid[2] >= 0 && vid[2] < dim2) {
|
|
if (count < max_elements) {
|
|
int map_id = vid[0] * inc0 + vid[1] * inc1 + vid[2] * inc2;
|
|
ray_out[count].vox_id = map_id;
|
|
ray_out[count].L = d * scale_arr[id];
|
|
tot_len += d * scale_arr[id];
|
|
count++;
|
|
}
|
|
}
|
|
|
|
float sign_s = (s[id] >= 0) ? 1.0f : -1.0f;
|
|
vid[id] += (int)sign_s;
|
|
|
|
l -= d;
|
|
|
|
offset[0] -= d;
|
|
offset[1] -= d;
|
|
offset[2] -= d;
|
|
offset[id] = fminf(L[id], l);
|
|
}
|
|
d_out_counts[idx] = count;
|
|
d_out_lengths[idx] = tot_len;
|
|
}
|
|
|
|
__global__ void TraceLineKernel(const float *lines_data, int num_lines,
|
|
VoxRaytracer::RayData::Element **d_out_elements,
|
|
size_t *d_out_counts, float *d_out_lengths,
|
|
int max_elements, int dim0, int dim1, int dim2,
|
|
const float *inv_world_matrix_data,
|
|
float scale0, float scale1, float scale2,
|
|
int inc0, int inc1, int inc2) {
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (idx >= num_lines)
|
|
return;
|
|
|
|
VoxRaytracer::RayData::Element *ray_out = d_out_elements[idx];
|
|
size_t count = 0;
|
|
float tot_len = 0.0f;
|
|
|
|
const float *line_ptr = &lines_data[idx * 8];
|
|
float o_vec[4] = {line_ptr[0], line_ptr[1], line_ptr[2], line_ptr[3]};
|
|
float d_vec[4] = {line_ptr[4], line_ptr[5], line_ptr[6], line_ptr[7]};
|
|
|
|
float pt[4] = {0, 0, 0, 0}, s[4] = {0, 0, 0, 0};
|
|
for (int i = 0; i < 4; ++i) {
|
|
for (int j = 0; j < 4; ++j) {
|
|
float m_val = inv_world_matrix_data[i + j * 4];
|
|
pt[i] += m_val * o_vec[j];
|
|
s[i] += m_val * d_vec[j];
|
|
}
|
|
}
|
|
|
|
float l = sqrtf(s[0] * s[0] + s[1] * s[1] + s[2] * s[2]);
|
|
if (l == 0) {
|
|
d_out_counts[idx] = count;
|
|
d_out_lengths[idx] = tot_len;
|
|
return;
|
|
}
|
|
|
|
float L[3];
|
|
L[0] = fabsf(l / s[0]);
|
|
L[1] = fabsf(l / s[1]);
|
|
L[2] = fabsf(l / s[2]);
|
|
|
|
float offset[3];
|
|
for (int i = 0; i < 3; ++i) {
|
|
float fpt_i = floorf(pt[i]);
|
|
offset[i] = (s[i] >= 0) ? (1.0f - (pt[i] - fpt_i)) : (pt[i] - fpt_i);
|
|
offset[i] = fabsf(offset[i] * L[i]);
|
|
}
|
|
|
|
int id;
|
|
float d;
|
|
int vid[3] = {(int)floorf(pt[0]), (int)floorf(pt[1]), (int)floorf(pt[2])};
|
|
float scale_arr[3] = {scale0, scale1, scale2};
|
|
|
|
while (vid[0] >= 0 && vid[0] < dim0 && vid[1] >= 0 && vid[1] < dim1 &&
|
|
vid[2] >= 0 && vid[2] < dim2) {
|
|
d = offset[0];
|
|
id = 0;
|
|
if (offset[1] < d) {
|
|
d = offset[1];
|
|
id = 1;
|
|
}
|
|
if (offset[2] < d) {
|
|
d = offset[2];
|
|
id = 2;
|
|
}
|
|
|
|
if (count < max_elements) {
|
|
int map_id = vid[0] * inc0 + vid[1] * inc1 + vid[2] * inc2;
|
|
ray_out[count].vox_id = map_id;
|
|
ray_out[count].L = d * scale_arr[id];
|
|
tot_len += d * scale_arr[id];
|
|
count++;
|
|
}
|
|
|
|
float sign_s = (s[id] >= 0) ? 1.0f : -1.0f;
|
|
vid[id] += (int)sign_s;
|
|
|
|
offset[0] -= d;
|
|
offset[1] -= d;
|
|
offset[2] -= d;
|
|
offset[id] = L[id];
|
|
}
|
|
d_out_counts[idx] = count;
|
|
d_out_lengths[idx] = tot_len;
|
|
}
|
|
|
|
#endif // __CUDACC__
|
|
|
|
inline void VoxRaytracer::TraceLineCUDA(const HLine3f *lines, size_t num_lines,
|
|
RayData *out_rays,
|
|
int max_elements_per_ray,
|
|
float *kernel_time_ms) {
|
|
if (num_lines == 0)
|
|
return;
|
|
|
|
float *d_lines = nullptr;
|
|
bool alloc_lines = false;
|
|
|
|
cudaPointerAttributes ptr_attr;
|
|
cudaPointerGetAttributes(&ptr_attr, lines);
|
|
if (ptr_attr.type == cudaMemoryTypeDevice) {
|
|
d_lines = (float *)lines;
|
|
} else {
|
|
alloc_lines = true;
|
|
size_t lines_size = num_lines * sizeof(HLine3f);
|
|
cudaMalloc(&d_lines, lines_size);
|
|
cudaMemcpy(d_lines, lines, lines_size, cudaMemcpyHostToDevice);
|
|
}
|
|
|
|
std::vector<RayData::Element *> h_out_elements(num_lines);
|
|
for (size_t i = 0; i < num_lines; ++i) {
|
|
out_rays[i].Data().resize(max_elements_per_ray);
|
|
out_rays[i].Data().MoveToVRAM();
|
|
h_out_elements[i] = out_rays[i].Data().GetVRAMData();
|
|
}
|
|
|
|
RayData::Element **d_out_elements;
|
|
cudaMalloc(&d_out_elements, num_lines * sizeof(RayData::Element *));
|
|
cudaMemcpy(d_out_elements, h_out_elements.data(),
|
|
num_lines * sizeof(RayData::Element *), cudaMemcpyHostToDevice);
|
|
|
|
size_t *d_out_counts;
|
|
float *d_out_lengths;
|
|
cudaMalloc(&d_out_counts, num_lines * sizeof(size_t));
|
|
cudaMalloc(&d_out_lengths, num_lines * sizeof(float));
|
|
|
|
int threadsPerBlock = 256;
|
|
int blocksPerGrid = (num_lines + threadsPerBlock - 1) / threadsPerBlock;
|
|
|
|
Vector3i dims = m_Image->GetDims();
|
|
Vector3i incs = m_Image->GetIncrements();
|
|
Matrix4f inv_world_matrix = m_Image->GetWorldMatrix().inverse();
|
|
|
|
float *d_inv_world;
|
|
cudaMalloc(&d_inv_world, 16 * sizeof(float));
|
|
cudaMemcpy(d_inv_world, inv_world_matrix.data(), 16 * sizeof(float),
|
|
cudaMemcpyHostToDevice);
|
|
|
|
#ifdef __CUDACC__
|
|
cudaEvent_t start, stop;
|
|
if (kernel_time_ms) {
|
|
cudaEventCreate(&start);
|
|
cudaEventCreate(&stop);
|
|
cudaEventRecord(start);
|
|
}
|
|
|
|
TraceLineKernel<<<blocksPerGrid, threadsPerBlock>>>(
|
|
d_lines, num_lines, d_out_elements, d_out_counts, d_out_lengths,
|
|
max_elements_per_ray, dims(0), dims(1), dims(2), d_inv_world, m_scale(0),
|
|
m_scale(1), m_scale(2), incs(0), incs(1), incs(2));
|
|
|
|
if (kernel_time_ms) {
|
|
cudaEventRecord(stop);
|
|
cudaEventSynchronize(stop);
|
|
cudaEventElapsedTime(kernel_time_ms, start, stop);
|
|
cudaEventDestroy(start);
|
|
cudaEventDestroy(stop);
|
|
} else {
|
|
cudaDeviceSynchronize();
|
|
}
|
|
|
|
cudaError_t err = cudaGetLastError();
|
|
if (err != cudaSuccess) {
|
|
std::cerr << "CUDA Error in TraceLineCUDA: " << cudaGetErrorString(err)
|
|
<< std::endl;
|
|
}
|
|
#else
|
|
std::cerr << "TraceLineKernel requires NVCC!" << std::endl;
|
|
#endif
|
|
|
|
std::vector<size_t> h_out_counts(num_lines);
|
|
std::vector<float> h_out_lengths(num_lines);
|
|
cudaMemcpy(h_out_counts.data(), d_out_counts, num_lines * sizeof(size_t),
|
|
cudaMemcpyDeviceToHost);
|
|
cudaMemcpy(h_out_lengths.data(), d_out_lengths, num_lines * sizeof(float),
|
|
cudaMemcpyDeviceToHost);
|
|
|
|
for (size_t i = 0; i < num_lines; ++i) {
|
|
out_rays[i].SetCount(h_out_counts[i]);
|
|
out_rays[i].SetTotalLength(h_out_lengths[i]);
|
|
}
|
|
|
|
if (alloc_lines) {
|
|
cudaFree(d_lines);
|
|
}
|
|
cudaFree(d_out_elements);
|
|
cudaFree(d_out_counts);
|
|
cudaFree(d_out_lengths);
|
|
cudaFree(d_inv_world);
|
|
}
|
|
|
|
inline void VoxRaytracer::TraceBetweenPointsCUDA(
|
|
const HPoint3f *in_pts, const HPoint3f *out_pts, size_t num_lines,
|
|
RayData *out_rays, int max_elements_per_ray, float *kernel_time_ms) {
|
|
if (num_lines == 0)
|
|
return;
|
|
|
|
float *d_in_pts = nullptr;
|
|
float *d_out_pts = nullptr;
|
|
bool alloc_pts = false;
|
|
|
|
cudaPointerAttributes ptr_attr;
|
|
cudaPointerGetAttributes(&ptr_attr, in_pts);
|
|
if (ptr_attr.type == cudaMemoryTypeDevice) {
|
|
d_in_pts = (float *)in_pts;
|
|
d_out_pts = (float *)out_pts;
|
|
} else {
|
|
alloc_pts = true;
|
|
size_t pts_size = num_lines * sizeof(HPoint3f);
|
|
cudaMalloc(&d_in_pts, pts_size);
|
|
cudaMalloc(&d_out_pts, pts_size);
|
|
cudaMemcpy(d_in_pts, in_pts, pts_size, cudaMemcpyHostToDevice);
|
|
cudaMemcpy(d_out_pts, out_pts, pts_size, cudaMemcpyHostToDevice);
|
|
}
|
|
|
|
std::vector<RayData::Element *> h_out_elements(num_lines);
|
|
for (size_t i = 0; i < num_lines; ++i) {
|
|
out_rays[i].Data().resize(max_elements_per_ray);
|
|
out_rays[i].Data().MoveToVRAM();
|
|
h_out_elements[i] = out_rays[i].Data().GetVRAMData();
|
|
}
|
|
|
|
RayData::Element **d_out_elements;
|
|
cudaMalloc(&d_out_elements, num_lines * sizeof(RayData::Element *));
|
|
cudaMemcpy(d_out_elements, h_out_elements.data(),
|
|
num_lines * sizeof(RayData::Element *), cudaMemcpyHostToDevice);
|
|
|
|
size_t *d_out_counts;
|
|
float *d_out_lengths;
|
|
cudaMalloc(&d_out_counts, num_lines * sizeof(size_t));
|
|
cudaMalloc(&d_out_lengths, num_lines * sizeof(float));
|
|
|
|
int threadsPerBlock = 256;
|
|
int blocksPerGrid = (num_lines + threadsPerBlock - 1) / threadsPerBlock;
|
|
|
|
Vector3i dims = m_Image->GetDims();
|
|
Vector3i incs = m_Image->GetIncrements();
|
|
Matrix4f inv_world_matrix = m_Image->GetWorldMatrix().inverse();
|
|
|
|
float *d_inv_world;
|
|
cudaMalloc(&d_inv_world, 16 * sizeof(float));
|
|
cudaMemcpy(d_inv_world, inv_world_matrix.data(), 16 * sizeof(float),
|
|
cudaMemcpyHostToDevice);
|
|
|
|
#ifdef __CUDACC__
|
|
cudaEvent_t start, stop;
|
|
if (kernel_time_ms) {
|
|
cudaEventCreate(&start);
|
|
cudaEventCreate(&stop);
|
|
cudaEventRecord(start);
|
|
}
|
|
|
|
TraceBetweenPointsKernel<<<blocksPerGrid, threadsPerBlock>>>(
|
|
d_in_pts, d_out_pts, num_lines, d_out_elements, d_out_counts,
|
|
d_out_lengths, max_elements_per_ray, dims(0), dims(1), dims(2),
|
|
d_inv_world, m_scale(0), m_scale(1), m_scale(2), incs(0), incs(1),
|
|
incs(2));
|
|
|
|
if (kernel_time_ms) {
|
|
cudaEventRecord(stop);
|
|
cudaEventSynchronize(stop);
|
|
cudaEventElapsedTime(kernel_time_ms, start, stop);
|
|
cudaEventDestroy(start);
|
|
cudaEventDestroy(stop);
|
|
} else {
|
|
cudaDeviceSynchronize();
|
|
}
|
|
|
|
cudaError_t err = cudaGetLastError();
|
|
if (err != cudaSuccess) {
|
|
std::cerr << "CUDA Error in TraceBetweenPointsCUDA: "
|
|
<< cudaGetErrorString(err) << std::endl;
|
|
}
|
|
#else
|
|
std::cerr << "TraceBetweenPointsKernel requires NVCC!" << std::endl;
|
|
#endif
|
|
|
|
std::vector<size_t> h_out_counts(num_lines);
|
|
std::vector<float> h_out_lengths(num_lines);
|
|
cudaMemcpy(h_out_counts.data(), d_out_counts, num_lines * sizeof(size_t),
|
|
cudaMemcpyDeviceToHost);
|
|
cudaMemcpy(h_out_lengths.data(), d_out_lengths, num_lines * sizeof(float),
|
|
cudaMemcpyDeviceToHost);
|
|
|
|
for (size_t i = 0; i < num_lines; ++i) {
|
|
out_rays[i].SetCount(h_out_counts[i]);
|
|
out_rays[i].SetTotalLength(h_out_lengths[i]);
|
|
}
|
|
|
|
if (alloc_pts) {
|
|
cudaFree(d_in_pts);
|
|
cudaFree(d_out_pts);
|
|
}
|
|
cudaFree(d_out_elements);
|
|
cudaFree(d_out_counts);
|
|
cudaFree(d_out_lengths);
|
|
cudaFree(d_inv_world);
|
|
}
|
|
|
|
} // namespace uLib
|
|
|
|
#endif // USE_CUDA
|
|
|
|
#endif // VOXRAYTRACERCUDA_H
|