refactor: migrate voxel data storage to DataAllocator for CUDA

This commit is contained in:
AndreaRigoni
2026-02-28 10:05:39 +00:00
parent 07915295cb
commit 52580d8cde
14 changed files with 1484 additions and 1022 deletions

View File

@@ -23,14 +23,12 @@
//////////////////////////////////////////////////////////////////////////////*/
#ifndef VOXIMAGEFILTERLINEAR_HPP
#define VOXIMAGEFILTERLINEAR_HPP
#include <Math/Dense.h>
#include "Math/VoxImage.h"
#include "VoxImageFilter.h"
#include <Math/Dense.h>
////////////////////////////////////////////////////////////////////////////////
///// VOXIMAGE FILTER LINEAR /////////////////////////////////////////////////
@@ -38,32 +36,86 @@
namespace uLib {
#ifdef USE_CUDA
template <typename VoxelT>
__global__ void LinearFilterKernel(const VoxelT *in, VoxelT *out,
const VoxelT *kernel, int vox_size,
int ker_size, int center_count) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < vox_size) {
float conv = 0;
float ksum = 0;
for (int ik = 0; ik < ker_size; ++ik) {
int pos = index + kernel[ik].Count - center_count;
if (pos < 0) {
pos += vox_size * ((-pos / vox_size) + 1);
}
pos = pos % vox_size;
conv += in[pos].Value * kernel[ik].Value;
ksum += kernel[ik].Value;
}
out[index].Value = conv / ksum;
}
}
#endif
template <typename VoxelT>
class VoxFilterAlgorithmLinear :
public VoxImageFilter<VoxelT, VoxFilterAlgorithmLinear<VoxelT> > {
class VoxFilterAlgorithmLinear
: public VoxImageFilter<VoxelT, VoxFilterAlgorithmLinear<VoxelT>> {
public:
typedef VoxImageFilter<VoxelT, VoxFilterAlgorithmLinear<VoxelT> > BaseClass;
VoxFilterAlgorithmLinear(const Vector3i &size) : BaseClass(size) {}
typedef VoxImageFilter<VoxelT, VoxFilterAlgorithmLinear<VoxelT>> BaseClass;
VoxFilterAlgorithmLinear(const Vector3i &size) : BaseClass(size) {}
float Evaluate(const VoxImage<VoxelT> &buffer, int index)
{
const std::vector<VoxelT> &vbuf = buffer.ConstData();
const std::vector<VoxelT> &vker = this->m_KernelData.ConstData();
int vox_size = vbuf.size();
int ker_size = vker.size();
int pos;
float conv = 0, ksum = 0;
for (int ik = 0; ik < ker_size; ++ik) {
pos = index + vker[ik].Count - vker[this->m_KernelData.GetCenterData()].Count;
pos = (pos + vox_size) % vox_size;
conv += vbuf[pos].Value * vker[ik].Value;
ksum += vker[ik].Value;
}
return conv / ksum;
#ifdef USE_CUDA
void Run() {
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
this->m_Image->Data().MoveToVRAM();
this->m_KernelData.Data().MoveToVRAM();
VoxImage<VoxelT> buffer = *(this->m_Image);
buffer.Data().MoveToVRAM();
int vox_size = buffer.Data().size();
int ker_size = this->m_KernelData.Data().size();
VoxelT *d_img_out = this->m_Image->Data().GetVRAMData();
const VoxelT *d_img_in = buffer.Data().GetVRAMData();
const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData();
int center_count =
this->m_KernelData[this->m_KernelData.GetCenterData()].Count;
int threadsPerBlock = 256;
int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock;
LinearFilterKernel<<<blocksPerGrid, threadsPerBlock>>>(
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count);
cudaDeviceSynchronize();
} else {
BaseClass::Run();
}
}
#endif
float Evaluate(const VoxImage<VoxelT> &buffer, int index) {
const DataAllocator<VoxelT> &vbuf = buffer.ConstData();
const DataAllocator<VoxelT> &vker = this->m_KernelData.ConstData();
int vox_size = vbuf.size();
int ker_size = vker.size();
int pos;
float conv = 0, ksum = 0;
for (int ik = 0; ik < ker_size; ++ik) {
pos = index + vker[ik].Count -
vker[this->m_KernelData.GetCenterData()].Count;
pos = (pos + vox_size) % vox_size;
conv += vbuf[pos].Value * vker[ik].Value;
ksum += vker[ik].Value;
}
return conv / ksum;
}
};
}
} // namespace uLib
#endif // VOXIMAGEFILTERLINEAR_HPP