refactor: migrate voxel data storage to DataAllocator for CUDA
This commit is contained in:
@@ -23,14 +23,12 @@
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
|
||||
|
||||
#ifndef VOXIMAGEFILTERABTRIM_HPP
|
||||
#define VOXIMAGEFILTERABTRIM_HPP
|
||||
|
||||
#include <Math/Dense.h>
|
||||
#include "Math/VoxImage.h"
|
||||
#include "VoxImageFilter.h"
|
||||
#include <Math/Dense.h>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
///// VOXIMAGE FILTER ABTRIM /////////////////////////////////////////////////
|
||||
@@ -38,142 +36,257 @@
|
||||
|
||||
namespace uLib {
|
||||
|
||||
#ifdef USE_CUDA
|
||||
template <typename VoxelT>
|
||||
class VoxFilterAlgorithmAbtrim :
|
||||
public VoxImageFilter<VoxelT, VoxFilterAlgorithmAbtrim<VoxelT> > {
|
||||
__global__ void ABTrimFilterKernel(const VoxelT *in, VoxelT *out,
|
||||
const VoxelT *kernel, int vox_size,
|
||||
int ker_size, int center_count, int mAtrim,
|
||||
int mBtrim) {
|
||||
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (index < vox_size) {
|
||||
// Allocate space for sorting
|
||||
extern __shared__ char shared_mem[];
|
||||
VoxelT *mfh =
|
||||
(VoxelT *)&shared_mem[threadIdx.x * ker_size * sizeof(VoxelT)];
|
||||
|
||||
struct KernelSortAscending
|
||||
{
|
||||
bool operator()(const VoxelT& e1, const VoxelT& e2)
|
||||
{ return e1.Value < e2.Value; }
|
||||
};
|
||||
for (int i = 0; i < ker_size; ++i) {
|
||||
mfh[i].Count = i;
|
||||
}
|
||||
|
||||
for (int ik = 0; ik < ker_size; ik++) {
|
||||
int pos = index + kernel[ik].Count - center_count;
|
||||
if (pos < 0) {
|
||||
pos += vox_size * ((-pos / vox_size) + 1);
|
||||
}
|
||||
pos = pos % vox_size;
|
||||
mfh[ik].Value = in[pos].Value;
|
||||
}
|
||||
|
||||
// Simple bubble sort for small arrays
|
||||
for (int i = 0; i < ker_size - 1; i++) {
|
||||
for (int j = 0; j < ker_size - i - 1; j++) {
|
||||
if (mfh[j].Value > mfh[j + 1].Value) {
|
||||
VoxelT temp = mfh[j];
|
||||
mfh[j] = mfh[j + 1];
|
||||
mfh[j + 1] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float ker_sum = 0;
|
||||
float fconv = 0;
|
||||
for (int ik = 0; ik < mAtrim; ik++) {
|
||||
ker_sum += kernel[mfh[ik].Count].Value;
|
||||
}
|
||||
for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) {
|
||||
fconv += mfh[ik].Value * kernel[mfh[ik].Count].Value;
|
||||
ker_sum += kernel[mfh[ik].Count].Value;
|
||||
}
|
||||
for (int ik = ker_size - mBtrim; ik < ker_size; ik++) {
|
||||
ker_sum += kernel[mfh[ik].Count].Value;
|
||||
}
|
||||
|
||||
out[index].Value = fconv / ker_sum;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename VoxelT>
|
||||
class VoxFilterAlgorithmAbtrim
|
||||
: public VoxImageFilter<VoxelT, VoxFilterAlgorithmAbtrim<VoxelT>> {
|
||||
|
||||
struct KernelSortAscending {
|
||||
bool operator()(const VoxelT &e1, const VoxelT &e2) {
|
||||
return e1.Value < e2.Value;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
typedef VoxImageFilter<VoxelT, VoxFilterAlgorithmAbtrim<VoxelT> > BaseClass;
|
||||
VoxFilterAlgorithmAbtrim(const Vector3i &size) :
|
||||
BaseClass(size)
|
||||
{
|
||||
mAtrim = 0;
|
||||
mBtrim = 0;
|
||||
typedef VoxImageFilter<VoxelT, VoxFilterAlgorithmAbtrim<VoxelT>> BaseClass;
|
||||
VoxFilterAlgorithmAbtrim(const Vector3i &size) : BaseClass(size) {
|
||||
mAtrim = 0;
|
||||
mBtrim = 0;
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
void Run() {
|
||||
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
||||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
||||
|
||||
this->m_Image->Data().MoveToVRAM();
|
||||
this->m_KernelData.Data().MoveToVRAM();
|
||||
|
||||
VoxImage<VoxelT> buffer = *(this->m_Image);
|
||||
buffer.Data().MoveToVRAM();
|
||||
|
||||
int vox_size = buffer.Data().size();
|
||||
int ker_size = this->m_KernelData.Data().size();
|
||||
|
||||
VoxelT *d_img_out = this->m_Image->Data().GetVRAMData();
|
||||
const VoxelT *d_img_in = buffer.Data().GetVRAMData();
|
||||
const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData();
|
||||
int center_count =
|
||||
this->m_KernelData[this->m_KernelData.GetCenterData()].Count;
|
||||
|
||||
int threadsPerBlock = 256;
|
||||
int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock;
|
||||
size_t shared_mem_size = threadsPerBlock * ker_size * sizeof(VoxelT);
|
||||
|
||||
ABTrimFilterKernel<<<blocksPerGrid, threadsPerBlock, shared_mem_size>>>(
|
||||
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
|
||||
mAtrim, mBtrim);
|
||||
cudaDeviceSynchronize();
|
||||
} else {
|
||||
BaseClass::Run();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
float Evaluate(const VoxImage<VoxelT> &buffer, int index) {
|
||||
const DataAllocator<VoxelT> &vbuf = buffer.ConstData();
|
||||
const DataAllocator<VoxelT> &vker = this->m_KernelData.ConstData();
|
||||
int vox_size = vbuf.size();
|
||||
int ker_size = vker.size();
|
||||
int pos;
|
||||
|
||||
std::vector<VoxelT> mfh(ker_size);
|
||||
for (int i = 0; i < ker_size; ++i)
|
||||
mfh[i].Count = i; // index key for ordering function
|
||||
for (int ik = 0; ik < ker_size; ik++) {
|
||||
pos = index + vker[ik].Count -
|
||||
vker[this->m_KernelData.GetCenterData()].Count;
|
||||
pos = (pos + vox_size) % vox_size;
|
||||
mfh[ik].Value = vbuf[pos].Value;
|
||||
}
|
||||
|
||||
float Evaluate(const VoxImage<VoxelT> &buffer, int index)
|
||||
{
|
||||
const std::vector<VoxelT> &vbuf = buffer.ConstData();
|
||||
const std::vector<VoxelT> &vker = this->m_KernelData.ConstData();
|
||||
int vox_size = vbuf.size();
|
||||
int ker_size = vker.size();
|
||||
int pos;
|
||||
|
||||
std::vector<VoxelT> mfh(ker_size);
|
||||
for (int i = 0; i < ker_size; ++i)
|
||||
mfh[i].Count = i; //index key for ordering function
|
||||
for (int ik = 0; ik < ker_size; ik++) {
|
||||
pos = index + vker[ik].Count - vker[this->m_KernelData.GetCenterData()].Count;
|
||||
pos = (pos + vox_size) % vox_size;
|
||||
mfh[ik].Value = vbuf[pos].Value;
|
||||
}
|
||||
|
||||
std::sort(mfh.begin(), mfh.end(), KernelSortAscending());
|
||||
float ker_sum = 0;
|
||||
float fconv = 0;
|
||||
for (int ik = 0; ik < mAtrim; ik++)
|
||||
ker_sum += vker[ mfh[ik].Count ].Value;
|
||||
for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) {
|
||||
fconv += mfh[ik].Value * vker[ mfh[ik].Count ].Value; // convloution //
|
||||
ker_sum += vker[ mfh[ik].Count ].Value;
|
||||
}
|
||||
for (int ik = ker_size - mBtrim; ik < ker_size; ik++)
|
||||
ker_sum += vker[ mfh[ik].Count ].Value;
|
||||
|
||||
return fconv / ker_sum;
|
||||
std::sort(mfh.begin(), mfh.end(), KernelSortAscending());
|
||||
float ker_sum = 0;
|
||||
float fconv = 0;
|
||||
for (int ik = 0; ik < mAtrim; ik++)
|
||||
ker_sum += vker[mfh[ik].Count].Value;
|
||||
for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) {
|
||||
fconv += mfh[ik].Value * vker[mfh[ik].Count].Value; // convloution //
|
||||
ker_sum += vker[mfh[ik].Count].Value;
|
||||
}
|
||||
for (int ik = ker_size - mBtrim; ik < ker_size; ik++)
|
||||
ker_sum += vker[mfh[ik].Count].Value;
|
||||
|
||||
inline void SetABTrim(int a, int b) { mAtrim = a; mBtrim = b; }
|
||||
return fconv / ker_sum;
|
||||
}
|
||||
|
||||
inline void SetABTrim(int a, int b) {
|
||||
mAtrim = a;
|
||||
mBtrim = b;
|
||||
}
|
||||
|
||||
private:
|
||||
int mAtrim;
|
||||
int mBtrim;
|
||||
int mAtrim;
|
||||
int mBtrim;
|
||||
};
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Roberspierre Filter //
|
||||
|
||||
|
||||
|
||||
template <typename VoxelT>
|
||||
class VoxFilterAlgorithmSPR :
|
||||
public VoxImageFilter<VoxelT, VoxFilterAlgorithmSPR<VoxelT> > {
|
||||
class VoxFilterAlgorithmSPR
|
||||
: public VoxImageFilter<VoxelT, VoxFilterAlgorithmSPR<VoxelT>> {
|
||||
|
||||
struct KernelSortAscending
|
||||
{
|
||||
bool operator()(const VoxelT& e1, const VoxelT& e2)
|
||||
{ return e1.Value < e2.Value; }
|
||||
};
|
||||
struct KernelSortAscending {
|
||||
bool operator()(const VoxelT &e1, const VoxelT &e2) {
|
||||
return e1.Value < e2.Value;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
typedef VoxImageFilter<VoxelT, VoxFilterAlgorithmSPR<VoxelT> > BaseClass;
|
||||
VoxFilterAlgorithmSPR(const Vector3i &size) :
|
||||
BaseClass(size)
|
||||
{
|
||||
mAtrim = 0;
|
||||
mBtrim = 0;
|
||||
typedef VoxImageFilter<VoxelT, VoxFilterAlgorithmSPR<VoxelT>> BaseClass;
|
||||
VoxFilterAlgorithmSPR(const Vector3i &size) : BaseClass(size) {
|
||||
mAtrim = 0;
|
||||
mBtrim = 0;
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
void Run() {
|
||||
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
||||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
||||
|
||||
this->m_Image->Data().MoveToVRAM();
|
||||
this->m_KernelData.Data().MoveToVRAM();
|
||||
|
||||
VoxImage<VoxelT> buffer = *(this->m_Image);
|
||||
buffer.Data().MoveToVRAM();
|
||||
|
||||
int vox_size = buffer.Data().size();
|
||||
int ker_size = this->m_KernelData.Data().size();
|
||||
|
||||
VoxelT *d_img_out = this->m_Image->Data().GetVRAMData();
|
||||
const VoxelT *d_img_in = buffer.Data().GetVRAMData();
|
||||
const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData();
|
||||
int center_count =
|
||||
this->m_KernelData[this->m_KernelData.GetCenterData()].Count;
|
||||
|
||||
int threadsPerBlock = 256;
|
||||
int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock;
|
||||
size_t shared_mem_size = threadsPerBlock * ker_size * sizeof(VoxelT);
|
||||
|
||||
ABTrimFilterKernel<<<blocksPerGrid, threadsPerBlock, shared_mem_size>>>(
|
||||
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
|
||||
mAtrim, mBtrim);
|
||||
cudaDeviceSynchronize();
|
||||
} else {
|
||||
BaseClass::Run();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
float Evaluate(const VoxImage<VoxelT> &buffer, int index) {
|
||||
const DataAllocator<VoxelT> &vbuf = buffer.ConstData();
|
||||
const DataAllocator<VoxelT> &vker = this->m_KernelData.ConstData();
|
||||
int vox_size = vbuf.size();
|
||||
int ker_size = vker.size();
|
||||
int pos;
|
||||
|
||||
std::vector<VoxelT> mfh(ker_size);
|
||||
for (int i = 0; i < ker_size; ++i)
|
||||
mfh[i].Count = i; // index key for ordering function
|
||||
for (int ik = 0; ik < ker_size; ik++) {
|
||||
pos = index + vker[ik].Count -
|
||||
vker[this->m_KernelData.GetCenterData()].Count;
|
||||
pos = (pos + vox_size) % vox_size;
|
||||
mfh[ik].Value = vbuf[pos].Value;
|
||||
}
|
||||
|
||||
float Evaluate(const VoxImage<VoxelT> &buffer, int index)
|
||||
{
|
||||
const std::vector<VoxelT> &vbuf = buffer.ConstData();
|
||||
const std::vector<VoxelT> &vker = this->m_KernelData.ConstData();
|
||||
int vox_size = vbuf.size();
|
||||
int ker_size = vker.size();
|
||||
int pos;
|
||||
std::sort(mfh.begin(), mfh.end(), KernelSortAscending());
|
||||
float spr = vbuf[index].Value;
|
||||
if ((mAtrim > 0 && spr <= mfh[mAtrim - 1].Value) ||
|
||||
(mBtrim > 0 && spr >= mfh[ker_size - mBtrim].Value)) {
|
||||
float ker_sum = 0;
|
||||
float fconv = 0;
|
||||
for (int ik = 0; ik < mAtrim; ik++)
|
||||
ker_sum += vker[mfh[ik].Count].Value;
|
||||
for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) {
|
||||
fconv += mfh[ik].Value * vker[mfh[ik].Count].Value;
|
||||
ker_sum += vker[mfh[ik].Count].Value;
|
||||
}
|
||||
for (int ik = ker_size - mBtrim; ik < ker_size; ik++)
|
||||
ker_sum += vker[mfh[ik].Count].Value;
|
||||
|
||||
std::vector<VoxelT> mfh(ker_size);
|
||||
for (int i = 0; i < ker_size; ++i)
|
||||
mfh[i].Count = i; //index key for ordering function
|
||||
for (int ik = 0; ik < ker_size; ik++) {
|
||||
pos = index + vker[ik].Count -
|
||||
vker[this->m_KernelData.GetCenterData()].Count;
|
||||
pos = (pos + vox_size) % vox_size;
|
||||
mfh[ik].Value = vbuf[pos].Value;
|
||||
}
|
||||
return fconv / ker_sum;
|
||||
} else
|
||||
return spr;
|
||||
}
|
||||
|
||||
std::sort(mfh.begin(), mfh.end(), KernelSortAscending());
|
||||
float spr = vbuf[index].Value;
|
||||
if( (mAtrim > 0 && spr <= mfh[mAtrim-1].Value) ||
|
||||
(mBtrim > 0 && spr >= mfh[ker_size - mBtrim].Value) )
|
||||
{
|
||||
float ker_sum = 0;
|
||||
float fconv = 0;
|
||||
for (int ik = 0; ik < mAtrim; ik++)
|
||||
ker_sum += vker[ mfh[ik].Count ].Value;
|
||||
for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) {
|
||||
fconv += mfh[ik].Value * vker[ mfh[ik].Count ].Value;
|
||||
ker_sum += vker[ mfh[ik].Count ].Value;
|
||||
}
|
||||
for (int ik = ker_size - mBtrim; ik < ker_size; ik++)
|
||||
ker_sum += vker[ mfh[ik].Count ].Value;
|
||||
|
||||
return fconv / ker_sum;
|
||||
}
|
||||
else
|
||||
return spr;
|
||||
}
|
||||
|
||||
inline void SetABTrim(int a, int b) { mAtrim = a; mBtrim = b; }
|
||||
inline void SetABTrim(int a, int b) {
|
||||
mAtrim = a;
|
||||
mBtrim = b;
|
||||
}
|
||||
|
||||
private:
|
||||
int mAtrim;
|
||||
int mBtrim;
|
||||
int mAtrim;
|
||||
int mBtrim;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
} // namespace uLib
|
||||
|
||||
#endif // VOXIMAGEFILTERABTRIM_HPP
|
||||
|
||||
Reference in New Issue
Block a user