Files
uLib/src/Math/VoxImageFilterABTrim.hpp
2026-03-05 15:03:19 +00:00

293 lines
9.5 KiB
C++

/*//////////////////////////////////////////////////////////////////////////////
// CMT Cosmic Muon Tomography project //////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
Copyright (c) 2014, Universita' degli Studi di Padova, INFN sez. di Padova
All rights reserved
Authors: Andrea Rigoni Garola < andrea.rigoni@pd.infn.it >
------------------------------------------------------------------
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3.0 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library.
//////////////////////////////////////////////////////////////////////////////*/
#ifndef VOXIMAGEFILTERABTRIM_HPP
#define VOXIMAGEFILTERABTRIM_HPP
#include "Math/VoxImage.h"
#include "VoxImageFilter.h"
#include <Math/Dense.h>
////////////////////////////////////////////////////////////////////////////////
///// VOXIMAGE FILTER ABTRIM /////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
namespace uLib {
#if defined(USE_CUDA) && defined(__CUDACC__)
template <typename VoxelT>
__global__ void ABTrimFilterKernel(const VoxelT *in, VoxelT *out,
const VoxelT *kernel, int vox_size,
int ker_size, int center_count, int mAtrim,
int mBtrim) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < vox_size) {
// Allocate space for sorting
extern __shared__ char shared_mem[];
VoxelT *mfh =
(VoxelT *)&shared_mem[threadIdx.x * ker_size * sizeof(VoxelT)];
for (int i = 0; i < ker_size; ++i) {
mfh[i].Count = i;
}
for (int ik = 0; ik < ker_size; ik++) {
int pos = index + kernel[ik].Count - center_count;
if (pos < 0) {
pos += vox_size * ((-pos / vox_size) + 1);
}
pos = pos % vox_size;
mfh[ik].Value = in[pos].Value;
}
// Simple bubble sort for small arrays
for (int i = 0; i < ker_size - 1; i++) {
for (int j = 0; j < ker_size - i - 1; j++) {
if (mfh[j].Value > mfh[j + 1].Value) {
VoxelT temp = mfh[j];
mfh[j] = mfh[j + 1];
mfh[j + 1] = temp;
}
}
}
float ker_sum = 0;
float fconv = 0;
for (int ik = 0; ik < mAtrim; ik++) {
ker_sum += kernel[mfh[ik].Count].Value;
}
for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) {
fconv += mfh[ik].Value * kernel[mfh[ik].Count].Value;
ker_sum += kernel[mfh[ik].Count].Value;
}
for (int ik = ker_size - mBtrim; ik < ker_size; ik++) {
ker_sum += kernel[mfh[ik].Count].Value;
}
out[index].Value = fconv / ker_sum;
}
}
#endif
template <typename VoxelT>
class VoxFilterAlgorithmAbtrim
: public VoxImageFilter<VoxelT, VoxFilterAlgorithmAbtrim<VoxelT>> {
struct KernelSortAscending {
bool operator()(const VoxelT &e1, const VoxelT &e2) {
return e1.Value < e2.Value;
}
};
public:
typedef VoxImageFilter<VoxelT, VoxFilterAlgorithmAbtrim<VoxelT>> BaseClass;
VoxFilterAlgorithmAbtrim(const Vector3i &size) : BaseClass(size) {
mAtrim = 0;
mBtrim = 0;
}
#if defined(USE_CUDA) && defined(__CUDACC__)
void Run() {
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
this->m_Image->Data().MoveToVRAM();
this->m_KernelData.Data().MoveToVRAM();
VoxImage<VoxelT> buffer = *(this->m_Image);
buffer.Data().MoveToVRAM();
int vox_size = buffer.Data().size();
int ker_size = this->m_KernelData.Data().size();
VoxelT *d_img_out = this->m_Image->Data().GetVRAMData();
const VoxelT *d_img_in = buffer.Data().GetVRAMData();
const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData();
int center_count =
this->m_KernelData[this->m_KernelData.GetCenterData()].Count;
int threadsPerBlock = 256;
int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock;
size_t shared_mem_size = threadsPerBlock * ker_size * sizeof(VoxelT);
ABTrimFilterKernel<<<blocksPerGrid, threadsPerBlock, shared_mem_size>>>(
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
mAtrim, mBtrim);
cudaDeviceSynchronize();
} else {
BaseClass::Run();
}
}
#endif
float Evaluate(const VoxImage<VoxelT> &buffer, int index) {
const DataAllocator<VoxelT> &vbuf = buffer.ConstData();
const DataAllocator<VoxelT> &vker = this->m_KernelData.ConstData();
int vox_size = vbuf.size();
int ker_size = vker.size();
int pos;
std::vector<VoxelT> mfh(ker_size);
for (int i = 0; i < ker_size; ++i)
mfh[i].Count = i; // index key for ordering function
for (int ik = 0; ik < ker_size; ik++) {
pos = index + vker[ik].Count -
vker[this->m_KernelData.GetCenterData()].Count;
pos = (pos + vox_size) % vox_size;
mfh[ik].Value = vbuf[pos].Value;
}
std::sort(mfh.begin(), mfh.end(), KernelSortAscending());
float ker_sum = 0;
float fconv = 0;
for (int ik = 0; ik < mAtrim; ik++)
ker_sum += vker[mfh[ik].Count].Value;
for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) {
fconv += mfh[ik].Value * vker[mfh[ik].Count].Value; // convloution //
ker_sum += vker[mfh[ik].Count].Value;
}
for (int ik = ker_size - mBtrim; ik < ker_size; ik++)
ker_sum += vker[mfh[ik].Count].Value;
return fconv / ker_sum;
}
inline void SetABTrim(int a, int b) {
mAtrim = a;
mBtrim = b;
}
private:
int mAtrim;
int mBtrim;
};
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Roberspierre Filter //
template <typename VoxelT>
class VoxFilterAlgorithmSPR
: public VoxImageFilter<VoxelT, VoxFilterAlgorithmSPR<VoxelT>> {
struct KernelSortAscending {
bool operator()(const VoxelT &e1, const VoxelT &e2) {
return e1.Value < e2.Value;
}
};
public:
typedef VoxImageFilter<VoxelT, VoxFilterAlgorithmSPR<VoxelT>> BaseClass;
VoxFilterAlgorithmSPR(const Vector3i &size) : BaseClass(size) {
mAtrim = 0;
mBtrim = 0;
}
#if defined(USE_CUDA) && defined(__CUDACC__)
void Run() {
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
this->m_Image->Data().MoveToVRAM();
this->m_KernelData.Data().MoveToVRAM();
VoxImage<VoxelT> buffer = *(this->m_Image);
buffer.Data().MoveToVRAM();
int vox_size = buffer.Data().size();
int ker_size = this->m_KernelData.Data().size();
VoxelT *d_img_out = this->m_Image->Data().GetVRAMData();
const VoxelT *d_img_in = buffer.Data().GetVRAMData();
const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData();
int center_count =
this->m_KernelData[this->m_KernelData.GetCenterData()].Count;
int threadsPerBlock = 256;
int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock;
size_t shared_mem_size = threadsPerBlock * ker_size * sizeof(VoxelT);
ABTrimFilterKernel<<<blocksPerGrid, threadsPerBlock, shared_mem_size>>>(
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
mAtrim, mBtrim);
cudaDeviceSynchronize();
} else {
BaseClass::Run();
}
}
#endif
float Evaluate(const VoxImage<VoxelT> &buffer, int index) {
const DataAllocator<VoxelT> &vbuf = buffer.ConstData();
const DataAllocator<VoxelT> &vker = this->m_KernelData.ConstData();
int vox_size = vbuf.size();
int ker_size = vker.size();
int pos;
std::vector<VoxelT> mfh(ker_size);
for (int i = 0; i < ker_size; ++i)
mfh[i].Count = i; // index key for ordering function
for (int ik = 0; ik < ker_size; ik++) {
pos = index + vker[ik].Count -
vker[this->m_KernelData.GetCenterData()].Count;
pos = (pos + vox_size) % vox_size;
mfh[ik].Value = vbuf[pos].Value;
}
std::sort(mfh.begin(), mfh.end(), KernelSortAscending());
float spr = vbuf[index].Value;
if ((mAtrim > 0 && spr <= mfh[mAtrim - 1].Value) ||
(mBtrim > 0 && spr >= mfh[ker_size - mBtrim].Value)) {
float ker_sum = 0;
float fconv = 0;
for (int ik = 0; ik < mAtrim; ik++)
ker_sum += vker[mfh[ik].Count].Value;
for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) {
fconv += mfh[ik].Value * vker[mfh[ik].Count].Value;
ker_sum += vker[mfh[ik].Count].Value;
}
for (int ik = ker_size - mBtrim; ik < ker_size; ik++)
ker_sum += vker[mfh[ik].Count].Value;
return fconv / ker_sum;
} else
return spr;
}
inline void SetABTrim(int a, int b) {
mAtrim = a;
mBtrim = b;
}
private:
int mAtrim;
int mBtrim;
};
} // namespace uLib
#endif // VOXIMAGEFILTERABTRIM_HPP