/*////////////////////////////////////////////////////////////////////////////// // CMT Cosmic Muon Tomography project ////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// Copyright (c) 2014, Universita' degli Studi di Padova, INFN sez. di Padova All rights reserved Authors: Andrea Rigoni Garola < andrea.rigoni@pd.infn.it > ------------------------------------------------------------------ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library. //////////////////////////////////////////////////////////////////////////////*/ #ifndef VOXIMAGEFILTERABTRIM_HPP #define VOXIMAGEFILTERABTRIM_HPP #include "Math/VoxImage.h" #include "VoxImageFilter.h" #include //////////////////////////////////////////////////////////////////////////////// ///// VOXIMAGE FILTER ABTRIM ///////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// namespace uLib { #if defined(USE_CUDA) && defined(__CUDACC__) template __global__ void ABTrimFilterKernel(const VoxelT *in, VoxelT *out, const VoxelT *kernel, int vox_size, int ker_size, int center_count, int mAtrim, int mBtrim) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < vox_size) { // Allocate space for sorting extern __shared__ char shared_mem[]; VoxelT *mfh = (VoxelT *)&shared_mem[threadIdx.x * ker_size * sizeof(VoxelT)]; for (int i = 0; i < ker_size; ++i) { mfh[i].Count = i; } for (int ik = 0; ik < ker_size; ik++) { int pos = index + kernel[ik].Count - center_count; if (pos < 0) { pos += vox_size * ((-pos / vox_size) + 1); } pos = pos % vox_size; mfh[ik].Value = in[pos].Value; } // Simple bubble sort for small arrays for (int i = 0; i < ker_size - 1; i++) { for (int j = 0; j < ker_size - i - 1; j++) { if (mfh[j].Value > mfh[j + 1].Value) { VoxelT temp = mfh[j]; mfh[j] = mfh[j + 1]; mfh[j + 1] = temp; } } } float ker_sum = 0; float fconv = 0; for (int ik = 0; ik < mAtrim; ik++) { ker_sum += kernel[mfh[ik].Count].Value; } for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) { fconv += mfh[ik].Value * kernel[mfh[ik].Count].Value; ker_sum += kernel[mfh[ik].Count].Value; } for (int ik = ker_size - mBtrim; ik < ker_size; ik++) { ker_sum += kernel[mfh[ik].Count].Value; } out[index].Value = fconv / ker_sum; } } #endif template class VoxFilterAlgorithmAbtrim : public VoxImageFilter> { struct KernelSortAscending { bool operator()(const VoxelT &e1, const VoxelT &e2) { return e1.Value < e2.Value; } }; public: typedef VoxImageFilter> BaseClass; VoxFilterAlgorithmAbtrim(const Vector3i &size) : BaseClass(size) { mAtrim = 0; mBtrim = 0; } #if defined(USE_CUDA) && defined(__CUDACC__) void Run() { if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM || this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) { this->m_Image->Data().MoveToVRAM(); this->m_KernelData.Data().MoveToVRAM(); VoxImage buffer = *(this->m_Image); buffer.Data().MoveToVRAM(); int vox_size = buffer.Data().size(); int ker_size = this->m_KernelData.Data().size(); VoxelT *d_img_out = this->m_Image->Data().GetVRAMData(); const VoxelT *d_img_in = buffer.Data().GetVRAMData(); const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData(); int center_count = this->m_KernelData[this->m_KernelData.GetCenterData()].Count; int threadsPerBlock = 256; int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock; size_t shared_mem_size = threadsPerBlock * ker_size * sizeof(VoxelT); ABTrimFilterKernel<<>>( d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count, mAtrim, mBtrim); cudaDeviceSynchronize(); } else { BaseClass::Run(); } } #endif float Evaluate(const VoxImage &buffer, int index) { const DataAllocator &vbuf = buffer.ConstData(); const DataAllocator &vker = this->m_KernelData.ConstData(); int vox_size = vbuf.size(); int ker_size = vker.size(); int pos; std::vector mfh(ker_size); for (int i = 0; i < ker_size; ++i) mfh[i].Count = i; // index key for ordering function for (int ik = 0; ik < ker_size; ik++) { pos = index + vker[ik].Count - vker[this->m_KernelData.GetCenterData()].Count; pos = (pos + vox_size) % vox_size; mfh[ik].Value = vbuf[pos].Value; } std::sort(mfh.begin(), mfh.end(), KernelSortAscending()); float ker_sum = 0; float fconv = 0; for (int ik = 0; ik < mAtrim; ik++) ker_sum += vker[mfh[ik].Count].Value; for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) { fconv += mfh[ik].Value * vker[mfh[ik].Count].Value; // convloution // ker_sum += vker[mfh[ik].Count].Value; } for (int ik = ker_size - mBtrim; ik < ker_size; ik++) ker_sum += vker[mfh[ik].Count].Value; return fconv / ker_sum; } inline void SetABTrim(int a, int b) { mAtrim = a; mBtrim = b; } private: int mAtrim; int mBtrim; }; //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // Roberspierre Filter // template class VoxFilterAlgorithmSPR : public VoxImageFilter> { struct KernelSortAscending { bool operator()(const VoxelT &e1, const VoxelT &e2) { return e1.Value < e2.Value; } }; public: typedef VoxImageFilter> BaseClass; VoxFilterAlgorithmSPR(const Vector3i &size) : BaseClass(size) { mAtrim = 0; mBtrim = 0; } #if defined(USE_CUDA) && defined(__CUDACC__) void Run() { if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM || this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) { this->m_Image->Data().MoveToVRAM(); this->m_KernelData.Data().MoveToVRAM(); VoxImage buffer = *(this->m_Image); buffer.Data().MoveToVRAM(); int vox_size = buffer.Data().size(); int ker_size = this->m_KernelData.Data().size(); VoxelT *d_img_out = this->m_Image->Data().GetVRAMData(); const VoxelT *d_img_in = buffer.Data().GetVRAMData(); const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData(); int center_count = this->m_KernelData[this->m_KernelData.GetCenterData()].Count; int threadsPerBlock = 256; int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock; size_t shared_mem_size = threadsPerBlock * ker_size * sizeof(VoxelT); ABTrimFilterKernel<<>>( d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count, mAtrim, mBtrim); cudaDeviceSynchronize(); } else { BaseClass::Run(); } } #endif float Evaluate(const VoxImage &buffer, int index) { const DataAllocator &vbuf = buffer.ConstData(); const DataAllocator &vker = this->m_KernelData.ConstData(); int vox_size = vbuf.size(); int ker_size = vker.size(); int pos; std::vector mfh(ker_size); for (int i = 0; i < ker_size; ++i) mfh[i].Count = i; // index key for ordering function for (int ik = 0; ik < ker_size; ik++) { pos = index + vker[ik].Count - vker[this->m_KernelData.GetCenterData()].Count; pos = (pos + vox_size) % vox_size; mfh[ik].Value = vbuf[pos].Value; } std::sort(mfh.begin(), mfh.end(), KernelSortAscending()); float spr = vbuf[index].Value; if ((mAtrim > 0 && spr <= mfh[mAtrim - 1].Value) || (mBtrim > 0 && spr >= mfh[ker_size - mBtrim].Value)) { float ker_sum = 0; float fconv = 0; for (int ik = 0; ik < mAtrim; ik++) ker_sum += vker[mfh[ik].Count].Value; for (int ik = mAtrim; ik < ker_size - mBtrim; ik++) { fconv += mfh[ik].Value * vker[mfh[ik].Count].Value; ker_sum += vker[mfh[ik].Count].Value; } for (int ik = ker_size - mBtrim; ik < ker_size; ik++) ker_sum += vker[mfh[ik].Count].Value; return fconv / ker_sum; } else return spr; } inline void SetABTrim(int a, int b) { mAtrim = a; mBtrim = b; } private: int mAtrim; int mBtrim; }; } // namespace uLib #endif // VOXIMAGEFILTERABTRIM_HPP