/*////////////////////////////////////////////////////////////////////////////// // CMT Cosmic Muon Tomography project ////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// Copyright (c) 2014, Universita' degli Studi di Padova, INFN sez. di Padova All rights reserved Authors: Andrea Rigoni Garola < andrea.rigoni@pd.infn.it > ------------------------------------------------------------------ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library. //////////////////////////////////////////////////////////////////////////////*/ #ifndef VOXIMAGEFILTERLINEAR_HPP #define VOXIMAGEFILTERLINEAR_HPP #include "Math/VoxImage.h" #include "VoxImageFilter.h" #include //////////////////////////////////////////////////////////////////////////////// ///// VOXIMAGE FILTER LINEAR ///////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// namespace uLib { #if defined(USE_CUDA) && defined(__CUDACC__) template __global__ void LinearFilterKernel(const VoxelT *in, VoxelT *out, const VoxelT *kernel, int vox_size, int ker_size, int center_count) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < vox_size) { float conv = 0; float ksum = 0; for (int ik = 0; ik < ker_size; ++ik) { int pos = index + kernel[ik].Count - center_count; if (pos < 0) { pos += vox_size * ((-pos / vox_size) + 1); } pos = pos % vox_size; conv += in[pos].Value * kernel[ik].Value; ksum += kernel[ik].Value; } out[index].Value = conv / ksum; } } #endif template class VoxFilterAlgorithmLinear : public VoxImageFilter> { public: typedef VoxImageFilter> BaseClass; VoxFilterAlgorithmLinear(const Vector3i &size) : BaseClass(size) {} #if defined(USE_CUDA) && defined(__CUDACC__) void Run() { if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM || this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) { this->m_Image->Data().MoveToVRAM(); this->m_KernelData.Data().MoveToVRAM(); VoxImage buffer = *(this->m_Image); buffer.Data().MoveToVRAM(); int vox_size = buffer.Data().size(); int ker_size = this->m_KernelData.Data().size(); VoxelT *d_img_out = this->m_Image->Data().GetVRAMData(); const VoxelT *d_img_in = buffer.Data().GetVRAMData(); const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData(); int center_count = this->m_KernelData[this->m_KernelData.GetCenterData()].Count; int threadsPerBlock = 256; int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock; LinearFilterKernel<<>>( d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count); cudaDeviceSynchronize(); } else { BaseClass::Run(); } } #endif float Evaluate(const VoxImage &buffer, int index) { const DataAllocator &vbuf = buffer.ConstData(); const DataAllocator &vker = this->m_KernelData.ConstData(); int vox_size = vbuf.size(); int ker_size = vker.size(); int pos; float conv = 0, ksum = 0; for (int ik = 0; ik < ker_size; ++ik) { pos = index + vker[ik].Count - vker[this->m_KernelData.GetCenterData()].Count; pos = (pos + vox_size) % vox_size; conv += vbuf[pos].Value * vker[ik].Value; ksum += vker[ik].Value; } return conv / ksum; } }; } // namespace uLib #endif // VOXIMAGEFILTERLINEAR_HPP