/*////////////////////////////////////////////////////////////////////////////// // CMT Cosmic Muon Tomography project ////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// Copyright (c) 2014, Universita' degli Studi di Padova, INFN sez. di Padova All rights reserved Authors: Andrea Rigoni Garola < andrea.rigoni@pd.infn.it > ------------------------------------------------------------------ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library. //////////////////////////////////////////////////////////////////////////////*/ #ifndef VOXIMAGEFILTERLINEAR_HPP #define VOXIMAGEFILTERLINEAR_HPP #include "Math/VoxImage.h" #include "VoxImageFilter.h" #include

////////////////////////////////////////////////////////////////////////////////
/////  VOXIMAGE FILTER LINEAR  /////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

namespace uLib {

#if defined(USE_CUDA) && defined(__CUDACC__)
template__global__ void LinearFilterKernel(const VoxelT *in, VoxelT *out,
                                   const VoxelT *kernel, int vox_size,
                                   int ker_size, int center_count) {
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index < vox_size) {
    float conv = 0;
    float ksum = 0;
    for (int ik = 0; ik < ker_size; ++ik) {
      int pos = index + kernel[ik].Count - center_count;
      if (pos < 0) {
        pos += vox_size * ((-pos / vox_size) + 1);
      }
      pos = pos % vox_size;
      conv += in[pos].Value * kernel[ik].Value;
      ksum += kernel[ik].Value;
    }
    out[index].Value = conv / ksum;
  }
}
#endif

template class VoxFilterAlgorithmLinear
    : public VoxImageFilter > {
public:
  typedef VoxImageFilter > BaseClass;
  VoxFilterAlgorithmLinear(const Vector3i &size) : BaseClass(size) {}

#if defined(USE_CUDA) && defined(__CUDACC__)
  void Run() {
    if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
        this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {

      this->m_Image->Data().MoveToVRAM();
      this->m_KernelData.Data().MoveToVRAM();

      VoxImage buffer = *(this->m_Image);
      buffer.Data().MoveToVRAM();

      int vox_size = buffer.Data().size();
      int ker_size = this->m_KernelData.Data().size();

      VoxelT *d_img_out = this->m_Image->Data().GetVRAMData();
      const VoxelT *d_img_in = buffer.Data().GetVRAMData();
      const VoxelT *d_kernel = this->m_KernelData.Data().GetVRAMData();
      int center_count =
          this->m_KernelData[this->m_KernelData.GetCenterData()].Count;

      int threadsPerBlock = 256;
      int blocksPerGrid = (vox_size + threadsPerBlock - 1) / threadsPerBlock;

      LinearFilterKernel<< >>(
          d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count);
      cudaDeviceSynchronize();
    } else {
      BaseClass::Run();
    }
  }
#endif

  float Evaluate(const VoxImage &buffer, int index) {
    const DataAllocator &vbuf = buffer.ConstData();
    const DataAllocator &vker = this->m_KernelData.ConstData();
    int vox_size = vbuf.size();
    int ker_size = vker.size();
    int pos;
    float conv = 0, ksum = 0;
    for (int ik = 0; ik < ker_size; ++ik) {
      pos = index + vker[ik].Count -
            vker[this->m_KernelData.GetCenterData()].Count;
      pos = (pos + vox_size) % vox_size;
      conv += vbuf[pos].Value * vker[ik].Value;
      ksum += vker[ik].Value;
    }
    return conv / ksum;
  }
};

} // namespace uLib

#endif // VOXIMAGEFILTERLINEAR_HPP