// uLib/src/Core/DataAllocator.h

/*//////////////////////////////////////////////////////////////////////////////
// CMT Cosmic Muon Tomography project //////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

  Copyright (c) 2014, Universita' degli Studi di Padova, INFN sez. di Padova
  All rights reserved

  Authors: Andrea Rigoni Garola < andrea.rigoni@pd.infn.it >

  ------------------------------------------------------------------
  This library is free software;  you  can  redistribute  it  and/or
  modify it  under the  terms  of  the  GNU  Lesser  General  Public
  License as published  by  the  Free  Software  Foundation;  either
  version 3.0 of the License, or (at your option) any later version.

  This library is  distributed in  the hope that it will  be useful,
  but  WITHOUT ANY WARRANTY;  without  even  the implied warranty of
  MERCHANTABILITY  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of  the GNU Lesser General  Public
  License along with this library.

//////////////////////////////////////////////////////////////////////////////*/

#ifndef U_MATH_DATAALLOCATOR_H
#define U_MATH_DATAALLOCATOR_H

#include <algorithm>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>

#ifdef USE_CUDA
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#endif

namespace uLib {

/// Memory space that a DataAllocator currently treats as holding the
/// up-to-date copy of its contents.
enum class MemoryDevice { RAM, VRAM };

/// Fixed-size buffer of T with a host (RAM) copy and, when built with
/// USE_CUDA, a device (VRAM) copy.
///
/// Elements are always transferred with bitwise copies (memcpy / cudaMemcpy),
/// never through T's copy or move operations.
/// NOTE(review): this is only well-defined for trivially copyable T; a
/// static_assert is not added here because it could break existing
/// instantiations.
///
/// The `owns_objects` flag selects how host storage is managed:
///  - true : storage comes from `new T[]` and is released with `delete[]`;
///  - false: storage is raw memory from `::operator new`, released with
///           `::operator delete`; no constructors or destructors are run.
///
/// The element accessors (`at`, `operator[]`, `begin`, `end`) synchronise the
/// host copy first, including from their const overloads.
template <typename T> class DataAllocator {
public:
  /// Creates a buffer of `size` elements located in RAM. When `owns_objects`
  /// is true the elements are value-initialised; otherwise the storage is left
  /// uninitialised.
  DataAllocator(size_t size = 0, bool owns_objects = true)
      : m_Size(size), m_RamData(nullptr), m_VramData(nullptr),
        m_Device(MemoryDevice::RAM), m_OwnsObjects(owns_objects) {
    m_RamData = AllocateRam(m_Size, m_OwnsObjects, /*value_init=*/true);
  }

  /// Deep copy: duplicates whichever of the RAM / VRAM buffers `other` holds
  /// and copies its device and ownership flags.
  DataAllocator(const DataAllocator<T> &other)
      : m_Size(other.m_Size), m_RamData(nullptr), m_VramData(nullptr),
        m_Device(other.m_Device), m_OwnsObjects(other.m_OwnsObjects) {
    if (m_Size == 0)
      return;

    if (other.m_RamData) {
      m_RamData = AllocateRam(m_Size, m_OwnsObjects, /*value_init=*/false);
      CopyRam(m_RamData, other.m_RamData, m_Size);
    }
#ifdef USE_CUDA
    if (other.m_VramData) {
      try {
        m_VramData = AllocateVram(m_Size);
        CheckCuda(cudaMemcpy(m_VramData, other.m_VramData, m_Size * sizeof(T),
                             cudaMemcpyDeviceToDevice),
                  "DataAllocator: device-to-device copy failed");
      } catch (...) {
        // The destructor does not run for a partially constructed object,
        // so release what has been acquired so far before propagating.
        ReleaseVram(m_VramData);
        ReleaseRam(m_RamData, m_OwnsObjects);
        throw;
      }
    }
#endif
  }

  /// Takes over the buffers of `other`, leaving it empty (size 0, no buffers,
  /// device RAM).
  DataAllocator(DataAllocator &&other) noexcept
      : m_Size(other.m_Size), m_RamData(other.m_RamData),
        m_VramData(other.m_VramData), m_Device(other.m_Device),
        m_OwnsObjects(other.m_OwnsObjects) {
    other.Forget();
  }

  ~DataAllocator() {
    ReleaseRam(m_RamData, m_OwnsObjects);
    ReleaseVram(m_VramData);
  }

  /// Deep-copy assignment.
  ///
  /// Existing buffers are reused only when size and ownership mode already
  /// match. Otherwise a fresh copy is built and moved in, so the old host
  /// buffer is always released with the scheme it was allocated with (the
  /// ownership flag must not change while an old buffer is still attached).
  DataAllocator &operator=(const DataAllocator &other) {
    if (this == &other)
      return *this;

    const bool canReuse =
        m_Size == other.m_Size && m_OwnsObjects == other.m_OwnsObjects &&
        (m_VramData != nullptr || other.m_VramData == nullptr);

    if (!canReuse) {
      // Build the copy first: if it throws, *this is left untouched.
      DataAllocator copy(other);
      *this = static_cast<DataAllocator &&>(copy);
      return *this;
    }

    if (m_Size > 0) {
      if (other.m_RamData) {
        if (!m_RamData)
          m_RamData = AllocateRam(m_Size, m_OwnsObjects, /*value_init=*/false);
        CopyRam(m_RamData, other.m_RamData, m_Size);
      }
#ifdef USE_CUDA
      if (other.m_VramData) {
        CheckCuda(cudaMemcpy(m_VramData, other.m_VramData, m_Size * sizeof(T),
                             cudaMemcpyDeviceToDevice),
                  "DataAllocator: device-to-device copy failed");
      }
#endif
    }
    m_Device = other.m_Device;
    return *this;
  }

  /// Move assignment: releases the current buffers (using the current
  /// ownership mode) and takes over those of `other`, leaving it empty.
  DataAllocator &operator=(DataAllocator &&other) noexcept {
    if (this != &other) {
      ReleaseRam(m_RamData, m_OwnsObjects);
      ReleaseVram(m_VramData);
      m_Size = other.m_Size;
      m_RamData = other.m_RamData;
      m_VramData = other.m_VramData;
      m_Device = other.m_Device;
      m_OwnsObjects = other.m_OwnsObjects;
      other.Forget();
    }
    return *this;
  }

  /// Makes RAM the current device. With USE_CUDA the VRAM contents are copied
  /// to the host buffer first. No-op when RAM is already current.
  /// With USE_CUDA, throws std::runtime_error if the transfer fails.
  void MoveToRAM() { SyncToRam(); }

  /// Makes VRAM the current device. With USE_CUDA the device buffer is
  /// allocated on demand and the host contents are uploaded.
  /// NOTE(review): without USE_CUDA only the device flag changes and no VRAM
  /// buffer exists, so data() returns nullptr until MoveToRAM() (or an element
  /// accessor) is called.
  /// With USE_CUDA, throws std::runtime_error if allocation or upload fails.
  void MoveToVRAM() {
    if (m_Device == MemoryDevice::VRAM)
      return;
#ifdef USE_CUDA
    if (m_Size > 0) {
      if (!m_VramData)
        m_VramData = AllocateVram(m_Size);
      if (m_RamData) {
        CheckCuda(cudaMemcpy(m_VramData, m_RamData, m_Size * sizeof(T),
                             cudaMemcpyHostToDevice),
                  "DataAllocator: host-to-device copy failed");
      }
    }
#endif
    m_Device = MemoryDevice::VRAM;
  }

  /// Changes the element count, preserving the first min(old, new) elements
  /// of each existing buffer (bitwise). New host elements are
  /// value-initialised only in owning mode. With USE_CUDA a device buffer of
  /// the new size is always allocated. The current device is not changed.
  void resize(size_t size) {
    if (m_Size == size)
      return;

    const size_t kept = std::min(m_Size, size);

    T *newRam = AllocateRam(size, m_OwnsObjects, /*value_init=*/true);
    T *newVram = nullptr;

    if (newRam && m_RamData)
      CopyRam(newRam, m_RamData, kept);

#ifdef USE_CUDA
    if (size > 0) {
      try {
        newVram = AllocateVram(size);
        if (m_VramData) {
          CheckCuda(cudaMemcpy(newVram, m_VramData, kept * sizeof(T),
                               cudaMemcpyDeviceToDevice),
                    "DataAllocator: device-to-device copy failed");
        }
      } catch (...) {
        // Nothing has been committed yet: drop the new buffers and leave
        // the object exactly as it was.
        ReleaseVram(newVram);
        ReleaseRam(newRam, m_OwnsObjects);
        throw;
      }
    }
#endif

    ReleaseRam(m_RamData, m_OwnsObjects);
    ReleaseVram(m_VramData);

    m_Size = size;
    m_RamData = newRam;
    m_VramData = newVram;
  }

  /// Number of elements.
  size_t size() const { return m_Size; }

  /// Bounds-checked element access on the host copy (synchronised first).
  /// Throws std::out_of_range when `index >= size()`.
  T &at(size_t index) {
    SyncToRam();
    if (index >= m_Size)
      throw std::out_of_range("Index out of range");
    return m_RamData[index];
  }

  const T &at(size_t index) const {
    SyncToRam();
    if (index >= m_Size)
      throw std::out_of_range("Index out of range");
    return m_RamData[index];
  }

  /// Unchecked element access on the host copy (synchronised first).
  T &operator[](size_t index) {
    SyncToRam();
    return m_RamData[index];
  }

  const T &operator[](size_t index) const {
    SyncToRam();
    return m_RamData[index];
  }

  /// Pointer to the buffer of the current device; performs no transfer.
  T *data() { return (m_Device == MemoryDevice::RAM) ? m_RamData : m_VramData; }
  const T *data() const {
    return (m_Device == MemoryDevice::RAM) ? m_RamData : m_VramData;
  }

  /// Raw host buffer pointer; performs no transfer and may be stale while the
  /// current device is VRAM.
  T *GetRAMData() { return m_RamData; }
  const T *GetRAMData() const { return m_RamData; }

  /// Raw device buffer pointer (nullptr if none was allocated); performs no
  /// transfer.
  T *GetVRAMData() { return m_VramData; }
  const T *GetVRAMData() const { return m_VramData; }

  MemoryDevice GetDevice() const { return m_Device; }

  // Iterator support for RAM operations (each call synchronises the host
  // copy first).
  T *begin() {
    SyncToRam();
    return m_RamData;
  }
  T *end() {
    SyncToRam();
    return m_RamData + m_Size;
  }
  const T *begin() const {
    SyncToRam();
    return m_RamData;
  }
  const T *end() const {
    SyncToRam();
    return m_RamData + m_Size;
  }

private:
  /// Allocates host storage for `count` elements using the scheme selected by
  /// `owns_objects`; returns nullptr for `count == 0`. `value_init` only
  /// matters in owning mode.
  static T *AllocateRam(size_t count, bool owns_objects, bool value_init) {
    if (count == 0)
      return nullptr;
    if (!owns_objects)
      return static_cast<T *>(::operator new(count * sizeof(T)));
    return value_init ? new T[count]() : new T[count];
  }

  /// Releases host storage with the scheme matching how it was allocated.
  static void ReleaseRam(T *ptr, bool owns_objects) noexcept {
    if (!ptr)
      return;
    if (owns_objects)
      delete[] ptr;
    else
      ::operator delete(ptr);
  }

  /// Bitwise copy of `count` elements between host buffers. The void casts
  /// make the raw-byte intent explicit.
  static void CopyRam(T *dst, const T *src, size_t count) {
    std::memcpy(static_cast<void *>(dst), static_cast<const void *>(src),
                count * sizeof(T));
  }

  /// Releases device storage; a no-op for nullptr and in non-CUDA builds.
  static void ReleaseVram(T *ptr) noexcept {
#ifdef USE_CUDA
    if (ptr)
      cudaFree(ptr);
#else
    (void)ptr;
#endif
  }

#ifdef USE_CUDA
  /// Throws std::runtime_error carrying `what` unless `status` is cudaSuccess.
  static void CheckCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess)
      throw std::runtime_error(what);
  }

  /// Allocates device storage for `count` elements; returns nullptr for
  /// `count == 0` and throws std::runtime_error if cudaMalloc fails.
  static T *AllocateVram(size_t count) {
    T *ptr = nullptr;
    if (count > 0) {
      CheckCuda(
          cudaMalloc(reinterpret_cast<void **>(&ptr), count * sizeof(T)),
          "DataAllocator: cudaMalloc failed");
    }
    return ptr;
  }
#endif

  /// Brings the host copy up to date and marks RAM as the current device.
  /// Declared const (touching only mutable members) so that const accessors
  /// can synchronise without casting away constness.
  void SyncToRam() const {
    if (m_Device == MemoryDevice::RAM)
      return;
    if (!m_RamData && m_Size > 0)
      m_RamData = AllocateRam(m_Size, m_OwnsObjects, /*value_init=*/true);
#ifdef USE_CUDA
    if (m_VramData && m_Size > 0) {
      CheckCuda(cudaMemcpy(m_RamData, m_VramData, m_Size * sizeof(T),
                           cudaMemcpyDeviceToHost),
                "DataAllocator: device-to-host copy failed");
    }
#endif
    m_Device = MemoryDevice::RAM;
  }

  /// Drops all buffers without freeing them (used after they were moved out).
  void Forget() noexcept {
    m_Size = 0;
    m_RamData = nullptr;
    m_VramData = nullptr;
    m_Device = MemoryDevice::RAM;
  }

  size_t m_Size;
  mutable T *m_RamData;          // mutable: lazily (re)filled by const accessors
  T *m_VramData;
  mutable MemoryDevice m_Device; // mutable: const accessors switch back to RAM
  bool m_OwnsObjects;
};

} // namespace uLib

#endif // U_MATH_DATAALLOCATOR_H