feat: Implement a custom MetaAllocator for uLib::Vector to enable GPU memory management and integrate CUDA support into the build system.
This commit is contained in:
259
src/Core/DataAllocator.h
Normal file
259
src/Core/DataAllocator.h
Normal file
@@ -0,0 +1,259 @@
|
||||
/*//////////////////////////////////////////////////////////////////////////////
|
||||
// CMT Cosmic Muon Tomography project //////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Copyright (c) 2014, Universita' degli Studi di Padova, INFN sez. di Padova
|
||||
All rights reserved
|
||||
|
||||
Authors: Andrea Rigoni Garola < andrea.rigoni@pd.infn.it >
|
||||
|
||||
------------------------------------------------------------------
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 3.0 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library.
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
#ifndef U_MATH_DATAALLOCATOR_H
|
||||
#define U_MATH_DATAALLOCATOR_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#endif
|
||||
|
||||
namespace uLib {

/// Tags which of the two buffers of a DataAllocator is currently
/// authoritative: the host buffer (RAM) or the CUDA device buffer (VRAM).
enum class MemoryDevice { RAM, VRAM };

/**
 * @brief Fixed-size buffer of T that can be mirrored between host memory and
 *        (when built with USE_CUDA) CUDA device memory.
 *
 * The host buffer is allocated in one of two ways, selected at construction:
 *  - owns_objects == true : `new T[n]()`; elements are constructed,
 *    value-initialized objects and are copied with copy-assignment.
 *  - owns_objects == false: raw storage from `::operator new`; no constructor
 *    or destructor of T is ever run and the content is copied byte-wise.
 *
 * Element accessors (`at`, `operator[]`, `begin`, `end`) first bring the data
 * back to the host. The const overloads do so too, therefore the host pointer
 * and the device tag are `mutable`; const access is consequently not safe to
 * perform concurrently from several threads.
 *
 * Without USE_CUDA no device buffer is ever allocated: MoveToVRAM() only
 * changes the device tag and data() returns nullptr while the tag is VRAM.
 *
 * NOTE(review): return codes of the CUDA runtime calls are not checked
 * (unchanged from the previous implementation).
 */
template <typename T> class DataAllocator {
public:
  /**
   * @brief Creates a buffer of @p size elements resident in RAM.
   * @param size         number of elements; 0 allocates nothing.
   * @param owns_objects true to hold constructed, value-initialized objects;
   *                     false to hold raw uninitialized storage.
   */
  DataAllocator(size_t size = 0, bool owns_objects = true)
      : m_Size(size), m_RamData(AllocateRam(size, owns_objects)),
        m_VramData(nullptr), m_Device(MemoryDevice::RAM),
        m_OwnsObjects(owns_objects) {}

  /**
   * @brief Deep copy: duplicates the host buffer and, with USE_CUDA, the
   *        device buffer when @p other has one. The device tag is copied.
   */
  DataAllocator(const DataAllocator<T> &other)
      : m_Size(other.m_Size), m_RamData(nullptr), m_VramData(nullptr),
        m_Device(other.m_Device), m_OwnsObjects(other.m_OwnsObjects) {
    if (m_Size == 0)
      return;
    if (other.m_RamData)
      m_RamData = CloneRam(other.m_RamData, m_Size, m_Size, m_OwnsObjects);
#ifdef USE_CUDA
    if (other.m_VramData) {
      cudaMalloc(reinterpret_cast<void **>(&m_VramData), m_Size * sizeof(T));
      cudaMemcpy(m_VramData, other.m_VramData, m_Size * sizeof(T),
                 cudaMemcpyDeviceToDevice);
    }
#endif
  }

  /// Takes over the buffers of @p other, which is left empty and RAM-tagged.
  DataAllocator(DataAllocator &&other) noexcept
      : m_Size(other.m_Size), m_RamData(other.m_RamData),
        m_VramData(other.m_VramData), m_Device(other.m_Device),
        m_OwnsObjects(other.m_OwnsObjects) {
    other.Forget();
  }

  ~DataAllocator() { Release(); }

  /**
   * @brief Deep-copy assignment.
   *
   * When size and ownership mode already match, the existing buffers are
   * reused. Otherwise a full copy is built first and then adopted, so the
   * old host buffer is always released with the ownership mode it was
   * allocated with, and *this is untouched if building the copy throws.
   */
  DataAllocator &operator=(const DataAllocator &other) {
    if (this == &other)
      return *this;

    if (m_OwnsObjects == other.m_OwnsObjects && m_Size == other.m_Size) {
      if (!m_RamData && other.m_RamData)
        m_RamData = AllocateRam(m_Size, m_OwnsObjects);
      CopyRam(other.m_RamData, m_RamData, m_Size, m_OwnsObjects);
#ifdef USE_CUDA
      if (other.m_VramData && m_Size > 0) {
        if (!m_VramData)
          cudaMalloc(reinterpret_cast<void **>(&m_VramData),
                     m_Size * sizeof(T));
        cudaMemcpy(m_VramData, other.m_VramData, m_Size * sizeof(T),
                   cudaMemcpyDeviceToDevice);
      }
#endif
      m_Device = other.m_Device;
      return *this;
    }

    DataAllocator copy(other);
    // Equivalent to std::move(copy); spelled as a cast so that this header
    // needs no additional include.
    *this = static_cast<DataAllocator &&>(copy);
    return *this;
  }

  /// Releases the current buffers and takes over those of @p other, which is
  /// left empty and RAM-tagged.
  DataAllocator &operator=(DataAllocator &&other) noexcept {
    if (this != &other) {
      Release();
      m_Size = other.m_Size;
      m_RamData = other.m_RamData;
      m_VramData = other.m_VramData;
      m_Device = other.m_Device;
      m_OwnsObjects = other.m_OwnsObjects;
      other.Forget();
    }
    return *this;
  }

  /// Makes the host buffer authoritative, copying device -> host when built
  /// with USE_CUDA and a device buffer exists. No-op if already RAM-tagged.
  void MoveToRAM() { SyncToRam(); }

  /**
   * @brief Makes the device buffer authoritative. No-op if already
   *        VRAM-tagged.
   *
   * With USE_CUDA the device buffer is allocated on first use and the host
   * content is uploaded. Without USE_CUDA only the device tag changes.
   */
  void MoveToVRAM() {
    if (m_Device == MemoryDevice::VRAM)
      return;
#ifdef USE_CUDA
    if (!m_VramData && m_Size > 0) {
      cudaMalloc(reinterpret_cast<void **>(&m_VramData), m_Size * sizeof(T));
    }
    if (m_RamData && m_Size > 0) {
      cudaMemcpy(m_VramData, m_RamData, m_Size * sizeof(T),
                 cudaMemcpyHostToDevice);
    }
#endif
    m_Device = MemoryDevice::VRAM;
  }

  /**
   * @brief Changes the element count, keeping the first
   *        min(old size, @p size) elements.
   *
   * New owning elements are value-initialized; new raw storage is left
   * uninitialized. The device tag is not changed. If allocating or copying
   * the host buffer throws, *this is left unmodified.
   *
   * NOTE(review): with USE_CUDA a device buffer is allocated on every
   * non-empty resize, even if the data never went to VRAM (unchanged).
   */
  void resize(size_t size) {
    if (m_Size == size)
      return;

    const size_t kept = std::min(m_Size, size);
    T *newRam = nullptr;
    T *newVram = nullptr;

    if (size > 0) {
      newRam = CloneRam(m_RamData, kept, size, m_OwnsObjects);
#ifdef USE_CUDA
      cudaMalloc(reinterpret_cast<void **>(&newVram), size * sizeof(T));
      if (m_VramData) {
        cudaMemcpy(newVram, m_VramData, kept * sizeof(T),
                   cudaMemcpyDeviceToDevice);
      }
#endif
    }

    ReleaseRam(m_RamData, m_OwnsObjects);
#ifdef USE_CUDA
    if (m_VramData)
      cudaFree(m_VramData);
#endif

    m_Size = size;
    m_RamData = newRam;
    m_VramData = newVram;
  }

  /// Number of elements.
  size_t size() const { return m_Size; }

  /**
   * @brief Bounds-checked host access.
   * @throws std::out_of_range if @p index >= size(). The check runs before
   *         the data is brought back to RAM, so a bad index costs no transfer.
   */
  T &at(size_t index) {
    if (index >= m_Size)
      throw std::out_of_range("Index out of range");
    SyncToRam();
    return m_RamData[index];
  }

  /// Const overload of at(); may still move the data back to RAM.
  const T &at(size_t index) const {
    if (index >= m_Size)
      throw std::out_of_range("Index out of range");
    SyncToRam();
    return m_RamData[index];
  }

  /// Unchecked host access; moves the data back to RAM first.
  T &operator[](size_t index) {
    SyncToRam();
    return m_RamData[index];
  }

  /// Const overload of operator[]; may still move the data back to RAM.
  const T &operator[](size_t index) const {
    SyncToRam();
    return m_RamData[index];
  }

  /// Pointer to the authoritative buffer: host pointer when RAM-tagged,
  /// device pointer otherwise. Performs no transfer.
  T *data() { return (m_Device == MemoryDevice::RAM) ? m_RamData : m_VramData; }
  const T *data() const {
    return (m_Device == MemoryDevice::RAM) ? m_RamData : m_VramData;
  }

  /// Host buffer pointer, regardless of the device tag. No transfer.
  T *GetRAMData() { return m_RamData; }
  const T *GetRAMData() const { return m_RamData; }

  /// Device buffer pointer (nullptr if none), regardless of the device tag.
  T *GetVRAMData() { return m_VramData; }
  const T *GetVRAMData() const { return m_VramData; }

  /// Which buffer is currently tagged as authoritative.
  MemoryDevice GetDevice() const { return m_Device; }

  // Iterator support for RAM operations: every call first moves the data
  // back to RAM, then returns a raw host pointer.
  T *begin() {
    SyncToRam();
    return m_RamData;
  }
  T *end() {
    SyncToRam();
    return m_RamData + m_Size;
  }
  const T *begin() const {
    SyncToRam();
    return m_RamData;
  }
  const T *end() const {
    SyncToRam();
    return m_RamData + m_Size;
  }

private:
  /// Allocates host storage for @p count elements in the given ownership
  /// mode; returns nullptr for count == 0.
  static T *AllocateRam(size_t count, bool owns_objects) {
    if (count == 0)
      return nullptr;
    if (owns_objects)
      return new T[count]();
    return static_cast<T *>(::operator new(count * sizeof(T)));
  }

  /// Frees host storage with the deallocation form matching the ownership
  /// mode it was allocated with. Accepts nullptr.
  static void ReleaseRam(T *buffer, bool owns_objects) noexcept {
    if (!buffer)
      return;
    if (owns_objects)
      delete[] buffer;
    else
      ::operator delete(buffer);
  }

  /// Copies @p count elements host -> host: copy-assignment for constructed
  /// objects, byte copy for raw storage. No-op on null pointers or count 0.
  static void CopyRam(const T *source, T *target, size_t count,
                      bool owns_objects) {
    if (!source || !target || count == 0)
      return;
    if (owns_objects)
      std::copy(source, source + count, target);
    else
      std::memcpy(static_cast<void *>(target),
                  static_cast<const void *>(source), count * sizeof(T));
  }

  /// Allocates @p new_count elements and copies the first @p copy_count from
  /// @p source; the new buffer is freed again if the copy throws.
  static T *CloneRam(const T *source, size_t copy_count, size_t new_count,
                     bool owns_objects) {
    T *fresh = AllocateRam(new_count, owns_objects);
    try {
      CopyRam(source, fresh, copy_count, owns_objects);
    } catch (...) {
      ReleaseRam(fresh, owns_objects);
      throw;
    }
    return fresh;
  }

  /// Shared implementation of MoveToRAM(); const so that the const accessors
  /// can use it without casting away constness.
  void SyncToRam() const {
    if (m_Device == MemoryDevice::RAM)
      return;
    if (!m_RamData && m_Size > 0)
      m_RamData = AllocateRam(m_Size, m_OwnsObjects);
#ifdef USE_CUDA
    if (m_VramData && m_Size > 0) {
      cudaMemcpy(m_RamData, m_VramData, m_Size * sizeof(T),
                 cudaMemcpyDeviceToHost);
    }
#endif
    m_Device = MemoryDevice::RAM;
  }

  /// Frees both buffers and leaves the object empty.
  void Release() noexcept {
    ReleaseRam(m_RamData, m_OwnsObjects);
#ifdef USE_CUDA
    if (m_VramData)
      cudaFree(m_VramData);
#endif
    m_RamData = nullptr;
    m_VramData = nullptr;
    m_Size = 0;
  }

  /// Drops the buffers without freeing them (they were handed to another
  /// instance) and resets to an empty RAM-tagged state.
  void Forget() noexcept {
    m_Size = 0;
    m_RamData = nullptr;
    m_VramData = nullptr;
    m_Device = MemoryDevice::RAM;
  }

  size_t m_Size;
  mutable T *m_RamData;          // mutable: allocated lazily by SyncToRam()
  T *m_VramData;
  mutable MemoryDevice m_Device; // mutable: updated by SyncToRam()
  bool m_OwnsObjects;
};

} // namespace uLib
|
||||
|
||||
#endif // U_MATH_DATAALLOCATOR_H
|
||||
Reference in New Issue
Block a user