feat: Implement a custom MetaAllocator for uLib::Vector to enable GPU memory management and integrate CUDA support into the build system.

2026-03-04 20:52:01 +00:00
parent adedbcc37c
commit 9a59e031ed
10 changed files with 540 additions and 188 deletions
--- a/src/Core/DataAllocator.h
+++ b/src/Core/DataAllocator.h
@@ -0,0 +1,259 @@
+/*//////////////////////////////////////////////////////////////////////////////
+// CMT Cosmic Muon Tomography project //////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
+
+  Copyright (c) 2014, Universita' degli Studi di Padova, INFN sez. di Padova
+  All rights reserved
+
+  Authors: Andrea Rigoni Garola < andrea.rigoni@pd.infn.it >
+
+  ------------------------------------------------------------------
+  This library is free software;  you  can  redistribute  it  and/or
+  modify it  under the  terms  of  the  GNU  Lesser  General  Public
+  License as published  by  the  Free  Software  Foundation;  either
+  version 3.0 of the License, or (at your option) any later version.
+
+  This library is  distributed in  the hope that it will  be useful,
+  but  WITHOUT ANY WARRANTY;  without  even  the implied warranty of
+  MERCHANTABILITY  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of  the GNU Lesser General  Public
+  License along with this library.
+
+//////////////////////////////////////////////////////////////////////////////*/
+
+#ifndef U_MATH_DATAALLOCATOR_H
+#define U_MATH_DATAALLOCATOR_H
+
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
+
+#ifdef USE_CUDA
+#include <cuda_runtime.h>
+#endif
+
namespace uLib {

/// Tags which memory space currently holds the up-to-date copy of a
/// DataAllocator's contents: host memory (RAM) or CUDA device memory (VRAM).
enum class MemoryDevice { RAM, VRAM };

/// Fixed-length buffer of T that keeps a host (RAM) copy and, when compiled
/// with USE_CUDA, a device (VRAM) copy of the same elements.
///
/// Storage model:
///  - owns_objects == true : the host buffer is created with `new T[n]` and
///    released with `delete[]`, so T's constructors/destructors run.
///  - owns_objects == false: the host buffer is raw memory obtained from
///    `::operator new`; no T is ever constructed or destroyed by this class.
///
/// Elements are duplicated with std::memcpy / cudaMemcpy (copy construction,
/// assignment and resize never call T's copy operations).
/// NOTE(review): this is only well defined for trivially copyable T; confirm
/// that every instantiation in the project satisfies that.
///
/// The element accessors (at, operator[], begin, end) first bring the data
/// back to RAM. This also applies to the const overloads, which therefore
/// update internal state (device tag, host buffer) on a const object.
///
/// Without USE_CUDA no device buffer exists: MoveToVRAM() only flips the
/// device tag, and data() returns nullptr while the tag is VRAM.
template <typename T> class DataAllocator {
public:
  /// Creates a buffer of `size` elements located in RAM.
  /// @param size          number of elements (0 creates an empty buffer).
  /// @param owns_objects  true: value-initialised `new T[size]()`;
  ///                      false: uninitialised raw storage.
  /// @throws std::bad_alloc / std::length_error if the storage cannot be
  ///         obtained or the byte count overflows size_t.
  DataAllocator(size_t size = 0, bool owns_objects = true)
      : m_Size(size), m_RamData(nullptr), m_VramData(nullptr),
        m_Device(MemoryDevice::RAM), m_OwnsObjects(owns_objects) {
    if (m_Size > 0)
      m_RamData = AllocateRam(m_Size, m_OwnsObjects, true);
  }

  /// Deep copy: duplicates the host buffer and, under USE_CUDA, the device
  /// buffer if `other` has one. Size, device tag and ownership mode are taken
  /// from `other`.
  /// @throws std::bad_alloc, std::length_error, or std::runtime_error when a
  ///         CUDA call fails; nothing is leaked in that case.
  DataAllocator(const DataAllocator<T> &other)
      : m_Size(other.m_Size), m_RamData(nullptr), m_VramData(nullptr),
        m_Device(other.m_Device), m_OwnsObjects(other.m_OwnsObjects) {
    if (m_Size == 0)
      return;
    if (other.m_RamData) {
      // Contents are overwritten right away, so no value-initialisation.
      m_RamData = AllocateRam(m_Size, m_OwnsObjects, false);
      CopyBytes(m_RamData, other.m_RamData, m_Size);
    }
#ifdef USE_CUDA
    if (other.m_VramData) {
      try {
        m_VramData = AllocateVram(m_Size);
        CopyCuda(m_VramData, other.m_VramData, ByteCount(m_Size),
                 cudaMemcpyDeviceToDevice);
      } catch (...) {
        // The destructor does not run for a throwing constructor.
        ReleaseVram(m_VramData);
        ReleaseRam(m_RamData, m_OwnsObjects);
        throw;
      }
    }
#endif
  }

  /// Takes over the buffers of `other`, leaving it empty (size 0, no
  /// buffers, device tag RAM, ownership mode unchanged).
  DataAllocator(DataAllocator &&other) noexcept
      : m_Size(other.m_Size), m_RamData(other.m_RamData),
        m_VramData(other.m_VramData), m_Device(other.m_Device),
        m_OwnsObjects(other.m_OwnsObjects) {
    other.m_Size = 0;
    other.m_RamData = nullptr;
    other.m_VramData = nullptr;
    other.m_Device = MemoryDevice::RAM;
  }

  /// Releases the host buffer with the deallocation form that matches how it
  /// was allocated, and the device buffer under USE_CUDA.
  ~DataAllocator() {
    ReleaseRam(m_RamData, m_OwnsObjects);
#ifdef USE_CUDA
    ReleaseVram(m_VramData);
#endif
  }

  /// Makes this buffer a deep copy of `other` (size, contents, device tag and
  /// ownership mode).
  ///
  /// When size and ownership mode already match, the existing buffers are
  /// reused and only their contents are overwritten. Otherwise a fresh copy
  /// is built and swapped in, so the old host buffer is always released under
  /// the ownership mode it was allocated with, and `*this` is left untouched
  /// if building the copy throws.
  DataAllocator &operator=(const DataAllocator &other) {
    if (this == &other)
      return *this;

    if (m_Size != other.m_Size || m_OwnsObjects != other.m_OwnsObjects) {
      DataAllocator replacement(other);
      Swap(replacement); // old buffers die with `replacement`
      return *this;
    }

    if (other.m_RamData) {
      if (!m_RamData)
        m_RamData = AllocateRam(m_Size, m_OwnsObjects, false);
      CopyBytes(m_RamData, other.m_RamData, m_Size);
    }
#ifdef USE_CUDA
    if (other.m_VramData) {
      if (!m_VramData)
        m_VramData = AllocateVram(m_Size);
      CopyCuda(m_VramData, other.m_VramData, ByteCount(m_Size),
               cudaMemcpyDeviceToDevice);
    }
#endif
    m_Device = other.m_Device;
    return *this;
  }

  /// Releases the current buffers and takes over those of `other`, which is
  /// left empty.
  DataAllocator &operator=(DataAllocator &&other) noexcept {
    if (this != &other) {
      DataAllocator stolen(std::move(other));
      Swap(stolen);
    }
    return *this;
  }

  /// Marks RAM as the active device. If the data currently lives in VRAM it
  /// is copied back to the host buffer first (USE_CUDA builds); the host
  /// buffer is allocated on demand. No-op when already in RAM.
  /// @throws std::runtime_error if the device-to-host copy fails.
  void MoveToRAM() { SyncToRam(); }

  /// Marks VRAM as the active device. Under USE_CUDA the device buffer is
  /// allocated on demand and the host contents are uploaded. Without
  /// USE_CUDA only the device tag changes. No-op when already in VRAM.
  /// @throws std::runtime_error if a CUDA call fails (tag stays RAM).
  void MoveToVRAM() {
    if (m_Device == MemoryDevice::VRAM)
      return;
#ifdef USE_CUDA
    if (m_Size > 0) {
      if (!m_VramData)
        m_VramData = AllocateVram(m_Size);
      if (m_RamData)
        CopyCuda(m_VramData, m_RamData, ByteCount(m_Size),
                 cudaMemcpyHostToDevice);
    }
#endif
    m_Device = MemoryDevice::VRAM;
  }

  /// Changes the element count. The first min(old, new) elements of each
  /// existing buffer are carried over bytewise; with owns_objects the rest of
  /// the new host buffer is value-initialised. Under USE_CUDA a device buffer
  /// of the new size is always allocated. The device tag is not changed.
  /// New buffers are fully prepared before the old ones are released, so the
  /// object is unchanged if an allocation or copy throws.
  /// @param size new number of elements; 0 releases all storage.
  void resize(size_t size) {
    if (m_Size == size)
      return;

    T *newRam = nullptr;
    T *newVram = nullptr;

    if (size > 0) {
      const size_t kept = std::min(m_Size, size);

      newRam = AllocateRam(size, m_OwnsObjects, true);
      if (m_RamData)
        CopyBytes(newRam, m_RamData, kept);

#ifdef USE_CUDA
      try {
        newVram = AllocateVram(size);
        if (m_VramData)
          CopyCuda(newVram, m_VramData, ByteCount(kept),
                   cudaMemcpyDeviceToDevice);
      } catch (...) {
        ReleaseVram(newVram);
        ReleaseRam(newRam, m_OwnsObjects);
        throw;
      }
#endif
    }

    ReleaseRam(m_RamData, m_OwnsObjects);
#ifdef USE_CUDA
    ReleaseVram(m_VramData);
#endif

    m_Size = size;
    m_RamData = newRam;
    m_VramData = newVram;
  }

  /// Number of elements.
  size_t size() const { return m_Size; }

  /// Bounds-checked element access; moves the data to RAM first.
  /// @throws std::out_of_range if index >= size().
  T &at(size_t index) {
    SyncToRam();
    if (index >= m_Size)
      throw std::out_of_range("Index out of range");
    return m_RamData[index];
  }

  /// Const overload of at(); still moves the data to RAM.
  const T &at(size_t index) const {
    SyncToRam();
    if (index >= m_Size)
      throw std::out_of_range("Index out of range");
    return m_RamData[index];
  }

  /// Unchecked element access; moves the data to RAM first.
  T &operator[](size_t index) {
    SyncToRam();
    return m_RamData[index];
  }

  /// Const overload of operator[]; still moves the data to RAM.
  const T &operator[](size_t index) const {
    SyncToRam();
    return m_RamData[index];
  }

  /// Pointer to the buffer of the active device, without any transfer:
  /// the host buffer when the tag is RAM, the device buffer otherwise
  /// (nullptr if no device buffer exists).
  T *data() { return (m_Device == MemoryDevice::RAM) ? m_RamData : m_VramData; }
  const T *data() const {
    return (m_Device == MemoryDevice::RAM) ? m_RamData : m_VramData;
  }

  /// Host buffer pointer, regardless of the active device (may be stale).
  T *GetRAMData() { return m_RamData; }
  const T *GetRAMData() const { return m_RamData; }

  /// Device buffer pointer, regardless of the active device (may be null).
  T *GetVRAMData() { return m_VramData; }
  const T *GetVRAMData() const { return m_VramData; }

  /// Device currently tagged as holding the up-to-date data.
  MemoryDevice GetDevice() const { return m_Device; }

  // Iterator support for RAM operations: each call moves the data to RAM.
  T *begin() {
    SyncToRam();
    return m_RamData;
  }
  T *end() {
    SyncToRam();
    return m_RamData + m_Size;
  }
  const T *begin() const {
    SyncToRam();
    return m_RamData;
  }
  const T *end() const {
    SyncToRam();
    return m_RamData + m_Size;
  }

private:
  /// Byte size of `count` elements.
  /// @throws std::length_error if the product does not fit in size_t.
  static size_t ByteCount(size_t count) {
    if (count > static_cast<size_t>(-1) / sizeof(T))
      throw std::length_error("DataAllocator: element count too large");
    return count * sizeof(T);
  }

  /// Allocates host storage for `count` elements in the given ownership mode.
  /// `valueInit` only matters for owned storage (`new T[n]()` vs `new T[n]`).
  static T *AllocateRam(size_t count, bool ownsObjects, bool valueInit) {
    if (ownsObjects)
      return valueInit ? new T[count]() : new T[count];
    return static_cast<T *>(::operator new(ByteCount(count)));
  }

  /// Frees host storage with the form matching AllocateRam(); null is ignored.
  static void ReleaseRam(T *ptr, bool ownsObjects) noexcept {
    if (!ptr)
      return;
    if (ownsObjects)
      delete[] ptr;
    else
      ::operator delete(ptr);
  }

  /// Bytewise host copy of `count` elements (void* casts keep the raw-copy
  /// intent explicit for class types).
  static void CopyBytes(T *dst, const T *src, size_t count) {
    if (count > 0)
      std::memcpy(static_cast<void *>(dst), static_cast<const void *>(src),
                  count * sizeof(T));
  }

#ifdef USE_CUDA
  /// Turns a failed CUDA runtime call into std::runtime_error.
  static void CheckCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess)
      throw std::runtime_error(std::string("DataAllocator: ") + what + ": " +
                               cudaGetErrorString(status));
  }

  /// Allocates device storage for `count` elements.
  static T *AllocateVram(size_t count) {
    void *raw = nullptr;
    CheckCuda(cudaMalloc(&raw, ByteCount(count)), "cudaMalloc failed");
    return static_cast<T *>(raw);
  }

  /// Frees device storage; null is ignored, errors are not reported because
  /// this runs on cleanup paths.
  static void ReleaseVram(T *ptr) noexcept {
    if (ptr)
      cudaFree(ptr);
  }

  /// Checked cudaMemcpy; zero-byte copies are skipped.
  static void CopyCuda(void *dst, const void *src, size_t bytes,
                       cudaMemcpyKind kind) {
    if (bytes > 0)
      CheckCuda(cudaMemcpy(dst, src, bytes, kind), "cudaMemcpy failed");
  }
#endif

  /// Shared implementation of MoveToRAM() that is callable from const
  /// accessors; it only touches the members declared mutable.
  void SyncToRam() const {
    if (m_Device == MemoryDevice::RAM)
      return;
    if (!m_RamData && m_Size > 0)
      m_RamData = AllocateRam(m_Size, m_OwnsObjects, true);
#ifdef USE_CUDA
    if (m_VramData && m_Size > 0)
      CopyCuda(m_RamData, m_VramData, ByteCount(m_Size),
               cudaMemcpyDeviceToHost);
#endif
    m_Device = MemoryDevice::RAM;
  }

  /// Exchanges the complete state of two allocators.
  void Swap(DataAllocator &other) noexcept {
    std::swap(m_Size, other.m_Size);
    std::swap(m_RamData, other.m_RamData);
    std::swap(m_VramData, other.m_VramData);
    std::swap(m_Device, other.m_Device);
    std::swap(m_OwnsObjects, other.m_OwnsObjects);
  }

  size_t m_Size;
  // mutable: const element accessors lazily bring the data back to RAM.
  mutable T *m_RamData;
  T *m_VramData;
  mutable MemoryDevice m_Device;
  bool m_OwnsObjects;
};

} // namespace uLib
+
+#endif // U_MATH_DATAALLOCATOR_H