andrea-dev #1

Merged
andrea merged 19 commits from andrea-dev into main 2026-03-06 17:17:52 +01:00
4 changed files with 76 additions and 7 deletions
Showing only changes of commit f3ebba4931 - Show all commits

View File

@@ -34,6 +34,7 @@
#ifdef USE_CUDA #ifdef USE_CUDA
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <thrust/device_vector.h>
#endif #endif
namespace uLib { namespace uLib {

View File

@@ -37,6 +37,11 @@
#include <Core/SmartPointer.h> #include <Core/SmartPointer.h>
#include <Core/StaticInterface.h> #include <Core/StaticInterface.h>
#ifdef USE_CUDA
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#endif
namespace uLib { namespace uLib {
// MetaAllocator Implementation ... // MetaAllocator Implementation ...
@@ -117,10 +122,6 @@ bool operator!=(const MetaAllocator<T> &, const MetaAllocator<U> &) {
return false; return false;
} }
// Vector Implemetation ... wraps std::vector // Vector Implemetation ... wraps std::vector
template <typename T> class Vector : public std::vector<T, MetaAllocator<T>> { template <typename T> class Vector : public std::vector<T, MetaAllocator<T>> {
typedef std::vector<T, MetaAllocator<T>> BaseClass; typedef std::vector<T, MetaAllocator<T>> BaseClass;
@@ -173,6 +174,42 @@ public:
return nullptr; return nullptr;
} }
#ifdef USE_CUDA
/// Returns a thrust::device_ptr to the VRAM data (valid after MoveToVRAM()).
/// thrust::device_ptr<T> is itself a random-access iterator compatible with
/// all thrust algorithms (thrust::transform, thrust::sort,
/// thrust::for_each…).
thrust::device_ptr<T> DeviceData() {
if (auto alloc = MetaAllocator<T>::GetDataAllocator(BaseClass::data())) {
return thrust::device_pointer_cast(alloc->GetVRAMData());
}
return thrust::device_ptr<T>(nullptr);
}
thrust::device_ptr<const T> DeviceData() const {
if (auto alloc = MetaAllocator<T>::GetDataAllocator(
const_cast<T *>(BaseClass::data()))) {
return thrust::device_pointer_cast(
static_cast<const T *>(alloc->GetVRAMData()));
}
return thrust::device_ptr<const T>(nullptr);
}
/// Device-side begin iterator (valid after MoveToVRAM()).
thrust::device_ptr<T> DeviceBegin() { return DeviceData(); }
/// Device-side end iterator (valid after MoveToVRAM()).
thrust::device_ptr<T> DeviceEnd() {
return DeviceData() + static_cast<std::ptrdiff_t>(BaseClass::size());
}
thrust::device_ptr<const T> DeviceBegin() const { return DeviceData(); }
thrust::device_ptr<const T> DeviceEnd() const {
return DeviceData() + static_cast<std::ptrdiff_t>(BaseClass::size());
}
#endif // USE_CUDA
inline void PrintSelf(std::ostream &o); inline void PrintSelf(std::ostream &o);
// Overrides for auto-sync // // Overrides for auto-sync //

View File

@@ -31,3 +31,8 @@ set(LIBRARIES
${ROOT_LIBRARIES} ${ROOT_LIBRARIES}
) )
uLib_add_tests(Core) uLib_add_tests(Core)
if(USE_CUDA)
set_source_files_properties(VectorMetaAllocatorTest.cpp PROPERTIES LANGUAGE CUDA)
endif()

View File

@@ -12,6 +12,15 @@
#include "testing-prototype.h" #include "testing-prototype.h"
#include <Core/Vector.h> #include <Core/Vector.h>
#ifdef USE_CUDA
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
struct DoubleFunctor {
__host__ __device__ int operator()(int x) const { return x * 2; }
};
#endif
int main() { int main() {
BEGIN_TESTING(VectorMetaAllocator); BEGIN_TESTING(VectorMetaAllocator);
@@ -41,14 +50,31 @@ int main() {
exit(1); exit(1);
} }
// Verify DeviceData() matches GetVRAMData()
{
thrust::device_ptr<int> dev_ptr = v.DeviceData();
if (dev_ptr.get() != vram_ptr) {
std::cout << "Error: DeviceData() does not match GetVRAMData()!\n";
exit(1);
}
std::cout << "DeviceData() matches GetVRAMData(). OK\n";
}
// Use thrust::transform via DeviceBegin()/DeviceEnd() to double all elements
// on device
std::cout << "Doubling elements on device via thrust::transform...\n";
thrust::transform(v.DeviceBegin(), v.DeviceEnd(), v.DeviceBegin(),
DoubleFunctor{});
std::cout << "Moving back to RAM...\n"; std::cout << "Moving back to RAM...\n";
v.MoveToRAM(); v.MoveToRAM();
std::cout << "RAM contents after VRAM trip: "; std::cout << "RAM contents after VRAM trip + thrust transform: ";
for (size_t i = 0; i < v.size(); ++i) { for (size_t i = 0; i < v.size(); ++i) {
std::cout << v[i] << " "; std::cout << v[i] << " ";
if (v[i] != (int)(i + 1)) { if (v[i] != (int)((i + 1) * 2)) {
std::cout << "\nError: Data corrupted after RAM->VRAM->RAM trip at index " std::cout << "\nError: Data corrupted after RAM->VRAM->thrust->RAM trip "
"at index "
<< i << "\n"; << i << "\n";
exit(1); exit(1);
} }