diff --git a/src/Core/DataAllocator.h b/src/Core/DataAllocator.h index 147dd9f..7e5a7a3 100644 --- a/src/Core/DataAllocator.h +++ b/src/Core/DataAllocator.h @@ -34,6 +34,7 @@ #ifdef USE_CUDA #include +#include #endif namespace uLib { diff --git a/src/Core/Vector.h b/src/Core/Vector.h index d7cf928..4ff69c7 100644 --- a/src/Core/Vector.h +++ b/src/Core/Vector.h @@ -37,6 +37,11 @@ #include #include +#ifdef USE_CUDA +#include +#include +#endif + namespace uLib { // MetaAllocator Implementation ... @@ -117,10 +122,6 @@ bool operator!=(const MetaAllocator &, const MetaAllocator &) { return false; } - - - - // Vector Implemetation ... wraps std::vector template class Vector : public std::vector> { typedef std::vector> BaseClass; @@ -173,6 +174,42 @@ public: return nullptr; } +#ifdef USE_CUDA + /// Returns a thrust::device_ptr to the VRAM data (valid after MoveToVRAM()). + /// thrust::device_ptr is itself a random-access iterator compatible with + /// all thrust algorithms (thrust::transform, thrust::sort, + /// thrust::for_each…). + thrust::device_ptr DeviceData() { + if (auto alloc = MetaAllocator::GetDataAllocator(BaseClass::data())) { + return thrust::device_pointer_cast(alloc->GetVRAMData()); + } + return thrust::device_ptr(nullptr); + } + + thrust::device_ptr DeviceData() const { + if (auto alloc = MetaAllocator::GetDataAllocator( + const_cast(BaseClass::data()))) { + return thrust::device_pointer_cast( + static_cast(alloc->GetVRAMData())); + } + return thrust::device_ptr(nullptr); + } + + /// Device-side begin iterator (valid after MoveToVRAM()). + thrust::device_ptr DeviceBegin() { return DeviceData(); } + + /// Device-side end iterator (valid after MoveToVRAM()). + thrust::device_ptr DeviceEnd() { + return DeviceData() + static_cast(BaseClass::size()); + } + + thrust::device_ptr DeviceBegin() const { return DeviceData(); } + + thrust::device_ptr DeviceEnd() const { + return DeviceData() + static_cast(BaseClass::size()); + } +#endif // USE_CUDA + inline void PrintSelf(std::ostream &o); // Overrides for auto-sync // diff --git a/src/Core/testing/CMakeLists.txt b/src/Core/testing/CMakeLists.txt index 82dc679..7d08dde 100644 --- a/src/Core/testing/CMakeLists.txt +++ b/src/Core/testing/CMakeLists.txt @@ -31,3 +31,8 @@ set(LIBRARIES ${ROOT_LIBRARIES} ) uLib_add_tests(Core) + +if(USE_CUDA) + set_source_files_properties(VectorMetaAllocatorTest.cpp PROPERTIES LANGUAGE CUDA) +endif() + diff --git a/src/Core/testing/VectorMetaAllocatorTest.cpp b/src/Core/testing/VectorMetaAllocatorTest.cpp index c75a643..a0c65ce 100644 --- a/src/Core/testing/VectorMetaAllocatorTest.cpp +++ b/src/Core/testing/VectorMetaAllocatorTest.cpp @@ -12,6 +12,15 @@ #include "testing-prototype.h" #include +#ifdef USE_CUDA +#include +#include + +struct DoubleFunctor { + __host__ __device__ int operator()(int x) const { return x * 2; } +}; +#endif + int main() { BEGIN_TESTING(VectorMetaAllocator); @@ -41,14 +50,31 @@ int main() { exit(1); } + // Verify DeviceData() matches GetVRAMData() + { + thrust::device_ptr dev_ptr = v.DeviceData(); + if (dev_ptr.get() != vram_ptr) { + std::cout << "Error: DeviceData() does not match GetVRAMData()!\n"; + exit(1); + } + std::cout << "DeviceData() matches GetVRAMData(). OK\n"; + } + + // Use thrust::transform via DeviceBegin()/DeviceEnd() to double all elements + // on device + std::cout << "Doubling elements on device via thrust::transform...\n"; + thrust::transform(v.DeviceBegin(), v.DeviceEnd(), v.DeviceBegin(), + DoubleFunctor{}); + std::cout << "Moving back to RAM...\n"; v.MoveToRAM(); - std::cout << "RAM contents after VRAM trip: "; + std::cout << "RAM contents after VRAM trip + thrust transform: "; for (size_t i = 0; i < v.size(); ++i) { std::cout << v[i] << " "; - if (v[i] != (int)(i + 1)) { - std::cout << "\nError: Data corrupted after RAM->VRAM->RAM trip at index " + if (v[i] != (int)((i + 1) * 2)) { + std::cout << "\nError: Data corrupted after RAM->VRAM->thrust->RAM trip " + "at index " << i << "\n"; exit(1); }