andrea-dev #1
@@ -34,6 +34,7 @@
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#endif
|
||||
|
||||
namespace uLib {
|
||||
|
||||
@@ -37,6 +37,11 @@
|
||||
#include <Core/SmartPointer.h>
|
||||
#include <Core/StaticInterface.h>
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#endif
|
||||
|
||||
namespace uLib {
|
||||
|
||||
// MetaAllocator Implementation ...
|
||||
@@ -117,10 +122,6 @@ bool operator!=(const MetaAllocator<T> &, const MetaAllocator<U> &) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Vector Implemetation ... wraps std::vector
|
||||
template <typename T> class Vector : public std::vector<T, MetaAllocator<T>> {
|
||||
typedef std::vector<T, MetaAllocator<T>> BaseClass;
|
||||
@@ -173,6 +174,42 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#ifdef USE_CUDA
|
||||
/// Returns a thrust::device_ptr to the VRAM data (valid after MoveToVRAM()).
|
||||
/// thrust::device_ptr<T> is itself a random-access iterator compatible with
|
||||
/// all thrust algorithms (thrust::transform, thrust::sort,
|
||||
/// thrust::for_each…).
|
||||
thrust::device_ptr<T> DeviceData() {
|
||||
if (auto alloc = MetaAllocator<T>::GetDataAllocator(BaseClass::data())) {
|
||||
return thrust::device_pointer_cast(alloc->GetVRAMData());
|
||||
}
|
||||
return thrust::device_ptr<T>(nullptr);
|
||||
}
|
||||
|
||||
thrust::device_ptr<const T> DeviceData() const {
|
||||
if (auto alloc = MetaAllocator<T>::GetDataAllocator(
|
||||
const_cast<T *>(BaseClass::data()))) {
|
||||
return thrust::device_pointer_cast(
|
||||
static_cast<const T *>(alloc->GetVRAMData()));
|
||||
}
|
||||
return thrust::device_ptr<const T>(nullptr);
|
||||
}
|
||||
|
||||
/// Device-side begin iterator (valid after MoveToVRAM()).
|
||||
thrust::device_ptr<T> DeviceBegin() { return DeviceData(); }
|
||||
|
||||
/// Device-side end iterator (valid after MoveToVRAM()).
|
||||
thrust::device_ptr<T> DeviceEnd() {
|
||||
return DeviceData() + static_cast<std::ptrdiff_t>(BaseClass::size());
|
||||
}
|
||||
|
||||
thrust::device_ptr<const T> DeviceBegin() const { return DeviceData(); }
|
||||
|
||||
thrust::device_ptr<const T> DeviceEnd() const {
|
||||
return DeviceData() + static_cast<std::ptrdiff_t>(BaseClass::size());
|
||||
}
|
||||
#endif // USE_CUDA
|
||||
|
||||
inline void PrintSelf(std::ostream &o);
|
||||
|
||||
// Overrides for auto-sync //
|
||||
|
||||
@@ -31,3 +31,8 @@ set(LIBRARIES
|
||||
${ROOT_LIBRARIES}
|
||||
)
|
||||
uLib_add_tests(Core)
|
||||
|
||||
if(USE_CUDA)
|
||||
set_source_files_properties(VectorMetaAllocatorTest.cpp PROPERTIES LANGUAGE CUDA)
|
||||
endif()
|
||||
|
||||
|
||||
@@ -12,6 +12,15 @@
|
||||
#include "testing-prototype.h"
|
||||
#include <Core/Vector.h>
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/transform.h>
|
||||
|
||||
struct DoubleFunctor {
|
||||
__host__ __device__ int operator()(int x) const { return x * 2; }
|
||||
};
|
||||
#endif
|
||||
|
||||
int main() {
|
||||
BEGIN_TESTING(VectorMetaAllocator);
|
||||
|
||||
@@ -41,14 +50,31 @@ int main() {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Verify DeviceData() matches GetVRAMData()
|
||||
{
|
||||
thrust::device_ptr<int> dev_ptr = v.DeviceData();
|
||||
if (dev_ptr.get() != vram_ptr) {
|
||||
std::cout << "Error: DeviceData() does not match GetVRAMData()!\n";
|
||||
exit(1);
|
||||
}
|
||||
std::cout << "DeviceData() matches GetVRAMData(). OK\n";
|
||||
}
|
||||
|
||||
// Use thrust::transform via DeviceBegin()/DeviceEnd() to double all elements
|
||||
// on device
|
||||
std::cout << "Doubling elements on device via thrust::transform...\n";
|
||||
thrust::transform(v.DeviceBegin(), v.DeviceEnd(), v.DeviceBegin(),
|
||||
DoubleFunctor{});
|
||||
|
||||
std::cout << "Moving back to RAM...\n";
|
||||
v.MoveToRAM();
|
||||
|
||||
std::cout << "RAM contents after VRAM trip: ";
|
||||
std::cout << "RAM contents after VRAM trip + thrust transform: ";
|
||||
for (size_t i = 0; i < v.size(); ++i) {
|
||||
std::cout << v[i] << " ";
|
||||
if (v[i] != (int)(i + 1)) {
|
||||
std::cout << "\nError: Data corrupted after RAM->VRAM->RAM trip at index "
|
||||
if (v[i] != (int)((i + 1) * 2)) {
|
||||
std::cout << "\nError: Data corrupted after RAM->VRAM->thrust->RAM trip "
|
||||
"at index "
|
||||
<< i << "\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user