andrea-dev #1
@@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
#ifdef USE_CUDA
|
#ifdef USE_CUDA
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
#include <thrust/device_vector.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace uLib {
|
namespace uLib {
|
||||||
|
|||||||
@@ -37,6 +37,11 @@
|
|||||||
#include <Core/SmartPointer.h>
|
#include <Core/SmartPointer.h>
|
||||||
#include <Core/StaticInterface.h>
|
#include <Core/StaticInterface.h>
|
||||||
|
|
||||||
|
#ifdef USE_CUDA
|
||||||
|
#include <thrust/device_ptr.h>
|
||||||
|
#include <thrust/device_vector.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace uLib {
|
namespace uLib {
|
||||||
|
|
||||||
// MetaAllocator Implementation ...
|
// MetaAllocator Implementation ...
|
||||||
@@ -117,10 +122,6 @@ bool operator!=(const MetaAllocator<T> &, const MetaAllocator<U> &) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Vector Implemetation ... wraps std::vector
|
// Vector Implemetation ... wraps std::vector
|
||||||
template <typename T> class Vector : public std::vector<T, MetaAllocator<T>> {
|
template <typename T> class Vector : public std::vector<T, MetaAllocator<T>> {
|
||||||
typedef std::vector<T, MetaAllocator<T>> BaseClass;
|
typedef std::vector<T, MetaAllocator<T>> BaseClass;
|
||||||
@@ -173,6 +174,42 @@ public:
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_CUDA
|
||||||
|
/// Returns a thrust::device_ptr to the VRAM data (valid after MoveToVRAM()).
|
||||||
|
/// thrust::device_ptr<T> is itself a random-access iterator compatible with
|
||||||
|
/// all thrust algorithms (thrust::transform, thrust::sort,
|
||||||
|
/// thrust::for_each…).
|
||||||
|
thrust::device_ptr<T> DeviceData() {
|
||||||
|
if (auto alloc = MetaAllocator<T>::GetDataAllocator(BaseClass::data())) {
|
||||||
|
return thrust::device_pointer_cast(alloc->GetVRAMData());
|
||||||
|
}
|
||||||
|
return thrust::device_ptr<T>(nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
thrust::device_ptr<const T> DeviceData() const {
|
||||||
|
if (auto alloc = MetaAllocator<T>::GetDataAllocator(
|
||||||
|
const_cast<T *>(BaseClass::data()))) {
|
||||||
|
return thrust::device_pointer_cast(
|
||||||
|
static_cast<const T *>(alloc->GetVRAMData()));
|
||||||
|
}
|
||||||
|
return thrust::device_ptr<const T>(nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Device-side begin iterator (valid after MoveToVRAM()).
|
||||||
|
thrust::device_ptr<T> DeviceBegin() { return DeviceData(); }
|
||||||
|
|
||||||
|
/// Device-side end iterator (valid after MoveToVRAM()).
|
||||||
|
thrust::device_ptr<T> DeviceEnd() {
|
||||||
|
return DeviceData() + static_cast<std::ptrdiff_t>(BaseClass::size());
|
||||||
|
}
|
||||||
|
|
||||||
|
thrust::device_ptr<const T> DeviceBegin() const { return DeviceData(); }
|
||||||
|
|
||||||
|
thrust::device_ptr<const T> DeviceEnd() const {
|
||||||
|
return DeviceData() + static_cast<std::ptrdiff_t>(BaseClass::size());
|
||||||
|
}
|
||||||
|
#endif // USE_CUDA
|
||||||
|
|
||||||
inline void PrintSelf(std::ostream &o);
|
inline void PrintSelf(std::ostream &o);
|
||||||
|
|
||||||
// Overrides for auto-sync //
|
// Overrides for auto-sync //
|
||||||
|
|||||||
@@ -31,3 +31,8 @@ set(LIBRARIES
|
|||||||
${ROOT_LIBRARIES}
|
${ROOT_LIBRARIES}
|
||||||
)
|
)
|
||||||
uLib_add_tests(Core)
|
uLib_add_tests(Core)
|
||||||
|
|
||||||
|
if(USE_CUDA)
|
||||||
|
set_source_files_properties(VectorMetaAllocatorTest.cpp PROPERTIES LANGUAGE CUDA)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,15 @@
|
|||||||
#include "testing-prototype.h"
|
#include "testing-prototype.h"
|
||||||
#include <Core/Vector.h>
|
#include <Core/Vector.h>
|
||||||
|
|
||||||
|
#ifdef USE_CUDA
|
||||||
|
#include <thrust/device_ptr.h>
|
||||||
|
#include <thrust/transform.h>
|
||||||
|
|
||||||
|
struct DoubleFunctor {
|
||||||
|
__host__ __device__ int operator()(int x) const { return x * 2; }
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
BEGIN_TESTING(VectorMetaAllocator);
|
BEGIN_TESTING(VectorMetaAllocator);
|
||||||
|
|
||||||
@@ -41,14 +50,31 @@ int main() {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify DeviceData() matches GetVRAMData()
|
||||||
|
{
|
||||||
|
thrust::device_ptr<int> dev_ptr = v.DeviceData();
|
||||||
|
if (dev_ptr.get() != vram_ptr) {
|
||||||
|
std::cout << "Error: DeviceData() does not match GetVRAMData()!\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
std::cout << "DeviceData() matches GetVRAMData(). OK\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use thrust::transform via DeviceBegin()/DeviceEnd() to double all elements
|
||||||
|
// on device
|
||||||
|
std::cout << "Doubling elements on device via thrust::transform...\n";
|
||||||
|
thrust::transform(v.DeviceBegin(), v.DeviceEnd(), v.DeviceBegin(),
|
||||||
|
DoubleFunctor{});
|
||||||
|
|
||||||
std::cout << "Moving back to RAM...\n";
|
std::cout << "Moving back to RAM...\n";
|
||||||
v.MoveToRAM();
|
v.MoveToRAM();
|
||||||
|
|
||||||
std::cout << "RAM contents after VRAM trip: ";
|
std::cout << "RAM contents after VRAM trip + thrust transform: ";
|
||||||
for (size_t i = 0; i < v.size(); ++i) {
|
for (size_t i = 0; i < v.size(); ++i) {
|
||||||
std::cout << v[i] << " ";
|
std::cout << v[i] << " ";
|
||||||
if (v[i] != (int)(i + 1)) {
|
if (v[i] != (int)((i + 1) * 2)) {
|
||||||
std::cout << "\nError: Data corrupted after RAM->VRAM->RAM trip at index "
|
std::cout << "\nError: Data corrupted after RAM->VRAM->thrust->RAM trip "
|
||||||
|
"at index "
|
||||||
<< i << "\n";
|
<< i << "\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user