Check whether an algorithm can run on CUDA
This commit is contained in:
@@ -30,10 +30,15 @@
|
|||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <condition_variable>
|
#include <condition_variable>
#include <cstdio>
|
||||||
|
|
||||||
|
#ifdef USE_CUDA
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "Core/Object.h"
|
#include "Core/Object.h"
|
||||||
#include "Core/Monitor.h"
|
#include "Core/Monitor.h"
|
||||||
#include "Core/Threads.h"
|
#include "Core/Threads.h"
|
||||||
#include "Core/Property.h"
|
#include "Core/Property.h"
|
||||||
|
#include "Core/DataAllocator.h"
|
||||||
|
|
||||||
namespace uLib {
|
namespace uLib {
|
||||||
|
|
||||||
@@ -57,7 +62,12 @@ public:
|
|||||||
using EncoderType = T_enc;
|
using EncoderType = T_enc;
|
||||||
using DecoderType = T_dec;
|
using DecoderType = T_dec;
|
||||||
|
|
||||||
Algorithm() : Object(), m_Encoder(nullptr), m_Decoder(nullptr) {}
|
Algorithm()
|
||||||
|
: Object()
|
||||||
|
, m_Encoder(nullptr)
|
||||||
|
, m_Decoder(nullptr)
|
||||||
|
, m_PreferredDevice(MemoryDevice::RAM)
|
||||||
|
{}
|
||||||
virtual ~Algorithm() = default;
|
virtual ~Algorithm() = default;
|
||||||
|
|
||||||
virtual const char* GetClassName() const override { return "Algorithm"; }
|
virtual const char* GetClassName() const override { return "Algorithm"; }
|
||||||
@@ -79,6 +89,20 @@ public:
|
|||||||
void SetDecoder(Algorithm* dec) { m_Decoder = dec; }
|
void SetDecoder(Algorithm* dec) { m_Decoder = dec; }
|
||||||
Algorithm* GetDecoder() const { return m_Decoder; }
|
Algorithm* GetDecoder() const { return m_Decoder; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Returns the preferred memory device for this algorithm.
|
||||||
|
* CUDA-capable algorithms should override to return VRAM when their
|
||||||
|
* data resides on the GPU. AlgorithmTask uses this to synchronize
|
||||||
|
* appropriately.
|
||||||
|
*/
|
||||||
|
virtual MemoryDevice GetPreferredDevice() const { return m_PreferredDevice; }
|
||||||
|
/**
 * @brief Stores the device reported by the base GetPreferredDevice().
 * @param dev Device to remember as the preferred one.
 * @note A subclass that overrides GetPreferredDevice() is free to ignore
 *       the value stored here.
 */
void SetPreferredDevice(MemoryDevice dev)
{
    m_PreferredDevice = dev;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Returns true if this algorithm prefers GPU execution.
|
||||||
|
*/
|
||||||
|
bool IsGPU() const { return GetPreferredDevice() == MemoryDevice::VRAM; }
|
||||||
|
|
||||||
signals:
|
signals:
|
||||||
virtual void Started() { ULIB_SIGNAL_EMIT(Algorithm::Started); }
|
virtual void Started() { ULIB_SIGNAL_EMIT(Algorithm::Started); }
|
||||||
virtual void Finished() { ULIB_SIGNAL_EMIT(Algorithm::Finished); }
|
virtual void Finished() { ULIB_SIGNAL_EMIT(Algorithm::Finished); }
|
||||||
@@ -86,6 +110,7 @@ signals:
|
|||||||
protected:
|
protected:
|
||||||
Algorithm* m_Encoder;
|
Algorithm* m_Encoder;
|
||||||
Algorithm* m_Decoder;
|
Algorithm* m_Decoder;
|
||||||
|
MemoryDevice m_PreferredDevice;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -192,11 +217,19 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/**
 * @brief Runs one pass of the attached algorithm on the current input.
 *
 * Does nothing when no algorithm is attached. In CUDA builds, if the
 * algorithm reports itself as GPU-based once Process() has returned, the
 * device is synchronized before this function returns. A failed
 * synchronization is reported on stderr instead of being silently dropped.
 */
void ExecuteAlgorithm() {
    if (!m_Algorithm) return;

    m_Algorithm->Process(m_Input);

#ifdef USE_CUDA
    if (m_Algorithm->IsGPU()) {
        // The synchronizing call is where errors from earlier asynchronous
        // work are returned, so its status must not be discarded.
        const cudaError_t syncStatus = cudaDeviceSynchronize();
        if (syncStatus != cudaSuccess) {
            std::fprintf(stderr,
                         "ExecuteAlgorithm: cudaDeviceSynchronize failed: %s\n",
                         cudaGetErrorString(syncStatus));
        }
    }
#endif
}
|
||||||
|
|
||||||
void RunCyclic() {
|
void RunCyclic() {
|
||||||
while (!m_StopRequested.load()) {
|
while (!m_StopRequested.load()) {
|
||||||
if (m_Algorithm) {
|
ExecuteAlgorithm();
|
||||||
m_Algorithm->Process(m_Input);
|
|
||||||
}
|
|
||||||
std::unique_lock<std::timed_mutex> lock(m_WaitMutex.GetNative());
|
std::unique_lock<std::timed_mutex> lock(m_WaitMutex.GetNative());
|
||||||
m_Condition.wait_for(lock,
|
m_Condition.wait_for(lock,
|
||||||
std::chrono::milliseconds(m_CycleTime_ms),
|
std::chrono::milliseconds(m_CycleTime_ms),
|
||||||
@@ -212,9 +245,7 @@ private:
|
|||||||
});
|
});
|
||||||
if (m_StopRequested.load()) break;
|
if (m_StopRequested.load()) break;
|
||||||
m_Triggered.store(false);
|
m_Triggered.store(false);
|
||||||
if (m_Algorithm) {
|
ExecuteAlgorithm();
|
||||||
m_Algorithm->Process(m_Input);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -79,6 +79,17 @@ public:
|
|||||||
*/
|
*/
|
||||||
void Run();
|
void Run();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Returns VRAM if image or kernel data is on GPU, RAM otherwise.
|
||||||
|
*/
|
||||||
|
MemoryDevice GetPreferredDevice() const override {
|
||||||
|
if (m_Image && m_Image->Data().GetDevice() == MemoryDevice::VRAM)
|
||||||
|
return MemoryDevice::VRAM;
|
||||||
|
if (m_KernelData.ConstData().GetDevice() == MemoryDevice::VRAM)
|
||||||
|
return MemoryDevice::VRAM;
|
||||||
|
return MemoryDevice::RAM;
|
||||||
|
}
|
||||||
|
|
||||||
void SetKernelNumericXZY(const std::vector<float> &numeric);
|
void SetKernelNumericXZY(const std::vector<float> &numeric);
|
||||||
|
|
||||||
void SetKernelSpherical(float (*shape)(float));
|
void SetKernelSpherical(float (*shape)(float));
|
||||||
|
|||||||
@@ -109,7 +109,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(USE_CUDA) && defined(__CUDACC__)
|
#if defined(USE_CUDA) && defined(__CUDACC__)
|
||||||
void Run() {
|
VoxImage<VoxelT>* Process(VoxImage<VoxelT>* const& image) override {
|
||||||
|
if (this->m_Image != image) this->SetImage(image);
|
||||||
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
||||||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
||||||
|
|
||||||
@@ -136,8 +137,9 @@ public:
|
|||||||
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
|
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
|
||||||
mAtrim, mBtrim);
|
mAtrim, mBtrim);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
return this->m_Image;
|
||||||
} else {
|
} else {
|
||||||
BaseClass::Run();
|
return BaseClass::Process(image);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -207,7 +209,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(USE_CUDA) && defined(__CUDACC__)
|
#if defined(USE_CUDA) && defined(__CUDACC__)
|
||||||
void Run() {
|
VoxImage<VoxelT>* Process(VoxImage<VoxelT>* const& image) override {
|
||||||
|
if (this->m_Image != image) this->SetImage(image);
|
||||||
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
||||||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
||||||
|
|
||||||
@@ -234,8 +237,9 @@ public:
|
|||||||
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
|
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
|
||||||
mAtrim, mBtrim);
|
mAtrim, mBtrim);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
return this->m_Image;
|
||||||
} else {
|
} else {
|
||||||
BaseClass::Run();
|
return BaseClass::Process(image);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -67,7 +67,8 @@ public:
|
|||||||
VoxFilterAlgorithmLinear(const Vector3i &size) : BaseClass(size) {}
|
VoxFilterAlgorithmLinear(const Vector3i &size) : BaseClass(size) {}
|
||||||
|
|
||||||
#if defined(USE_CUDA) && defined(__CUDACC__)
|
#if defined(USE_CUDA) && defined(__CUDACC__)
|
||||||
void Run() {
|
VoxImage<VoxelT>* Process(VoxImage<VoxelT>* const& image) override {
|
||||||
|
if (this->m_Image != image) this->SetImage(image);
|
||||||
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
||||||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
||||||
|
|
||||||
@@ -92,8 +93,9 @@ public:
|
|||||||
LinearFilterKernel<<<blocksPerGrid, threadsPerBlock>>>(
|
LinearFilterKernel<<<blocksPerGrid, threadsPerBlock>>>(
|
||||||
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count);
|
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count);
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
return this->m_Image;
|
||||||
} else {
|
} else {
|
||||||
BaseClass::Run();
|
return BaseClass::Process(image);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user