Check whether an algorithm can run on CUDA
This commit is contained in:
@@ -30,10 +30,15 @@
|
||||
#include <chrono>
#include <condition_variable>
#include <cstdio>

#ifdef USE_CUDA
#include <cuda_runtime.h>
#endif

#include "Core/Object.h"
#include "Core/Monitor.h"
#include "Core/Threads.h"
#include "Core/Property.h"
#include "Core/DataAllocator.h"
|
||||
|
||||
namespace uLib {
|
||||
|
||||
@@ -57,7 +62,12 @@ public:
|
||||
using EncoderType = T_enc;
|
||||
using DecoderType = T_dec;
|
||||
|
||||
Algorithm() : Object(), m_Encoder(nullptr), m_Decoder(nullptr) {}
|
||||
Algorithm()
|
||||
: Object()
|
||||
, m_Encoder(nullptr)
|
||||
, m_Decoder(nullptr)
|
||||
, m_PreferredDevice(MemoryDevice::RAM)
|
||||
{}
|
||||
virtual ~Algorithm() = default;
|
||||
|
||||
virtual const char* GetClassName() const override { return "Algorithm"; }
|
||||
@@ -79,6 +89,20 @@ public:
|
||||
void SetDecoder(Algorithm* dec) { m_Decoder = dec; }
|
||||
Algorithm* GetDecoder() const { return m_Decoder; }
|
||||
|
||||
/**
|
||||
* @brief Returns the preferred memory device for this algorithm.
|
||||
* CUDA-capable algorithms should override to return VRAM when their
|
||||
* data resides on the GPU. AlgorithmTask uses this to synchronize
|
||||
* appropriately.
|
||||
*/
|
||||
virtual MemoryDevice GetPreferredDevice() const { return m_PreferredDevice; }
|
||||
/**
 * @brief Records the device returned by the base GetPreferredDevice().
 *
 * NOTE(review): a subclass that overrides GetPreferredDevice() is free to
 * ignore this stored value - confirm against the overrides in use.
 *
 * @param dev Device to store in m_PreferredDevice.
 */
void SetPreferredDevice(MemoryDevice dev) {
    m_PreferredDevice = dev;
}
|
||||
|
||||
/**
|
||||
* @brief Returns true if this algorithm prefers GPU execution.
|
||||
*/
|
||||
bool IsGPU() const { return GetPreferredDevice() == MemoryDevice::VRAM; }
|
||||
|
||||
signals:
|
||||
virtual void Started() { ULIB_SIGNAL_EMIT(Algorithm::Started); }
|
||||
virtual void Finished() { ULIB_SIGNAL_EMIT(Algorithm::Finished); }
|
||||
@@ -86,6 +110,7 @@ signals:
|
||||
protected:
|
||||
Algorithm* m_Encoder;
|
||||
Algorithm* m_Decoder;
|
||||
MemoryDevice m_PreferredDevice;
|
||||
};
|
||||
|
||||
|
||||
@@ -192,11 +217,19 @@ protected:
|
||||
}
|
||||
|
||||
private:
|
||||
/**
 * @brief Runs the attached algorithm once on m_Input.
 *
 * Does nothing when no algorithm is attached. In CUDA builds, when the
 * algorithm reports that it prefers the GPU, the call waits for the device
 * to finish before returning and reports any CUDA error on stderr instead
 * of silently dropping it.
 */
void ExecuteAlgorithm() {
    if (!m_Algorithm) return;
    m_Algorithm->Process(m_Input);
#ifdef USE_CUDA
    if (m_Algorithm->IsGPU()) {
        // The synchronizing call is where asynchronous kernel failures are
        // surfaced, so its status must be inspected rather than discarded.
        const cudaError_t status = cudaDeviceSynchronize();
        if (status != cudaSuccess) {
            std::fprintf(stderr,
                         "ExecuteAlgorithm: CUDA error after Process(): %s\n",
                         cudaGetErrorString(status));
        }
    }
#endif
}
|
||||
|
||||
void RunCyclic() {
|
||||
while (!m_StopRequested.load()) {
|
||||
if (m_Algorithm) {
|
||||
m_Algorithm->Process(m_Input);
|
||||
}
|
||||
ExecuteAlgorithm();
|
||||
std::unique_lock<std::timed_mutex> lock(m_WaitMutex.GetNative());
|
||||
m_Condition.wait_for(lock,
|
||||
std::chrono::milliseconds(m_CycleTime_ms),
|
||||
@@ -212,9 +245,7 @@ private:
|
||||
});
|
||||
if (m_StopRequested.load()) break;
|
||||
m_Triggered.store(false);
|
||||
if (m_Algorithm) {
|
||||
m_Algorithm->Process(m_Input);
|
||||
}
|
||||
ExecuteAlgorithm();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -79,6 +79,17 @@ public:
|
||||
*/
|
||||
void Run();
|
||||
|
||||
/**
|
||||
* @brief Returns VRAM if image or kernel data is on GPU, RAM otherwise.
|
||||
*/
|
||||
MemoryDevice GetPreferredDevice() const override {
|
||||
if (m_Image && m_Image->Data().GetDevice() == MemoryDevice::VRAM)
|
||||
return MemoryDevice::VRAM;
|
||||
if (m_KernelData.ConstData().GetDevice() == MemoryDevice::VRAM)
|
||||
return MemoryDevice::VRAM;
|
||||
return MemoryDevice::RAM;
|
||||
}
|
||||
|
||||
void SetKernelNumericXZY(const std::vector<float> &numeric);
|
||||
|
||||
void SetKernelSpherical(float (*shape)(float));
|
||||
|
||||
@@ -109,7 +109,8 @@ public:
|
||||
}
|
||||
|
||||
#if defined(USE_CUDA) && defined(__CUDACC__)
|
||||
void Run() {
|
||||
VoxImage<VoxelT>* Process(VoxImage<VoxelT>* const& image) override {
|
||||
if (this->m_Image != image) this->SetImage(image);
|
||||
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
||||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
||||
|
||||
@@ -136,8 +137,9 @@ public:
|
||||
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
|
||||
mAtrim, mBtrim);
|
||||
cudaDeviceSynchronize();
|
||||
return this->m_Image;
|
||||
} else {
|
||||
BaseClass::Run();
|
||||
return BaseClass::Process(image);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -207,7 +209,8 @@ public:
|
||||
}
|
||||
|
||||
#if defined(USE_CUDA) && defined(__CUDACC__)
|
||||
void Run() {
|
||||
VoxImage<VoxelT>* Process(VoxImage<VoxelT>* const& image) override {
|
||||
if (this->m_Image != image) this->SetImage(image);
|
||||
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
||||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
||||
|
||||
@@ -234,8 +237,9 @@ public:
|
||||
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count,
|
||||
mAtrim, mBtrim);
|
||||
cudaDeviceSynchronize();
|
||||
return this->m_Image;
|
||||
} else {
|
||||
BaseClass::Run();
|
||||
return BaseClass::Process(image);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -67,7 +67,8 @@ public:
|
||||
VoxFilterAlgorithmLinear(const Vector3i &size) : BaseClass(size) {}
|
||||
|
||||
#if defined(USE_CUDA) && defined(__CUDACC__)
|
||||
void Run() {
|
||||
VoxImage<VoxelT>* Process(VoxImage<VoxelT>* const& image) override {
|
||||
if (this->m_Image != image) this->SetImage(image);
|
||||
if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM ||
|
||||
this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) {
|
||||
|
||||
@@ -92,8 +93,9 @@ public:
|
||||
LinearFilterKernel<<<blocksPerGrid, threadsPerBlock>>>(
|
||||
d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count);
|
||||
cudaDeviceSynchronize();
|
||||
return this->m_Image;
|
||||
} else {
|
||||
BaseClass::Run();
|
||||
return BaseClass::Process(image);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user