From ec2027e980dc9e2cbf97e5367656f2eb9ce22c9f Mon Sep 17 00:00:00 2001 From: AndreaRigoni Date: Fri, 27 Mar 2026 16:42:04 +0000 Subject: [PATCH] check if algorithm could be run on cuda --- src/Core/Algorithm.h | 45 ++++++++++++++++++++++++++----- src/Math/VoxImageFilter.h | 11 ++++++++ src/Math/VoxImageFilterABTrim.hpp | 12 ++++++--- src/Math/VoxImageFilterLinear.hpp | 6 +++-- 4 files changed, 61 insertions(+), 13 deletions(-) diff --git a/src/Core/Algorithm.h b/src/Core/Algorithm.h index 720b117..e5375a4 100644 --- a/src/Core/Algorithm.h +++ b/src/Core/Algorithm.h @@ -30,10 +30,15 @@ #include #include +#ifdef USE_CUDA +#include +#endif + #include "Core/Object.h" #include "Core/Monitor.h" #include "Core/Threads.h" #include "Core/Property.h" +#include "Core/DataAllocator.h" namespace uLib { @@ -57,7 +62,12 @@ public: using EncoderType = T_enc; using DecoderType = T_dec; - Algorithm() : Object(), m_Encoder(nullptr), m_Decoder(nullptr) {} + Algorithm() + : Object() + , m_Encoder(nullptr) + , m_Decoder(nullptr) + , m_PreferredDevice(MemoryDevice::RAM) + {} virtual ~Algorithm() = default; virtual const char* GetClassName() const override { return "Algorithm"; } @@ -79,6 +89,20 @@ public: void SetDecoder(Algorithm* dec) { m_Decoder = dec; } Algorithm* GetDecoder() const { return m_Decoder; } + /** + * @brief Returns the preferred memory device for this algorithm. + * CUDA-capable algorithms should override to return VRAM when their + * data resides on the GPU. AlgorithmTask uses this to synchronize + * appropriately. + */ + virtual MemoryDevice GetPreferredDevice() const { return m_PreferredDevice; } + void SetPreferredDevice(MemoryDevice dev) { m_PreferredDevice = dev; } + + /** + * @brief Returns true if this algorithm prefers GPU execution. + */ + bool IsGPU() const { return GetPreferredDevice() == MemoryDevice::VRAM; } + signals: virtual void Started() { ULIB_SIGNAL_EMIT(Algorithm::Started); } virtual void Finished() { ULIB_SIGNAL_EMIT(Algorithm::Finished); } @@ -86,6 +110,7 @@ signals: protected: Algorithm* m_Encoder; Algorithm* m_Decoder; + MemoryDevice m_PreferredDevice; }; @@ -192,11 +217,19 @@ protected: } private: + void ExecuteAlgorithm() { + if (!m_Algorithm) return; + m_Algorithm->Process(m_Input); +#ifdef USE_CUDA + if (m_Algorithm->IsGPU()) { + cudaDeviceSynchronize(); + } +#endif + } + void RunCyclic() { while (!m_StopRequested.load()) { - if (m_Algorithm) { - m_Algorithm->Process(m_Input); - } + ExecuteAlgorithm(); std::unique_lock lock(m_WaitMutex.GetNative()); m_Condition.wait_for(lock, std::chrono::milliseconds(m_CycleTime_ms), @@ -212,9 +245,7 @@ private: }); if (m_StopRequested.load()) break; m_Triggered.store(false); - if (m_Algorithm) { - m_Algorithm->Process(m_Input); - } + ExecuteAlgorithm(); } } diff --git a/src/Math/VoxImageFilter.h b/src/Math/VoxImageFilter.h index 9af5099..6e13c66 100644 --- a/src/Math/VoxImageFilter.h +++ b/src/Math/VoxImageFilter.h @@ -79,6 +79,17 @@ public: */ void Run(); + /** + * @brief Returns VRAM if image or kernel data is on GPU, RAM otherwise. + */ + MemoryDevice GetPreferredDevice() const override { + if (m_Image && m_Image->Data().GetDevice() == MemoryDevice::VRAM) + return MemoryDevice::VRAM; + if (m_KernelData.ConstData().GetDevice() == MemoryDevice::VRAM) + return MemoryDevice::VRAM; + return MemoryDevice::RAM; + } + void SetKernelNumericXZY(const std::vector &numeric); void SetKernelSpherical(float (*shape)(float)); diff --git a/src/Math/VoxImageFilterABTrim.hpp b/src/Math/VoxImageFilterABTrim.hpp index 621c00d..deb50e6 100644 --- a/src/Math/VoxImageFilterABTrim.hpp +++ b/src/Math/VoxImageFilterABTrim.hpp @@ -109,7 +109,8 @@ public: } #if defined(USE_CUDA) && defined(__CUDACC__) - void Run() { + VoxImage* Process(VoxImage* const& image) override { + if (this->m_Image != image) this->SetImage(image); if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM || this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) { @@ -136,8 +137,9 @@ public: d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count, mAtrim, mBtrim); cudaDeviceSynchronize(); + return this->m_Image; } else { - BaseClass::Run(); + return BaseClass::Process(image); } } #endif @@ -207,7 +209,8 @@ public: } #if defined(USE_CUDA) && defined(__CUDACC__) - void Run() { + VoxImage* Process(VoxImage* const& image) override { + if (this->m_Image != image) this->SetImage(image); if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM || this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) { @@ -234,8 +237,9 @@ public: d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count, mAtrim, mBtrim); cudaDeviceSynchronize(); + return this->m_Image; } else { - BaseClass::Run(); + return BaseClass::Process(image); } } #endif diff --git a/src/Math/VoxImageFilterLinear.hpp b/src/Math/VoxImageFilterLinear.hpp index 420254a..c27bf92 100644 --- a/src/Math/VoxImageFilterLinear.hpp +++ b/src/Math/VoxImageFilterLinear.hpp @@ -67,7 +67,8 @@ public: VoxFilterAlgorithmLinear(const Vector3i &size) : BaseClass(size) {} #if defined(USE_CUDA) && defined(__CUDACC__) - void Run() { + VoxImage* Process(VoxImage* const& image) override { + if (this->m_Image != image) this->SetImage(image); if (this->m_Image->Data().GetDevice() == MemoryDevice::VRAM || this->m_KernelData.Data().GetDevice() == MemoryDevice::VRAM) { @@ -92,8 +93,9 @@ public: LinearFilterKernel<<>>( d_img_in, d_img_out, d_kernel, vox_size, ker_size, center_count); cudaDeviceSynchronize(); + return this->m_Image; } else { - BaseClass::Run(); + return BaseClass::Process(image); } } #endif