algorithm chain for ram-vram

This commit is contained in:
AndreaRigoni
2026-03-28 08:22:14 +00:00
parent ec2027e980
commit 876b8f4592
8 changed files with 883 additions and 172 deletions

View File

@@ -0,0 +1,408 @@
/*//////////////////////////////////////////////////////////////////////////////
// CMT Cosmic Muon Tomography project //////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
Copyright (c) 2014, Universita' degli Studi di Padova, INFN sez. di Padova
All rights reserved
Authors: Andrea Rigoni Garola < andrea.rigoni@pd.infn.it >
------------------------------------------------------------------
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3.0 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library.
//////////////////////////////////////////////////////////////////////////////*/
#include "testing-prototype.h"
#include "Core/Algorithm.h"
#include "Math/VoxImage.h"
#include "Math/VoxImageFilter.h"
#include <iostream>
#include <thread>
#include <chrono>
using namespace uLib;
// Minimal voxel payload used to instantiate the VoxImage / VoxFilterAlgorithm*
// templates exercised by this test.
struct TestVoxel {
// Voxel intensity; the only member this file reads or writes.
Scalarf Value;
// Never referenced in this file.
// NOTE(review): presumably required by the voxel interface the image/filter
// templates expect -- confirm against Math/VoxImage.h.
unsigned int Count;
};
int main() {
BEGIN_TESTING(AlgorithmCudaChain);
////////////////////////////////////////////////////////////////////////////
// TEST 1: Single filter — GetPreferredDevice reflects data location
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 1: GetPreferredDevice reflects data location ---\n";
VoxImage<TestVoxel> image(Vector3i(10, 10, 10));
image[Vector3i(5, 5, 5)].Value = 1;
VoxFilterAlgorithmLinear<TestVoxel> filter(Vector3i(3, 3, 3));
std::vector<float> weights(27, 1.0f);
filter.SetImage(&image);
filter.SetKernelNumericXZY(weights);
// Before VRAM move: should prefer RAM
TEST1(filter.GetPreferredDevice() == MemoryDevice::RAM);
TEST1(!filter.IsGPU());
std::cout << " RAM mode: PreferredDevice=RAM, IsGPU=false OK\n";
// Move image data to VRAM
image.Data().MoveToVRAM();
// After VRAM move: should prefer VRAM
TEST1(filter.GetPreferredDevice() == MemoryDevice::VRAM);
TEST1(filter.IsGPU());
std::cout << " VRAM mode: PreferredDevice=VRAM, IsGPU=true OK\n";
// Move back to RAM
image.Data().MoveToRAM();
TEST1(filter.GetPreferredDevice() == MemoryDevice::RAM);
std::cout << " Back to RAM: PreferredDevice=RAM OK\n";
}
////////////////////////////////////////////////////////////////////////////
// TEST 2: Kernel data on VRAM also triggers GPU preference
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 2: Kernel on VRAM triggers GPU preference ---\n";
VoxImage<TestVoxel> image(Vector3i(8, 8, 8));
VoxFilterAlgorithmLinear<TestVoxel> filter(Vector3i(3, 3, 3));
std::vector<float> weights(27, 1.0f);
filter.SetImage(&image);
filter.SetKernelNumericXZY(weights);
TEST1(filter.GetPreferredDevice() == MemoryDevice::RAM);
// Only kernel on VRAM
filter.GetKernelData().Data().MoveToVRAM();
TEST1(filter.GetPreferredDevice() == MemoryDevice::VRAM);
std::cout << " Kernel on VRAM: PreferredDevice=VRAM OK\n";
filter.GetKernelData().Data().MoveToRAM();
TEST1(filter.GetPreferredDevice() == MemoryDevice::RAM);
}
////////////////////////////////////////////////////////////////////////////
// TEST 3: Algorithm interface — Process through base pointer
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 3: Process through Algorithm base pointer ---\n";
VoxImage<TestVoxel> image(Vector3i(10, 10, 10));
image[Vector3i(5, 5, 5)].Value = 10;
VoxFilterAlgorithmLinear<TestVoxel> filter(Vector3i(3, 3, 3));
std::vector<float> weights(27, 1.0f);
filter.SetImage(&image);
filter.SetKernelNumericXZY(weights);
// Use through Algorithm base class pointer
Algorithm<VoxImage<TestVoxel>*, VoxImage<TestVoxel>*>* alg = &filter;
VoxImage<TestVoxel>* result = alg->Process(&image);
TEST1(result == &image);
std::cout << " Process through base pointer returned correct image OK\n";
// Verify filter actually ran (center voxel should be averaged)
// With uniform 3x3x3 kernel and single non-zero voxel at center,
// the center value should be 10/27 ≈ 0.37
TEST1(image[Vector3i(5, 5, 5)].Value < 10.0f);
std::cout << " Filter modified voxel values OK\n";
}
////////////////////////////////////////////////////////////////////////////
// TEST 4: Encoder/decoder chain — two filters linked
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 4: Encoder/decoder chain ---\n";
VoxImage<TestVoxel> image(Vector3i(10, 10, 10));
image[Vector3i(5, 5, 5)].Value = 100;
// First filter: linear smoothing
VoxFilterAlgorithmLinear<TestVoxel> filter1(Vector3i(3, 3, 3));
std::vector<float> weights1(27, 1.0f);
filter1.SetImage(&image);
filter1.SetKernelNumericXZY(weights1);
// Second filter: threshold
VoxFilterAlgorithmThreshold<TestVoxel> filter2(Vector3i(1, 1, 1));
filter2.SetThreshold(0.5f);
filter2.SetImage(&image);
// 1x1x1 kernel with value 1
std::vector<float> weights2(1, 1.0f);
filter2.SetKernelNumericXZY(weights2);
// Chain: filter1 → filter2
filter1.SetDecoder(&filter2);
filter2.SetEncoder(&filter1);
TEST1(filter1.GetDecoder() == &filter2);
TEST1(filter2.GetEncoder() == &filter1);
std::cout << " Chain linked: filter1 -> filter2 OK\n";
// Execute chain manually (encoder first, then decoder)
filter1.Process(&image);
float smoothed_center = image[Vector3i(5, 5, 5)].Value;
std::cout << " After linear: center = " << smoothed_center << "\n";
filter2.Process(&image);
float thresholded_center = image[Vector3i(5, 5, 5)].Value;
std::cout << " After threshold: center = " << thresholded_center << "\n";
// After threshold, values should be 0 or 1
TEST1(thresholded_center == 0.0f || thresholded_center == 1.0f);
std::cout << " Chain execution produced valid results OK\n";
}
////////////////////////////////////////////////////////////////////////////
// TEST 5: CUDA chain — VRAM data through chained filters
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 5: VRAM data through chained filters ---\n";
VoxImage<TestVoxel> image(Vector3i(10, 10, 10));
image[Vector3i(5, 5, 5)].Value = 50;
VoxFilterAlgorithmLinear<TestVoxel> filter1(Vector3i(3, 3, 3));
std::vector<float> weights1(27, 1.0f);
filter1.SetImage(&image);
filter1.SetKernelNumericXZY(weights1);
VoxFilterAlgorithmAbtrim<TestVoxel> filter2(Vector3i(3, 3, 3));
std::vector<float> weights2(27, 1.0f);
filter2.SetImage(&image);
filter2.SetKernelNumericXZY(weights2);
filter2.SetABTrim(1, 1);
// Chain
filter1.SetDecoder(&filter2);
filter2.SetEncoder(&filter1);
// Move data to VRAM
image.Data().MoveToVRAM();
filter1.GetKernelData().Data().MoveToVRAM();
filter2.GetKernelData().Data().MoveToVRAM();
// Both filters should report VRAM preference
TEST1(filter1.GetPreferredDevice() == MemoryDevice::VRAM);
TEST1(filter2.GetPreferredDevice() == MemoryDevice::VRAM);
TEST1(filter1.IsGPU());
TEST1(filter2.IsGPU());
std::cout << " Both filters detect VRAM preference OK\n";
// Verify the chain's device consistency
auto* encoder = filter2.GetEncoder();
TEST1(encoder != nullptr);
TEST1(encoder->IsGPU());
std::cout << " Encoder in chain also reports GPU OK\n";
#ifdef USE_CUDA
// With CUDA: filters execute on GPU via Process()
image.Data().MoveToRAM(); // reset for clean test
image[Vector3i(5, 5, 5)].Value = 50;
image.Data().MoveToVRAM();
filter1.Process(&image);
TEST1(image.Data().GetDevice() == MemoryDevice::VRAM);
std::cout << " CUDA: data stays in VRAM after filter1 OK\n";
filter2.Process(&image);
TEST1(image.Data().GetDevice() == MemoryDevice::VRAM);
std::cout << " CUDA: data stays in VRAM after filter2 OK\n";
#else
// Without CUDA: verify Process still works via CPU fallback
image.Data().MoveToRAM();
image[Vector3i(5, 5, 5)].Value = 50;
filter1.GetKernelData().Data().MoveToRAM();
filter2.GetKernelData().Data().MoveToRAM();
filter1.Process(&image);
filter2.Process(&image);
std::cout << " No CUDA: CPU fallback executed correctly OK\n";
#endif
}
////////////////////////////////////////////////////////////////////////////
// TEST 6: AlgorithmTask with VRAM-aware filter
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 6: AlgorithmTask with VRAM-aware filter ---\n";
VoxImage<TestVoxel> image(Vector3i(8, 8, 8));
image[Vector3i(4, 4, 4)].Value = 20;
VoxFilterAlgorithmLinear<TestVoxel> filter(Vector3i(3, 3, 3));
std::vector<float> weights(27, 1.0f);
filter.SetImage(&image);
filter.SetKernelNumericXZY(weights);
// Set up task
AlgorithmTask<VoxImage<TestVoxel>*, VoxImage<TestVoxel>*> task;
task.SetAlgorithm(&filter);
task.SetMode(AlgorithmTask<VoxImage<TestVoxel>*, VoxImage<TestVoxel>*>::Cyclic);
task.SetCycleTime(50);
// Run task for a few cycles
task.Run(&image);
std::this_thread::sleep_for(std::chrono::milliseconds(200));
task.Stop();
// After cyclic execution, the filter should have smoothed values
TEST1(image[Vector3i(4, 4, 4)].Value < 20.0f);
std::cout << " Task cyclic execution modified image OK\n";
std::cout << " Center value after smoothing: "
<< image[Vector3i(4, 4, 4)].Value << "\n";
}
////////////////////////////////////////////////////////////////////////////
// TEST 7: AlgorithmTask async with chained filters
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 7: AlgorithmTask async with filter ---\n";
VoxImage<TestVoxel> image(Vector3i(8, 8, 8));
image[Vector3i(4, 4, 4)].Value = 30;
VoxFilterAlgorithmLinear<TestVoxel> filter(Vector3i(3, 3, 3));
std::vector<float> weights(27, 1.0f);
filter.SetImage(&image);
filter.SetKernelNumericXZY(weights);
AlgorithmTask<VoxImage<TestVoxel>*, VoxImage<TestVoxel>*> task;
task.SetAlgorithm(&filter);
task.SetMode(AlgorithmTask<VoxImage<TestVoxel>*, VoxImage<TestVoxel>*>::Async);
float before = image[Vector3i(4, 4, 4)].Value;
task.Run(&image);
// Trigger one execution
task.Notify();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
task.Stop();
float after = image[Vector3i(4, 4, 4)].Value;
TEST1(after < before);
std::cout << " Async trigger: value " << before << " -> " << after << " OK\n";
}
////////////////////////////////////////////////////////////////////////////
// TEST 8: Device preference propagation in chain
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 8: Device preference propagation check ---\n";
VoxImage<TestVoxel> image(Vector3i(8, 8, 8));
image[Vector3i(4, 4, 4)].Value = 10;
VoxFilterAlgorithmLinear<TestVoxel> filterA(Vector3i(3, 3, 3));
VoxFilterAlgorithmAbtrim<TestVoxel> filterB(Vector3i(3, 3, 3));
VoxFilterAlgorithmThreshold<TestVoxel> filterC(Vector3i(1, 1, 1));
std::vector<float> w27(27, 1.0f);
std::vector<float> w1(1, 1.0f);
filterA.SetImage(&image);
filterA.SetKernelNumericXZY(w27);
filterB.SetImage(&image);
filterB.SetKernelNumericXZY(w27);
filterB.SetABTrim(1, 1);
filterC.SetImage(&image);
filterC.SetKernelNumericXZY(w1);
filterC.SetThreshold(0.1f);
// Chain: A → B → C
filterA.SetDecoder(&filterB);
filterB.SetEncoder(&filterA);
filterB.SetDecoder(&filterC);
filterC.SetEncoder(&filterB);
// All on RAM
TEST1(!filterA.IsGPU());
TEST1(!filterB.IsGPU());
TEST1(!filterC.IsGPU());
std::cout << " All filters on RAM OK\n";
// Move image to VRAM — filters A and B should detect it
image.Data().MoveToVRAM();
TEST1(filterA.IsGPU());
TEST1(filterB.IsGPU());
// filterC with 1x1x1 kernel doesn't have CUDA override, but still detects VRAM
TEST1(filterC.IsGPU());
std::cout << " Image on VRAM: all filters report GPU OK\n";
// Can walk the chain and check device consistency
auto* step = static_cast<Algorithm<VoxImage<TestVoxel>*, VoxImage<TestVoxel>*>*>(&filterA);
bool all_gpu = true;
while (step) {
if (!step->IsGPU()) all_gpu = false;
step = static_cast<Algorithm<VoxImage<TestVoxel>*, VoxImage<TestVoxel>*>*>(step->GetDecoder());
}
TEST1(all_gpu);
std::cout << " Chain walk: all steps report GPU OK\n";
image.Data().MoveToRAM();
}
////////////////////////////////////////////////////////////////////////////
// TEST 9: Process through chain with Algorithm interface
////////////////////////////////////////////////////////////////////////////
{
std::cout << "\n--- Test 9: Sequential chain processing via Algorithm interface ---\n";
VoxImage<TestVoxel> image(Vector3i(10, 10, 10));
// Set a pattern: single bright voxel
image[Vector3i(5, 5, 5)].Value = 100;
VoxFilterAlgorithmLinear<TestVoxel> filterA(Vector3i(3, 3, 3));
std::vector<float> w(27, 1.0f);
filterA.SetImage(&image);
filterA.SetKernelNumericXZY(w);
VoxFilterAlgorithmLinear<TestVoxel> filterB(Vector3i(3, 3, 3));
filterB.SetImage(&image);
filterB.SetKernelNumericXZY(w);
// Chain
filterA.SetDecoder(&filterB);
filterB.SetEncoder(&filterA);
// Process chain through base pointer
using AlgType = Algorithm<VoxImage<TestVoxel>*, VoxImage<TestVoxel>*>;
AlgType* chain = &filterA;
// Walk and process
AlgType* current = chain;
while (current) {
current->Process(&image);
current = static_cast<AlgType*>(current->GetDecoder());
}
// After two rounds of smoothing, the peak should be smaller than original
float final_val = image[Vector3i(5, 5, 5)].Value;
TEST1(final_val < 100.0f);
std::cout << " Two-stage smoothing: peak = " << final_val << " OK\n";
}
END_TESTING;
}

View File

@@ -16,6 +16,7 @@ set(TESTS
QuadMeshTest
BitCodeTest
UnitsTest
AlgorithmCudaChainTest
)
set(LIBRARIES
@@ -28,6 +29,6 @@ set(LIBRARIES
uLib_add_tests(Math)
if(USE_CUDA)
set_source_files_properties(VoxImageTest.cpp VoxImageCopyTest.cpp VoxImageFilterTest.cpp VoxRaytracerTest.cpp VoxRaytracerTestExtended.cpp PROPERTIES LANGUAGE CUDA)
set_source_files_properties(VoxImageTest.cpp VoxImageCopyTest.cpp VoxImageFilterTest.cpp VoxRaytracerTest.cpp VoxRaytracerTestExtended.cpp AlgorithmCudaChainTest.cpp PROPERTIES LANGUAGE CUDA)
set_source_files_properties(VoxRaytracerTest.cpp VoxRaytracerTestExtended.cpp PROPERTIES CXX_STANDARD 17 CUDA_STANDARD 17)
endif()